ApacheAde
diff --git a/Collapse file
‎Pipfile‎
Copy file name to clipboardExpand all lines: Pipfile
+5Lines changed: 5 additions & 0 deletions b/Collapse file
‎Pipfile‎
Copy file name to clipboardExpand all lines: Pipfile
+5Lines changed: 5 additions & 0 deletions
diff --git a/Collapse file
‎Pipfile.lock‎
Copy file name to clipboardExpand all lines: Pipfile.lock
+216-1Lines changed: 216 additions & 1 deletion b/Collapse file
‎Pipfile.lock‎
Copy file name to clipboardExpand all lines: Pipfile.lock
+216-1Lines changed: 216 additions & 1 deletion
diff --git a/Collapse file
‎stopwords.txt‎
Copy file name to clipboard
+113Lines changed: 113 additions & 0 deletions b/Collapse file
‎stopwords.txt‎
Copy file name to clipboard
+113Lines changed: 113 additions & 0 deletions
diff --git a/Collapse file
‎text.txt‎
Copy file name to clipboardExpand all lines: text.txt
+27,642Lines changed: 27642 additions & 0 deletions b/Collapse file
‎text.txt‎
Copy file name to clipboardExpand all lines: text.txt
+27,642Lines changed: 27642 additions & 0 deletions
diff --git a/Collapse file
‎utils.py‎
Copy file name to clipboard
+41Lines changed: 41 additions & 0 deletions b/Collapse file
‎utils.py‎
Copy file name to clipboard
+41Lines changed: 41 additions & 0 deletions
diff --git a/Collapse file
‎youbute_comment/__init__.py‎
Copy file name to clipboard
+36-31Lines changed: 36 additions & 31 deletions b/Collapse file
‎youbute_comment/__init__.py‎
Copy file name to clipboard
+36-31Lines changed: 36 additions & 31 deletions
@@ -7,6 +7,11 @@ verify_ssl = true
 
 [packages]
 requests = {extras = ["socks"],version = "*"}
+jieba = "*"
+numpy = "*"
+scipy = "*"
+imageio = "*"
+wordcloud = "*"
 
 [requires]
 python_version = "3.7"
@@ -0,0 +1,113 @@
+the
+of
+is
+and
+to
+he
+she
+...
+just
+so
+my
+in
+..
+that
+we
+very
+me
+was
+but
+your
+how
+do
+ng
+for
+an
+are
+by
+be
+as
+on
+with
+can
+if
+from
+which
+you
+it
+this
+then
+at
+have
+all
+not
+one
+has
+or
+that
+的
+了
+和
+是
+就
+都
+而
+及
+與
+著
+或
+什么
+还是
+就是
+还要
+可以
+没有
+看看
+怎么
+那么
+不能
+分享
+出来
+已经
+下载
+有点
+今天
+很多
+因为
+你们
+完全
+一次
+quot
+不是
+这样
+这么
+觉得
+知道
+只有
+不过
+需要
+还有
+一个
+这个
+回复
+现在
+不错
+大家
+应该
+我刚
+不会
+如果
+时候
+开始
+正在
+为啥
+各种
+一個
+沒有
+我們
+你們
+妳們
+他們
+她們
+是否
+留言
@@ -0,0 +1,41 @@
+import json
+import re
+import time
+from http.cookies import SimpleCookie
+from imageio import imread
+from wordcloud import WordCloud
+import jieba.analyse
+
+
+def word_segment(text):
+	# 分词处理
+	jieba.analyse.set_stop_words("./stopwords.txt")
+	words = jieba.cut(text)
+	from collections import Counter
+	result = Counter(words).most_common(20)
+	print(result)
+
+	tags = jieba.analyse.extract_tags(text, topK=20)
+	print(tags)
+	# yield " ".join(tags)
+
+if __name__ == '__main__':
+	text = open("text.txt", encoding="utf-8").read()
+	word_segment(text)
+	pass
+
+
+def word_cloud(texts):
+	"""
+	根据文本生成词云图片
+	"""
+	data = " ".join(text for text in texts)
+	mask_img = imread('./python-logo.png', flatten=True)
+	wordcloud = WordCloud(
+		font_path='/Library/Fonts//华文黑体.ttf',
+		background_color='white',
+		mask=mask_img
+	).generate(data)
+	plt.imshow(wordcloud)
+	plt.axis('off')
+	plt.savefig('./wordcloud.jpg', dpi=600)
@@ -1,19 +1,16 @@
 """
 获取youtube视频下的评论
-
 思路：
+基于youtube官方的API来获取, 这里是关于如何初始化配置的文档 https://developers.google.com/youtube/v3/getting-started
 
-基于youtube官方的API来获取, 这里是关于如何初始化配置的文档https://developers.google.com/youtube/v3/getting-started
-
-接口文档：https://developers.google.com/youtube/v3/docs/channelSections/list
-
-视频地址：https://www.youtube.com/watch?v=FWMIPukvdsQ
+评论接口文档：https://developers.google.com/youtube/v3/docs/commentThreads/list
 
+任意视频地址：https://www.youtube.com/watch?v=FWMIPukvdsQ
 """
 import requests
 
 #  在 API Console 配置生成
-key = "xxxxxx"
+key = "AIzaSyCtJuC7oMed0xxZYPcid913vPxOnl72sHg"
 # 视频ID
 videoId = "FWMIPukvdsQ"
 
@@ -22,39 +19,47 @@
       f"textFormat=plainText&" \
       f"part=snippet&" \
       f"videoId={videoId}&" \
-      f"maxResults=100"
+      f"maxResults=100"  # 分页参数
 
 proxies = {
-    'http': 'socks5://127.0.0.1:1080',
-    'https': 'socks5://127.0.0.1:1080',
+	'http': 'socks5://127.0.0.1:1080',
+	'https': 'socks5://127.0.0.1:1080',
 }
 
 
-# 获取一下页的凭证
+def spider(next_page_token):
+	if next_page_token:
+		params = {"pageToken": next_page_token}
+	else:
+		params = None
+	res = requests.get(url, proxies=proxies, params=params)
+	data = res.json()
+	import pprint
+	next_page_token = data.get("nextPageToken")
+
+	items = data.get("items")
+	for item in items:
 
+		comment = item.get("snippet").get("topLevelComment").get("snippet").get("textDisplay")
+		print(comment)
+	return next_page_token
 
-def main():
-    nextPageToken = "QURTSl9pMkR0YkxlcE1iOHhlLU1lNi1XWGhzTHdpUmlCN2w1UmJDNlVBaEhnT1dyejVFb3dnVGdWbExRSFNtMVRrNjE1TWVPWC04UVN2VGJrMkhjZ01KVmJpNllrRlVkdUFRWk1yVHp2cW1ZbjVNcXpFc2ZzRlI3ZkRlM3ZPUm1CalZSX1NkaE9qcEY4Tl8yUWRyMmN3"
-    while nextPageToken is not None:
-        if nextPageToken:
-            params = {"pageToken": nextPageToken}
-        else:
-            params = None
-        res = requests.get(url, proxies=proxies, params=params)
-        data = res.json()
-        import pprint
-        nextPageToken = data.get("nextPageToken")
-        print(nextPageToken)
-        items = data.get("items")
-        for item in items:
-            comment = item.get("snippet").get("topLevelComment").get("snippet").get("textDisplay")
-            print(comment)
-        import time
-        time.sleep(1)
-        print("==================")
+def run():
+	next_page_token = spider(None)
 
+	while next_page_token:
+		try:
+			print(next_page_token)
+			next_page_token = spider(next_page_token)
+			import time
+			time.sleep(1)
+		except Exception as e:
+			# 请求超时重试
+			import traceback
+			print(next_page_token)
+			print(traceback.format_exc())
 
 
 
 if __name__ == '__main__':
-    main()
+	run()
-Original file line number
+Diff line change
@@ -0,0 +1,113 @@
 +the
 +of
 +is
 +and
 +to
 +he
 +she
 +...
 +just
 +so
 +my
 +in
 +..
 +that
 +we
 +very
 +me
 +was
 +but
 +your
 +how
 +do
 +ng
 +for
 +an
 +are
 +by
 +be
 +as
 +on
 +with
 +can
 +if
 +from
 +which
 +you
 +it
 +this
 +then
 +at
 +have
 +all
 +not
 +one
 +has
 +or
 +that
 +的
 +了
 +和
 +是
 +就
 +都
 +而
 +及
 +與
 +著
 +或
 +什么
 +还是
 +就是
 +还要
 +可以
 +没有
 +看看
 +怎么
 +那么
 +不能
 +分享
 +出来
 +已经
 +下载
 +有点
 +今天
 +很多
 +因为
 +你们
 +完全
 +一次
 +quot
 +不是
 +这样
 +这么
 +觉得
 +知道
 +只有
 +不过
 +需要
 +还有
 +一个
 +这个
 +回复
 +现在
 +不错
 +大家
 +应该
 +我刚
 +不会
 +如果
 +时候
 +开始
 +正在
 +为啥
 +各种
 +一個
 +沒有
 +我們
 +你們
 +妳們
 +他們
 +她們
 +是否
 +留言