Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Latest commit

 

History

History
History
91 lines (72 loc) · 2.54 KB

File metadata and controls

91 lines (72 loc) · 2.54 KB
Copy raw file
Download raw file
Open symbols panel
Edit and raw actions
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# encoding = utf-8
import concurrent
import os
from concurrent.futures import ThreadPoolExecutor
import requests
from bs4 import BeautifulSoup
def header(referer):
    """Return the HTTP headers used when fetching an image.

    The Referer header is set to *referer*; NOTE(review): presumably the
    image host checks it for anti-hotlinking — confirm against the site.
    """
    return {
        'Host': 'i.meizitu.net',
        'Pragma': 'no-cache',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': '{}'.format(referer),
    }
def request_page(url):
    """Fetch *url* and return the response body as text.

    Returns None on any network error, timeout, or non-200 status.
    """
    try:
        # timeout so a stalled connection cannot hang the crawler forever;
        # a timeout raises requests.Timeout, a RequestException subclass
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        return None  # explicit: non-200 responses yield None
    except requests.RequestException:
        return None
def get_page_urls():
    """Collect gallery detail-page URLs from the site's list pages.

    Returns a list of URLs. Fixes the original bug where ``urls`` was
    re-created and returned *inside* the page loop (so only the first
    page could ever be collected) and where ``list`` shadowed the builtin.
    """
    urls = []
    for page in range(1, 2):
        baseurl = 'https://www.mzitu.com/page/{}'.format(page)
        html = request_page(baseurl)
        if html is None:
            # request_page failed; skip instead of crashing BeautifulSoup
            continue
        soup = BeautifulSoup(html, 'lxml')
        items = soup.find(class_='postlist').find_all('li')
        for item in items:
            url = item.find('span').find('a').get('href')
            print('页面链接:%s' % url)
            urls.append(url)
    return urls
def download_Pic(title, image_list):
    """Download every image URL in *image_list* into a folder named *title*.

    Files are saved as ``<title>/1.jpg``, ``<title>/2.jpg``, ...
    NOTE(review): *title* comes from scraped page HTML and may contain
    characters invalid in file names — TODO sanitize before use.
    """
    # makedirs with exist_ok so rerunning after a partial download
    # does not crash with FileExistsError (os.mkdir did)
    os.makedirs(title, exist_ok=True)
    for index, item in enumerate(image_list, start=1):
        filename = '%s/%s.jpg' % (title, str(index))
        print('downloading....%s : NO.%s' % (title, str(index)))
        # fetch first, then open the file, so a failed request does not
        # leave an empty .jpg behind
        img = requests.get(item, headers=header(item)).content
        with open(filename, 'wb') as f:
            f.write(img)
def download(url):
    """Scrape one gallery: read its page count, collect every image URL,
    then download them all into a folder named after the gallery title.
    """
    html = request_page(url)
    if html is None:
        # original passed None straight into BeautifulSoup -> TypeError
        return
    soup = BeautifulSoup(html, 'lxml')
    # the last-but-one pager link holds the total number of pages
    total = soup.find(class_='pagenavi').find_all('a')[-2].find('span').string
    title = soup.find('h2').string
    image_list = []
    for i in range(int(total)):
        page_html = request_page(url + '/%s' % (i + 1))
        if page_html is None:
            continue  # skip pages that fail to load
        page_soup = BeautifulSoup(page_html, 'lxml')
        img_url = page_soup.find('img').get('src')
        image_list.append(img_url)
    download_Pic(title, image_list)
def download_all_images(list_page_urls):
    """Download every gallery in *list_page_urls* concurrently.

    The work is I/O-bound (HTTP downloads), so the already-imported
    ThreadPoolExecutor is the right tool. The original reached for
    ``concurrent.futures.ProcessPoolExecutor`` through the bare
    ``import concurrent`` — which only worked because the
    ``from concurrent.futures import ...`` line had bound the submodule
    as a side effect — and paid process/pickling overhead for no benefit.
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        for url in list_page_urls:
            executor.submit(download, url)
if __name__ == '__main__':
    # Entry point: gather the gallery links from the list pages,
    # then download every gallery concurrently.
    gallery_urls = get_page_urls()
    download_all_images(gallery_urls)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.