# encoding: utf-8
import urllib.request
import os
import requests
from bs4 import BeautifulSoup
import lxml
# Scraping function: collects image URLs from a web page.
def url_img(url, timeout=10):
    """Return the ``src`` of every ``<img>`` on a page whose value contains 'pic'.

    Example page: ``https://www.51tietu.net/tp/364992.html``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server, passed to ``requests.get``.

    Returns:
        A list of ``src`` attribute values in document order, exactly as
        they are written in the HTML (they are not resolved against ``url``).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    res = requests.get(url, headers=headers, timeout=timeout)
    # Decode the body as UTF-8 regardless of the charset requests inferred.
    res.encoding = 'utf-8'
    all_img = BeautifulSoup(res.text, 'lxml').find_all('img')
    # Tag.get() yields None for <img> tags that carry no src attribute
    # (e.g. lazy-loaded images); tag['src'] would raise KeyError on them.
    sources = (tag.get('src') for tag in all_img)
    return [src for src in sources if src and 'pic' in src]
def get_img(folder_path, lists):
    """Download every URL in ``lists`` into the directory ``folder_path``.

    The directory is created when it does not exist yet. Each file is named
    after the text following the last '/' of its URL, and a URL whose target
    file already exists is skipped. A failed download is reported on stdout
    and does not stop the remaining downloads.

    Args:
        folder_path: Destination directory; a trailing separator is optional.
        lists: Iterable of URL strings to download.
    """
    if not os.path.exists(folder_path):
        print('文件夹不存在')
        os.makedirs(folder_path, exist_ok=True)

    # The downloads run whether or not the folder had to be created; when
    # they lived in the else-branch, a first run on a new folder fetched
    # nothing at all.
    for urls in lists:
        print('{}'.format(urls))
        filename = urls.split('/')[-1]
        # os.path.join supplies the separator that plain concatenation
        # dropped ('root_path' + 'a.jpg' -> 'root_patha.jpg').
        filepath = os.path.join(folder_path, filename)
        if os.path.exists(filepath):
            print('文件已存在')
            continue
        try:
            urllib.request.urlretrieve(urls, filename=filepath)
        except Exception as e:
            # Deliberately broad: one bad URL must not abort the whole batch.
            print('下截文件出错', e)
# Script driver: scrape the image URLs from one page, then download them.
# NOTE(review): 'xxx' and 'root_path' look like placeholders for a real page
# URL and destination folder -- confirm before running.
fs = 'root_path'
fh = url_img('xxx')
get_img(folder_path=fs, lists=fh)
# NOTE: This is a first version of the code; it will be refined over time.
# (Comments section)