目 录CONTENT

文章目录

Python3 爬虫图片,新手入坑(持续更新中)

Seven
2021-07-23 / 0 评论 / 0 点赞 / 769 阅读 / 2315 字 / 正在检测是否收录...
# encoding: utf-8
import urllib.request
import os
import requests
from bs4 import BeautifulSoup
import lxml

# 抓取函数

def url_img(url):
    """Return the ``src`` of every ``<img>`` on a page whose URL contains 'pic'.

    The page is fetched with a desktop-browser User-Agent, decoded as UTF-8
    and parsed with the ``lxml`` parser.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        A list of ``src`` attribute values (in document order) that contain
        the substring ``'pic'``. ``<img>`` tags without a ``src`` attribute
        are skipped.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched
            (invalid URL, connection failure, timeout, ...).
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    # Without a timeout requests may block forever on an unresponsive server.
    res = requests.get(url, headers=headers, timeout=10)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'lxml')

    img_url = []
    for tag in soup.find_all('img'):
        # Use .get(): indexing tag['src'] raises KeyError for <img> tags that
        # carry no src attribute (e.g. lazy-loaded images using data-src).
        src = tag.get('src')
        if src and 'pic' in src:
            img_url.append(src)

    return img_url


def get_img(folder_path,lists):
    """Download every URL in *lists* into the directory *folder_path*.

    The directory is created when it does not exist yet, and the downloads
    then proceed in the same call. Each file is named after the last
    ``/``-separated segment of its URL. Files that already exist are left
    untouched, and a failed download is reported on stdout without aborting
    the remaining ones.

    Args:
        folder_path: Target directory, with or without a trailing separator.
        lists: Iterable of URL strings to download.

    Returns:
        None.
    """
    if not os.path.exists(folder_path):
        print('文件夹不存在')
        os.makedirs(folder_path)

    # The loop must run regardless of whether the folder had to be created;
    # otherwise the first call on a fresh folder downloads nothing.
    for urls in lists:
        print('{}'.format(urls))
        filename = urls.split('/')[-1]
        # os.path.join inserts the separator that plain string concatenation
        # drops when folder_path has no trailing slash.
        filepath = os.path.join(folder_path, filename)
        if os.path.exists(filepath):
            print('文件已存在')
            continue
        try:
            urllib.request.urlretrieve(urls, filename=filepath)
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going so
            # one bad URL does not stop the rest of the batch.
            print('下截文件出错', e)
# Run the scraper only when this file is executed as a script, so importing
# the module does not trigger network requests or filesystem writes.
if __name__ == "__main__":
    # 'xxx' and 'root_path' are placeholders: replace them with the page URL
    # to scan and the directory to save images into.
    fh = url_img('xxx')
    fs = r'root_path'
    get_img(fs,fh)

先把代码打上去。后面慢慢完善。

0

评论区