Batch-Scraping a Photo Gallery Site with Python


枫
2023-04-17

Introduction

  A crawler that batch-downloads images from a photo gallery site, something I threw together out of boredom. The code is also very simple, so I'm just sharing it here.
Site: http://www.tdz4.com/

Results

(Two screenshots showing the scraper's output.)

Code

import os
import re

import requests

# Create the download folder on first run
if not os.path.exists('D:/image/'):
    os.mkdir('D:/image/')

num = 1
pages = int(input('How many listing pages to scrape: '))
for page in range(1, pages + 1):  # pagination on the site starts at page 1
    print(f'\n===========\tScraping page {page}\t===========')
    url = f"http://www.tdz4.com/whzm/page/{page}"

    resp = requests.get(url=url)
    # print(resp.text)
    # Each gallery appears on the listing page as <h2><a  href="...">title</a></h2>
    posts = re.findall('<h2><a  href="(.*?)">(.*?)</a></h2>', resp.text)

    for post_url, name in posts:
        print(post_url, name)
        post = requests.get(url=post_url)
        # The images are lazy-loaded, so the real URLs sit in data-src attributes
        img_urls = re.findall('data-src="(.*?)"> <img', post.text)

        for img_url in img_urls:
            print(img_url)
            img = requests.get(url=img_url).content
            with open(f'D:/image/{name}{num}.jpg', 'wb') as f:
                f.write(img)
            num += 1

print('\n\n===========\tAll done!\t===========')
print(f'\nDownloaded {num - 1} images in total')
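
The script above sends bare requests with no headers, timeout, or error handling, so a single failed download stops the whole run. Below is a minimal hardening sketch you could drop in; the User-Agent string, timeout, retry count, and delay are my own assumptions, not something the original needed in order to work.

import time

import requests

# Hypothetical helper: header value, timeout and delay are assumptions,
# not requirements taken from the original script.
HEADERS = {'User-Agent': 'Mozilla/5.0'}

def fetch(url, retries=3, delay=1):
    """GET a URL with a browser-like User-Agent, a timeout and simple retries."""
    for attempt in range(retries):
        try:
            resp = requests.get(url, headers=HEADERS, timeout=10)
            resp.raise_for_status()
            return resp
        except requests.RequestException as e:
            print(f'Request failed ({e}), retry {attempt + 1}/{retries}')
            time.sleep(delay)
    return None

Replacing the requests.get(...) calls in the loops with fetch(...) (and skipping anything that comes back as None) keeps one bad URL or a temporary block from killing the whole crawl.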

Scraping the image URLs

Much the same as above: the code above downloads the images, while the version below just collects the image URLs and appends them to a text file.

import re

import requests

num = 1
pages = int(input('How many listing pages to scrape: '))
for page in range(1, pages + 1):  # pagination on the site starts at page 1
    print(f'\n===========\tScraping page {page}\t===========')
    url = f"http://www.tdz4.com/whzm/page/{page}"

    resp = requests.get(url=url)
    # print(resp.text)
    posts = re.findall('<h2><a  href="(.*?)">(.*?)</a></h2>', resp.text)

    for post_url, name in posts:
        print(post_url, name)
        post = requests.get(url=post_url)
        img_urls = re.findall('data-src="(.*?)"> <img', post.text)

        for img_url in img_urls:
            print(img_url)
            # Append each image URL to 写真.txt instead of downloading it
            with open('写真.txt', mode='a', encoding='utf-8') as f:
                f.write(img_url)
                f.write('\n')
            num += 1

print('\n\n===========\tAll done!\t===========')
print(f'\nCollected {num - 1} image URLs in total')
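
Since this version only records the links, here is a minimal sketch of how the saved 写真.txt could be fed back into a downloader later. It assumes one URL per line, exactly as written above; the output folder and the numbered filenames are my own choices, not part of the original.

import os

import requests

# Read the URLs collected above (one per line) and download them.
# 'D:/image/' mirrors the folder used in the first script; adjust as needed.
os.makedirs('D:/image/', exist_ok=True)

with open('写真.txt', encoding='utf-8') as f:
    urls = [line.strip() for line in f if line.strip()]

for n, url in enumerate(urls, start=1):
    img = requests.get(url).content
    with open(f'D:/image/{n}.jpg', 'wb') as out:
        out.write(img)
    print(f'{n}/{len(urls)} saved')

Splitting collection and downloading like this also makes it easy to re-run only the images that failed.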
