Python爬取快手搜索页面视频

Python爬取快手搜索页面视频

枫
2023-02-25 / 0 评论 / 69 阅读 / 耗时 150ms / 正在检测是否收录...

介绍

  这个脚本跟上一个快手帖子差不多,但可以设置要爬取的页面,随便分享一下吧!
这个脚本爬取的是快手搜索页面下面的视频。
PMKOK5SF)2MB}TLNQZ2L[C1.png

效果

别忘记在D盘新建video文件夹,
也可以自己修改下面代码中的保存路径。
6SG2342950OD%L)}0VHI~PM.png
Test

代码

import os
import re
from urllib.parse import quote

import requests

# Kuaishou's web GraphQL endpoint; every search request is POSTed here.
GRAPHQL_URL = 'https://www.kuaishou.com/graphql'

# Directory the downloaded .mp4 files are written to (created on demand).
VIDEO_DIR = 'D:/video'

# Seconds to wait on any single HTTP request before giving up, so that a
# stalled connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

# GraphQL document sent with the "visionSearchPhoto" operation.
SEARCH_QUERY = "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n  visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    searchSessionId\n    pcursor\n    aladdinBanner {\n      imgUrl\n      link\n      __typename\n    }\n    __typename\n  }\n}\n"

# Characters removed from a caption before it is used as a file name: path
# separators (including the backslash), characters Windows forbids in file
# names, line breaks/tabs, and the punctuation the script has always dropped.
_UNSAFE_TITLE_CHARS = re.compile(r'[\\/:*?<>|\n\r\t#@()《》."]')


def clean_title(title, fallback='video'):
    """Return *title* reduced to a string that is safe to use as a file name.

    Args:
        title: Raw video caption; may be ``None`` or empty.
        fallback: Name returned when nothing usable is left after cleaning.

    Returns:
        The caption without unsafe characters and surrounding whitespace,
        or *fallback* if that would be an empty string.
    """
    cleaned = _UNSAFE_TITLE_CHARS.sub('', title or '').strip()
    return cleaned or fallback


def build_headers(keyword):
    """Return the HTTP headers for a search request about *keyword*.

    The Referer is derived from the keyword (percent-encoded) so it points
    at the search page for the term that is actually being queried.
    """
    return {
        # Session cookie copied from a logged-in browser session.
        'Cookie': "kpf=PC_WEB; clientid=3; did=web_8fe7e63f0eb61560a3d9d584a0192980; didv=1676463621529; ktrace-context=1|MS43NjQ1ODM2OTgyODY2OTgyLjQ1MTk4MTEzLjE2NzY1MjM4MzIxMzQuMjE3Mzk4|MS43NjQ1ODM2OTgyODY2OTgyLjYyNjI3Mzg5LjE2NzY1MjM4MzIxMzQuMjE3Mzk5|0|graphql-server|webservice|false|NA; userId=1448552402; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABHECygd_FAYtj391KlgQ26rwaUEmFI-rxQsP1qmfmA-_rwKkThSPxfFNebSG0e1hVLgS627iuFjUW0aOStjm-lzRJQD5xkI7jI2pR9zDT6HHSHuyRmJQLNBGQ3XZNn0zjAwrOD0XvHpPWKLxspPJHFgLkvC-faqh1sleDAbprtd3uMJqpP3-2dzA42q823RlLJqC406oJkGvgDjeMnIQt4hoSnIqSq99L0mk4jolsseGdcwiNIiDPok1ufZOQoy1uG6Y5fWcP8CbK1qh5dscVxn3PcsG6KCgFMAE; kuaishou.server.web_ph=27aa693d1ac88453398b2d8a6c9e9fb51229; kpn=KUAISHOU_VISION",
        'Host': "www.kuaishou.com",
        'Origin': "https://www.kuaishou.com",
        'Referer': f"https://www.kuaishou.com/search/video?searchKey={quote(keyword, safe='')}",
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.190.400 QQBrowser/11.5.5240.400",
    }


def search_videos(keyword, pcursor):
    """Run one search request and return the list of feed entries.

    Args:
        keyword: Search term typed by the user.
        pcursor: Value sent as the ``pcursor`` GraphQL variable. The script
            forwards the user's "page" input unchanged.
            NOTE(review): presumably the API treats this as a page cursor;
            confirm against the live service.

    Returns:
        The ``feeds`` list of the response, or an empty list when the
        response carries no usable data (e.g. an error payload).

    Raises:
        requests.RequestException: On connection problems, timeouts or a
            non-2xx HTTP status.
    """
    payload = {
        'operationName': "visionSearchPhoto",
        'query': SEARCH_QUERY,
        'variables': {'keyword': keyword, 'pcursor': pcursor, 'page': "search"},
    }
    response = requests.post(
        url=GRAPHQL_URL,
        headers=build_headers(keyword),
        json=payload,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # Any level of the answer may be missing or null when the request is
    # rejected, so walk it defensively instead of indexing blindly.
    data = response.json().get('data') or {}
    result = data.get('visionSearchPhoto') or {}
    return result.get('feeds') or []


def download_video(video_url, path):
    """Stream the video at *video_url* into the file *path*.

    The file is only opened once the server has answered successfully, so
    a failed request does not leave an empty file behind, and the body is
    written chunk by chunk instead of being held in memory as a whole.

    Raises:
        requests.RequestException: If the download fails.
        OSError: If *path* cannot be written.
    """
    with requests.get(video_url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(path, mode='wb') as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)


def main():
    """Ask for a search term and page, then download every video found."""
    name = input('请输入你要爬取的内容')
    page = input('请输入你要爬取的页数')

    feeds = search_videos(name, page)
    if not feeds:
        print('未获取到任何视频,请检查Cookie是否过期或更换搜索内容')
        return

    # Create the target directory up front so the first write cannot fail
    # just because the folder was never created by hand.
    os.makedirs(VIDEO_DIR, exist_ok=True)

    for feed in feeds:
        photo = feed.get('photo') or {}
        # Direct URL of the video file; entries without one cannot be saved.
        video_url = photo.get('photoUrl')
        if not video_url:
            continue
        # The caption becomes the file name; fall back to the photo id when
        # the caption is missing or consists only of stripped characters.
        title = clean_title(
            photo.get('caption'),
            fallback=str(photo.get('id') or 'video'),
        )
        # Progress output.
        print(title, video_url)
        try:
            download_video(video_url, f'{VIDEO_DIR}/{title}.mp4')
        except (requests.RequestException, OSError) as exc:
            # One broken video should not abort the remaining downloads.
            print(f'下载失败: {title} ({exc})')

    print('\n\n')
    print('下载完毕!!!')


if __name__ == '__main__':
    main()
4

海报

正在生成.....

评论 (0)

取消