Python批量爬取抖音主页视频

Python批量爬取抖音主页视频

枫
2023-02-28 / 7 评论 / 310 阅读 / 耗时 136ms / 正在检测是否收录...

介绍

保存路径修改

视频默认保存到 D:/video/ 目录;如需更换保存位置,请在代码中自行修改保存路径(位置见下图)。
C[K_{HOL`@F9)WMPLH~)D3D.png

模块导入

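所需模块:requests、selenium(代码使用的版本为 selenium==3.141.0);re、time、os 属于 Python 标准库,无需另外安装。

打开 cmd,复制下面的安装命令并回车:

pip install requests selenium==3.141.0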

浏览器驱动

本文使用的是谷歌浏览器(Chrome)。请先查看自己的浏览器版本,再下载与该版本对应的 chromedriver 驱动。

驱动下载链接:https://registry.npmmirror.com/binary.html?path=chromedriver

效果

K6W}[LSSGX}NIG8HGP1%MT4.png

CW95S0E86E0WARKTY`YZH8T.png

代码

import re
import requests
import time
import os
from selenium import webdriver
#引用模块 selenium==3.141.0

# Ask the user for the Douyin creator's profile page that should be scraped.
data_url = input('输入你要爬取抖音博主主页链接')

# Start Chrome through the local chromedriver executable.
# Replace the path with the location of chromedriver.exe on your machine.
driver = webdriver.Chrome(executable_path=r'D:\python\chromedriver.exe')

# Open the profile page in the automated browser window.
driver.get(data_url)

def drop_down():
    """Scroll the page held by the module-level ``driver`` down in stages.

    Eight scroll steps are performed. Before each one the function sleeps
    for two seconds, then sets ``document.documentElement.scrollTop`` to
    ``scrollHeight`` multiplied by 1/9, 5/9, 9/9, ... up to 29/9.
    Presumably the pauses give the page time to load further video tiles
    before the next scroll.
    """
    scroll_template = 'document.documentElement.scrollTop=document.documentElement.scrollHeight * %f'
    step = 1
    while step < 30:
        # Pause first, then jump to the next scroll position.
        time.sleep(2)
        driver.execute_script(scroll_template % (step / 9))
        step += 4

# --- Settings ---------------------------------------------------------------
# Directory the videos are written to. This is the single place to edit when
# the save location should change.
SAVE_DIR = 'D:/video/'

# Upper bound in seconds for every HTTP request, so a stalled connection
# cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Cookie captured from a logged-in browser session. Its sid_guard entry carries
# the date 16-Apr-2023, so it has presumably expired; capture your own cookie
# and supply it through the DOUYIN_COOKIE environment variable instead of
# editing (and sharing) credentials in source code.
# The value is one long string, written as adjacent literals so that it stays
# a valid Python string literal.
_DEFAULT_COOKIE = (
    'douyin.com; ttwid=1%7CWi46JI7KdSaF9yqta1kL28XUbEiDv91IIfMOxY-EhZ0%7C1675841330%7C89a9430cc447d8576d53d4fbc9546dfa417bc4e88d586762cbe878514cc1df57; passport_csrf_token=9d7dab91f7a045a68d9fa2deb1f60b0c; passport_csrf_token_default=9d7dab91f7a045a68d9fa2deb1f60b0c; s_v_web_id=verify_ldvcnlvk_10t9slUd_4n0m_42o1_8p9b_8ZRx0cAV4nv5; home_can_add_dy_2_desktop=%220%22; xgplayer_user_id=646767496422; passport_assist_user=CkGmgQ_jszMN1m-PPWYjH_QNdsf_8klBB_8wS0bJsZWcfTnMc97HC73w9WNOnHbLoE1PnrcXtGsuQy6FV7HUCWBMZhpICjxihoJBypQ-JpI9KH--ZN_-TY41fsc-wLsvlbmXM97JsrDcbP2eTP44_kJCdfLHGFu-6P8ZZJ6MfHQMHRAQsrKpDRiJr9ZUIgEDDUbOBg%3D%3D; n_mh=1a3e5XCqMARKIH9Y88jP23zsLolfuhxxp5ZQomXRvOY; sso_uid_tt=c2f6884d45856a3a866e96b167c36a10; sso_uid_tt_ss=c2f6884d45856a3a866e96b167c36a10; toutiao_sso_user=ab04894ee6c7df3eeecec15922d832ea; toutiao_sso_user_ss=ab04894ee6c7df3eeecec15922d832ea; sid_ucp_sso_v1=1.0.0-KDdhNTJmMjRlNmI0Yzg5OWVmNDcxMzllNWFlMWQ1M2M5MTEwNDE1NTYKHwjT1eCOovTiBhCw-bOfBhjvMSAMMKu7iOkFOAZA9AcaAmhsIiBhYjA0ODk0ZWU2YzdkZjNlZWVjZWMxNTkyMmQ4MzJlYQ; ssid_ucp_sso_v1=1.0.0-KDdhNTJmMjRlNmI0Yzg5OWVmNDcxMzllNWFlMWQ1M2M5MTEwNDE1NTYKHwjT1eCOovTiBhCw-bOfBhjvMSAMMKu7iOkFOAZA9AcaAmhsIiBhYjA0ODk0ZWU2YzdkZjNlZWVjZWMxNTkyMmQ4MzJlYQ; odin_tt=fc686e88a993cd8b3c475705e2e286b79bea48c0f1571b1d71907cb4bc263bd8e81c55f29d0d2d82e6f2f828e0f9322ffc9a0c11a8d50f931542468903f614d5; passport_auth_status=f7cba991b8c1ae560c1f55df240d23f4%2C; passport_auth_status_ss=f7cba991b8c1ae560c1f55df240d23f4%2C; uid_tt=4b64917790b6f7fa2f4452c2c2322ae0; uid_tt_ss=4b64917790b6f7fa2f4452c2c2322ae0; sid_tt=052095ac92e67fd17382c560a00588f4; sessionid=052095ac92e67fd17382c560a00588f4; sessionid_ss=052095ac92e67fd17382c560a00588f4; sid_guard=052095ac92e67fd17382c560a00588f4%7C1676475573%7C5183995%7CSun%2C+16-Apr-2023+15%3A39%3A28+GMT; sid_ucp_v1=1.0.0-KGU5MTMzMTcwOTk5OWIyNTQxZTVjZmQ4NTU2YWYwZGQ5ZGZlMDYxOGEKGwjT1eCOovTiBhC1-bOfBhjvMSAMOAZA9AdIBBoCbGYiIDA1MjA5NWFjOTJlNjdmZDE3MzgyYzU2MGEwMDU4OGY0; '
    'ssid_ucp_v1=1.0.0-KGU5MTMzMTcwOTk5OWIyNTQxZTVjZmQ4NTU2YWYwZGQ5ZGZlMDYxOGEKGwjT1eCOovTiBhC1-bOfBhjvMSAMOAZA9AdIBBoCbGYiIDA1MjA5NWFjOTJlNjdmZDE3MzgyYzU2MGEwMDU4OGY0; LOGIN_STATUS=1; store-region=cn-hn; store-region-src=uid; douyin.com; strategyABtestKey=%221677080469.918%22; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWNsaWVudC1jZXJ0IjoiLS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tXG5NSUlDRkRDQ0FicWdBd0lCQWdJVVpoK2V0RUhDZlB4SjBJUnhGMFFKcGhhRXVjMHdDZ1lJS29aSXpqMEVBd0l3XG5NVEVMTUFrR0ExVUVCaE1DUTA0eElqQWdCZ05WQkFNTUdYUnBZMnRsZEY5bmRXRnlaRjlqWVY5bFkyUnpZVjh5XG5OVFl3SGhjTk1qTXdNakUxTVRVek9UTXdXaGNOTXpNd01qRTFNak16T1RNd1dqQW5NUXN3Q1FZRFZRUUdFd0pEXG5UakVZTUJZR0ExVUVBd3dQWW1SZmRHbGphMlYwWDJkMVlYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEXG5BUWNEUWdBRUpHUW1kaWNMU1hHQXl4QzE2ZlplVFNhdXpqNjI4T3o2RUYydTJaaG1HUTh0NnRCS1BZZjRGSnkrXG52S3ZEWTBTNExwMHg4T2NXSnpHM1p0bHdvcnV3SXFPQnVUQ0J0akFPQmdOVkhROEJBZjhFQkFNQ0JhQXdNUVlEXG5WUjBsQkNvd0tBWUlLd1lCQlFVSEF3RUdDQ3NHQVFVRkJ3TUNCZ2dyQmdFRkJRY0RBd1lJS3dZQkJRVUhBd1F3XG5LUVlEVlIwT0JDSUVJTGkxVmVSK01UVElWQ3NEMzQ4ZitCNDBwYkNxUTZvaVBvbGIyQ0c4ckxKbU1Dc0dBMVVkXG5Jd1FrTUNLQUlES2xaK3FPWkVnU2pjeE9UVUI3Y3hTYlIyMVRlcVRSZ05kNWxKZDdJa2VETUJrR0ExVWRFUVFTXG5NQkNDRG5kM2R5NWtiM1Y1YVc0dVkyOXRNQW9HQ0NxR1NNNDlCQU1DQTBnQU1FVUNJUUNtS3MwTktNZ1BUaVdiXG4wRzdNN2s0K2ZOckNIRmRMc0FCVmErUnpwWUZBR1FJZ0E4czE4dS95MHZKOEd0YVlGVjNHQzdTVXJ3bTdITVZBXG5XTkV0ZTVTUkw3cz1cbi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS1cbiJ9; csrf_session_id=a20f3074e912cd7499eb53ecdc4db1a9; _tea_utm_cache_1243=undefined; MONITOR_WEB_ID=0d0f5307-22e5-4da5-9733-509ce9a07bff; __ac_nonce=063f637ba00a0a8d51802; __ac_signature=_02B4Z6wo00f017RWXlgAAIDDNFSkGepfEH-0dlrAAI7sBLimHJxVv1T5BysGBkNlcpvr3LeKyLCmY4XZwDrZ0jqSxokT6IUn7HfSFn.hlK-QNAZuu532oIbDVIs0LIs4.MYYg6YpRUbfLUFk66; VIDEO_FILTER_MEMO_SELECT=%7B%22expireTime%22%3A1677685307532%2C%22type%22%3A1%7D; tt_scid=77Cu9dRAW7f0mFfa6zzUUQROFT8L1BR0CaDGGuTexi1Q8PTvg7FzDpvupBEHERJq5644; download_guide=%221%2F20230222%22; '
    'FOLLOW_LIVE_POINT_INFO=%22MS4wLjABAAAAsvjdwafT6SV5V7SI5uK5KRQj0h2akfJsbUU4Tr9IQ3_wXxWaNlCrcjtqZ2lusCwh%2F1677081600000%2F0%2F0%2F1677081445546%22; FOLLOW_NUMBER_YELLOW_POINT_INFO=%22MS4wLjABAAAAsvjdwafT6SV5V7SI5uK5KRQj0h2akfJsbUU4Tr9IQ3_wXxWaNlCrcjtqZ2lusCwh%2F1677081600000%2F0%2F1677080845546%2F0%22; msToken=1KOGSu7iKfb2VJQJ89TVgUWYYeX3eXGQof9T1ZSz027OKe8TVsgg_okULDbFjWGCq6MUnIo-5aUW6cgPiYGVHZQy1xSFwiE5HoBgl5gh5PrLoGvRpTmtGdJ3hU9-StVk; msToken=FbYQeqlbqNPVPa6dRII68yh3bHHd67y1lWGYIVDyOlYcydKxzxSC80eRqQR2bC_P4W75pjmlT3eZSD2gMMXYmpvA5KweN62l-c_nq7O0iSO6yo0e_KhyglwRWCwESIM=; passport_fe_beating_status=false'
)

# Request headers shared by every page and media request; built once instead
# of once per video. An empty or unset DOUYIN_COOKIE falls back to the
# built-in cookie above.
HEADERS = {
    'cookie': os.environ.get('DOUYIN_COOKIE') or _DEFAULT_COOKIE,
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.190.400 QQBrowser/11.5.5240.400',
}

# Patterns applied to the HTML of a single video page.
_TITLE_RE = re.compile('<title data-react-helmet="true">(.*?)</title>')
_PLAY_API_RE = re.compile('playApi(.*?)playAddrH265')
# Characters stripped from the page title before it is used as a file name
# (raw string: the original '\-' was an invalid escape in a normal string).
_TITLE_NOISE_RE = re.compile(r'[#@。“”&\- 抖音]')
# Characters Windows does not allow in file names, plus line breaks and tabs.
_ILLEGAL_FILENAME_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _safe_file_name(title, fallback):
    """Turn a page title into a usable file name (without extension).

    Args:
        title: Raw text of the page's ``<title>`` element.
        fallback: Name returned when nothing is left after cleaning.

    Returns:
        The cleaned title, or ``fallback`` if the cleaned title is empty.
    """
    name = _TITLE_NOISE_RE.sub('', title)
    name = _ILLEGAL_FILENAME_RE.sub('', name)
    return name or fallback


def _download_video(url):
    """Fetch one video page, find its media address and save it as an .mp4.

    Args:
        url: Address of a single video page, e.g.
            ``https://www.douyin.com/video/6950985129997126952``.

    Raises:
        ValueError: If the title or the play address is not found in the page.
        requests.RequestException: On network errors, timeouts or HTTP error
            status codes.
        OSError: If the target file cannot be written.
    """
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    html = response.text

    title_match = _TITLE_RE.search(html)
    play_match = _PLAY_API_RE.search(html)
    if title_match is None or play_match is None:
        raise ValueError(f'video title or play address not found in {url}')

    # Fall back to the last path segment of the page URL when the title
    # consists only of removed characters.
    video_title = _safe_file_name(
        title_match.group(1),
        fallback=url.rstrip('/').rsplit('/', 1)[-1],
    )

    # The captured text is percent-encoded. After decoding, the '","' piece is
    # removed and '":"' is replaced by 'https:' -- presumably turning the
    # protocol-relative address embedded in the page into a full URL.
    play_url = (
        requests.utils.unquote(play_match.group(1))
        .replace('","', '')
        .replace('":"', 'https:')
    )
    print(video_title, play_url)

    # The whole body is fetched before the file is opened, so a failed
    # download does not leave a partial file behind.
    video = requests.get(url=play_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    video.raise_for_status()
    with open(os.path.join(SAVE_DIR, video_title + '.mp4'), mode='wb') as f:
        f.write(video.content)


try:
    # Scroll the profile page first, then collect the video tiles.
    drop_down()
    lis = driver.find_elements_by_css_selector('.Eie04v01')

    os.makedirs(SAVE_DIR, exist_ok=True)

    for li in lis:
        # Best effort per video: report the problem and continue with the
        # next tile instead of aborting the whole batch.
        try:
            url = li.find_element_by_css_selector('a').get_attribute('href')
            print(url)
            _download_video(url)
        except Exception as e:
            print(e)
finally:
    # Close the browser and stop the chromedriver process even when the run
    # fails part-way through.
    driver.quit()


9

海报

正在生成.....

评论 (7)

取消
  1. 头像
    schermerdusenberycmp5p8+3vgavkg3o11j@gmail.com
    XX ·MacOS · Safari
    俄罗斯 俄罗斯 沙发

    illo sed ut est at voluptas veniam laboriosam maxime voluptatem corrupti quibusdam earum voluptas eos dolorem et. placeat eius nihil ut itaque repellat officia modi quidem harum quia nemo consequuntur

    Mac OS X 10.15.7   Safari  回复 删除 垃圾
  2. 头像
    35.01hst8vwnr6smmvbgrc309abhp@mail4u.fun
    XX ·MacOS · Safari
    俄罗斯 俄罗斯 板凳

    id esse adipisci sed aut ea architecto quas. fuga eius est sint velit praesentium labore autem omnis facere delectus commodi voluptatem sed repudiandae quaerat quaerat eum velit aliquid.

    Mac OS X 10.15.7   Safari  回复 删除 垃圾
  3. 头像
    32.01hst8vwnr6smmvbgrc309abhp@mail4u.lt
    XX ·MacOS · Safari
    俄罗斯 俄罗斯 地毯

    doloribus ullam dolorem explicabo ipsa commodi quibusdam. quasi nulla qui illum ipsum vel quis.

    Mac OS X 10.15.7   Safari  回复 删除 垃圾
  4. 头像
    light
    上海 ·Windows 10 · Google Chrome
    上海市 电信 上海市 第4楼

    代码这里的cookie值要抓取替换吗

    Windows 10   Microsoft Edge  回复 删除 垃圾
    1. 头像
      博主
      XX ·Windows 10 · Google Chrome
      广东省东莞市 移动 广东省
      @ light

      yes

      Windows 10   Microsoft Edge  回复 删除 垃圾
  5. 头像
    light
    上海 ·Windows 10 · Google Chrome
    上海市 电信 上海市 第5楼
    该回复疑似异常,已被系统拦截!
    Windows 10   Microsoft Edge  回复 删除 垃圾
    1. 头像
      博主
      XX ·Windows 10 · Google Chrome
      广东省东莞市 移动 广东省
      @ light

      是的,需要你自己抓cookie,代码好久之前写的,我也不确定能不能用,你自己试试

      Windows 10   Microsoft Edge  回复 删除 垃圾