"""Resolve Douyin share links into video metadata and a direct play URL."""

import json
import re
from typing import Any, Dict, Optional

import requests

proxy_url = "https://qsyapi.wishpal.cn/proxy?url="

# Seconds to wait on each HTTP call; without a timeout `requests` may block
# indefinitely on an unresponsive server.
_REQUEST_TIMEOUT = 10

# Mobile browser identity sent with the share-page request.
_MOBILE_HEADERS = {
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1 Edg/122.0.0.0"
}

# The lazy group must be followed by a terminator: a trailing `(.*?)` with
# nothing after it always matches the empty string, which made every parse
# fail. The payload is expected to sit in an inline <script> element, so the
# closing tag is used as the end marker.
_ROUTER_DATA_RE = re.compile(
    r"window\._ROUTER_DATA\s*=\s*(.*?)</script>", re.DOTALL
)
_VIDEO_ID_RE = re.compile(r"/(\d+)")
_SHARE_URL_RE = re.compile(
    r"http[s]?://[\w.]+[\w\/]*[\w.]*\??[\w=&:\-\+\%]*[/]*"
)


def _parse_failed() -> Dict[str, Any]:
    """Return a fresh "parsing failed" result dict."""
    return {"code": 201, "msg": "Parsing failed"}


class Video:
    """Fetches and parses video information from share links."""

    def douyin(self, url: Optional[str]) -> Dict[str, Any]:
        """Look up a Douyin video from its share URL.

        Args:
            url: Share URL of the video (may redirect to the canonical page).

        Returns:
            A dict with ``code`` and ``msg``. On success (``code`` 200) it
            also carries ``data`` with author, uid, avatar, like, time,
            title, cover, url and music fields; media URLs are prefixed with
            ``proxy_url``. ``code`` 400 means no video ID could be derived
            from ``url``; ``code`` 201 means the share page could not be
            parsed.

        Raises:
            requests.RequestException: If fetching the share page or
                resolving the play URL fails at the network level.
        """
        video_id = self.extract_id(url)
        if not video_id:
            return {"code": 400, "msg": "Unable to parse video ID"}

        # Fetch the share page that embeds the video info as JSON.
        response = requests.get(
            f"https://www.iesdouyin.com/share/video/{video_id}",
            headers=_MOBILE_HEADERS,
            timeout=_REQUEST_TIMEOUT,
        )
        match = _ROUTER_DATA_RE.search(response.text)
        if not match:
            return _parse_failed()

        try:
            video_info = json.loads(match.group(1).strip())
            item = video_info["loaderData"]["video_(id)/page"]["videoInfoRes"][
                "item_list"
            ][0]
            author = item["author"]
            music = item["music"]

            # "playwm" -> "play": presumably selects the variant without the
            # watermark overlay (not verifiable from this file).
            play_url = item["video"]["play_addr"]["url_list"][0].replace(
                "playwm", "play"
            )

            author_name = author["nickname"]
            author_uid = author["unique_id"]
            author_avatar = author["avatar_medium"]["url_list"][0]
            like_count = item["statistics"]["digg_count"]
            created_at = item["create_time"]
            title = item["desc"]
            cover_url = item["video"]["cover"]["url_list"][0]
            music_author = music["author"]
            music_avatar = music["cover_large"]["url_list"][0]
        except (ValueError, KeyError, IndexError, TypeError):
            # Malformed JSON or an unexpected payload layout: report it the
            # same way as the other parse failures instead of crashing.
            return _parse_failed()

        # Follow redirects to obtain the final media URL. stream=True avoids
        # downloading the video body just to read the resolved address.
        with requests.get(
            play_url,
            allow_redirects=True,
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as redirect_response:
            final_video_url = redirect_response.url

        return {
            "code": 200,
            "msg": "Parsing successful",
            "data": {
                "author": author_name,
                "uid": author_uid,
                "avatar": proxy_url + author_avatar,
                "like": like_count,
                "time": created_at,
                "title": title,
                "cover": proxy_url + cover_url,
                "url": proxy_url + final_video_url,
                "music": {
                    "author": music_author,
                    "avatar": proxy_url + music_avatar,
                },
            },
        }

    def extract_id(self, url: Optional[str]) -> Optional[str]:
        """Resolve ``url`` through its redirects and pull out the video ID.

        Args:
            url: Share URL to resolve.

        Returns:
            The first run of digits that follows a ``/`` in the final URL,
            or ``None`` if ``url`` is empty, the request fails, or no such
            run exists.
        """
        if not url:
            return None
        try:
            response = requests.head(
                url, allow_redirects=True, timeout=_REQUEST_TIMEOUT
            )
        except (requests.RequestException, ValueError):
            # Best effort: an unreachable or invalid URL simply has no ID.
            return None
        id_match = _VIDEO_ID_RE.search(response.url)
        return id_match.group(1) if id_match else None


def get_raw_url(share_url: Optional[str]) -> Optional[str]:
    """Return the first http(s) URL found in ``share_url``, or ``None``.

    Share text usually wraps the link in extra words; this extracts only the
    URL portion. Empty or ``None`` input yields ``None``.
    """
    if not share_url:
        return None
    match = _SHARE_URL_RE.search(share_url)
    return match.group(0) if match else None


def clean_mask(share_url: Optional[str]) -> Dict[str, Any]:
    """Extract the URL from ``share_url`` and resolve it via ``Video.douyin``.

    The result dict is printed to stdout and then returned.
    """
    raw_url = get_raw_url(share_url)
    result = Video().douyin(raw_url)
    print(result)
    return result