import json
import re

import requests


class Video:
    def pipixia(self, url):
        # The share link 302-redirects to the real item page; the ID lives in the Location header.
        loc = requests.head(url).headers.get("Location")
        if not loc:
            return None
        id_match = re.search(r"item/(.*)\?", loc)
        if not id_match:
            return None
        item_id = id_match.group(1)
        response = requests.get(
            f"https://is.snssdk.com/bds/cell/detail/?cell_type=1&aid=1319&app_name=super&cell_id={item_id}"
        )
        arr = response.json()
        item = arr["data"]["data"]["item"]
        video_url = item["origin_video_download"]["url_list"][0]["url"]
        if not video_url:
            return None
        return {
            "code": 200,
            "data": {
                "author": item["author"]["name"],
                "avatar": item["author"]["avatar"]["download_list"][0]["url"],
                "time": arr["data"]["data"]["display_time"],
                "title": item["content"],
                "cover": item["cover"]["url_list"][0]["url"],
                "url": video_url,
            },
        }

    def douyin(self, url):
        # Extract the numeric video ID from the (possibly shortened) share URL.
        video_id = self.extract_id(url)
        if not video_id:
            return {"code": 400, "msg": "Unable to parse video ID"}

        # A mobile User-Agent is needed so the share page embeds the video data inline.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) "
                "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 "
                "Mobile/15E148 Safari/604.1 Edg/122.0.0.0"
            )
        }

        # The share page inlines the video metadata as window._ROUTER_DATA inside a <script> tag.
        response = requests.get(
            f"https://www.iesdouyin.com/share/video/{video_id}", headers=headers
        )
        pattern = r"window\._ROUTER_DATA\s*=\s*(.*?)</script>"
        matches = re.search(pattern, response.text, re.DOTALL)
        if not matches:
            return {"code": 201, "msg": "Parsing failed"}

        video_info = json.loads(matches.group(1).strip())
        if "loaderData" not in video_info:
            return {"code": 201, "msg": "Parsing failed"}

        item = video_info["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]
        # Swapping "playwm" for "play" yields the watermark-free stream URL.
        video_res_url = item["video"]["play_addr"]["url_list"][0].replace(
            "playwm", "play"
        )

        return {
            "code": 200,
            "msg": "Parsing successful",
            "data": {
                "author": item["author"]["nickname"],
                "uid": item["author"]["unique_id"],
                "avatar": item["author"]["avatar_medium"]["url_list"][0],
                "like": item["statistics"]["digg_count"],
                "time": item["create_time"],
                "title": item["desc"],
                "cover": item["video"]["cover"]["url_list"][0],
                "url": video_res_url,
                "music": {
                    "author": item["music"]["author"],
                    "avatar": item["music"]["cover_large"]["url_list"][0],
                },
            },
        }

    def extract_id(self, url):
        # Follow redirects to the canonical URL, then take the first run of digits as the ID.
        try:
            response = requests.head(url, allow_redirects=True)
            final_url = response.url
            id_match = re.search(r"/(\d+)", final_url)
            return id_match.group(1) if id_match else None
        except requests.RequestException:
            return None


def get_raw_url(share_url):
    # Pull the first http(s) URL out of the pasted share text.
    regex = r"http[s]?://[\w.]+[\w/]*[\w.]*\??[\w=&:\-+%]*[/]*"
    match = re.search(regex, share_url)
    return match.group(0) if match else None


def clean_mask(share_url):
    raw_url = get_raw_url(share_url)
    if not raw_url:
        return {"code": 400, "msg": "No URL found in share text"}
    video = Video()
    result = video.douyin(raw_url)
    print(result)
    return result
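

# Minimal usage sketch (assumption: the share URL below is a hypothetical placeholder,
# not a real link). It shows the intended call path: paste the full share text into
# clean_mask(), which strips the surrounding text, resolves the link, and returns the
# parsed result dict described above.
if __name__ == "__main__":
    # Typical share text copied from the app: free-form text with an embedded short URL.
    sample_share_text = "7.43 Check out this video! https://v.douyin.com/XXXXXXX/ copy and open Douyin"
    parsed = clean_mask(sample_share_text)
    if parsed and parsed.get("code") == 200:
        print("Direct video URL:", parsed["data"]["url"])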