111 lines
3.9 KiB
Python
111 lines
3.9 KiB
Python
import re
|
|
import json
|
|
import requests
|
|
|
|
# Proxy endpoint prefix. The Video class prepends this string to the avatar,
# cover and video URLs it returns, so the target URL follows the `url=` parameter.
proxy_url = "https://qsyapi.wishpal.cn/proxy?url="
|
|
|
|
|
|
class Video:
    """Resolve Douyin share links into video metadata and media URLs."""

    # Seconds to wait on each HTTP request. Without a timeout, requests
    # blocks indefinitely when the remote server stalls.
    REQUEST_TIMEOUT = 10

    def douyin(self, url):
        """Fetch metadata and a watermark-free play URL for a Douyin video.

        Args:
            url: A Douyin share URL (short links are followed via redirects).

        Returns:
            A dict with ``code`` and ``msg`` keys. On success (``code`` 200)
            it also carries ``data`` with the author, uid, avatar, like count,
            creation time, title, cover, video url and music info. ``code``
            400 means no video ID could be derived from ``url``; ``code`` 201
            means the share page could not be parsed.

        Raises:
            requests.RequestException: If fetching the share page or
                resolving the final video URL fails at the network level.
        """
        video_id = self.extract_id(url)
        if not video_id:
            return {"code": 400, "msg": "Unable to parse video ID"}

        # Request the share page with a mobile browser User-Agent.
        headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1 Edg/122.0.0.0"
        }
        response = requests.get(
            f"https://www.iesdouyin.com/share/video/{video_id}",
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )

        # The page embeds its state as JSON assigned to window._ROUTER_DATA.
        pattern = r"window\._ROUTER_DATA\s*=\s*(.*?)</script>"
        matches = re.search(pattern, response.text, re.DOTALL)
        if not matches:
            return {"code": 201, "msg": "Parsing failed"}

        try:
            video_info = json.loads(matches.group(1).strip())
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            return {"code": 201, "msg": "Parsing failed"}

        if not isinstance(video_info, dict) or "loaderData" not in video_info:
            return {"code": 201, "msg": "Parsing failed"}

        # Pull every field out before the second network request so that an
        # unexpected payload shape fails fast with the regular error result.
        try:
            item = video_info["loaderData"]["video_(id)/page"]["videoInfoRes"][
                "item_list"
            ][0]
            author = item["author"]
            music = item["music"]

            author_name = author["nickname"]
            author_uid = author["unique_id"]
            author_avatar = author["avatar_medium"]["url_list"][0]
            like_count = item["statistics"]["digg_count"]
            created_at = item["create_time"]
            title = item["desc"]
            cover = item["video"]["cover"]["url_list"][0]
            music_author = music["author"]
            music_avatar = music["cover_large"]["url_list"][0]

            # "playwm" is the watermarked endpoint; "play" serves the clean file.
            video_res_url = item["video"]["play_addr"]["url_list"][0].replace(
                "playwm", "play"
            )
        except (KeyError, IndexError, TypeError):
            return {"code": 201, "msg": "Parsing failed"}

        # Handle redirects: only the final URL is needed, so stream the
        # response to avoid downloading the whole video body.
        with requests.get(
            video_res_url,
            allow_redirects=True,
            stream=True,
            timeout=self.REQUEST_TIMEOUT,
        ) as redirected:
            final_video_res_url = redirected.url

        return {
            "code": 200,
            "msg": "Parsing successful",
            "data": {
                "author": author_name,
                "uid": author_uid,
                "avatar": proxy_url + author_avatar,
                "like": like_count,
                "time": created_at,
                "title": title,
                "cover": proxy_url + cover,
                "url": proxy_url + final_video_res_url,
                "music": {
                    "author": music_author,
                    "avatar": proxy_url + music_avatar,
                },
            },
        }

    def extract_id(self, url):
        """Follow redirects from ``url`` and return the numeric ID in the final URL.

        Args:
            url: A share URL; may be a short link that redirects.

        Returns:
            The first run of digits that follows a ``/`` in the final URL, as
            a string, or ``None`` when ``url`` is empty, the request fails, or
            no such digits are present.
        """
        if not url:
            # Nothing to resolve; skip the network round trip entirely.
            return None

        try:
            response = requests.head(
                url, allow_redirects=True, timeout=self.REQUEST_TIMEOUT
            )
        except (requests.RequestException, ValueError):
            # Best effort: an unreachable or malformed URL simply has no ID.
            return None

        id_match = re.search(r"/(\d+)", response.url)
        return id_match.group(1) if id_match else None
|
|
|
|
|
|
def get_raw_url(share_url):
    """Extract the first http(s) URL embedded in a piece of share text.

    Args:
        share_url: Free-form text that may contain a URL, such as a message
            copied from an app's share dialog.

    Returns:
        The first matching URL substring, or ``None`` when ``share_url`` is
        empty, is not a string, or contains no URL.
    """
    # Guard against None / non-string input, which would otherwise make
    # re.search raise TypeError instead of reporting "no URL found".
    if not share_url or not isinstance(share_url, str):
        return None

    regex = r"http[s]?://[\w.]+[\w\/]*[\w.]*\??[\w=&:\-\+\%]*[/]*"
    match = re.search(regex, share_url)
    return match.group(0) if match else None
|
|
|
|
|
|
def clean_mask(share_url):
    """Resolve share text to Douyin video details, print them and return them.

    Args:
        share_url: Share text containing a Douyin link.

    Returns:
        The result produced by ``Video.douyin`` for the URL extracted from
        ``share_url``.
    """
    link = get_raw_url(share_url)
    outcome = Video().douyin(link)
    print(outcome)
    return outcome
|