qushuiyin/qushuiyin.py

111 lines
3.9 KiB
Python

import re
import json
import requests
# Prefix prepended to every media URL (author avatar, cover, video, music
# cover) placed in the result of Video.douyin; the target URL is appended
# directly after "?url=".
# NOTE(review): the target is concatenated as-is, without percent-encoding --
# confirm that the proxy endpoint expects it that way.
proxy_url = "https://qsyapi.wishpal.cn/proxy?url="
class Video:
    """Resolve Douyin share links into video metadata and a playable URL.

    Every public result is a plain dict with a numeric ``code`` and a ``msg``;
    a successful lookup additionally carries a ``data`` payload.
    """

    # User agent sent with the share-page request (an iPhone browser string).
    _USER_AGENT = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 "
        "Mobile/15E148 Safari/604.1 Edg/122.0.0.0"
    )
    # Seconds allowed per HTTP request so a stalled server cannot hang the
    # caller indefinitely (requests applies no timeout by default).
    _TIMEOUT = 10
    # Captures the JSON assigned to window._ROUTER_DATA in the share page.
    _ROUTER_DATA_RE = re.compile(
        r"window\._ROUTER_DATA\s*=\s*(.*?)</script>", re.DOTALL
    )
    # First run of digits that directly follows a slash.
    _VIDEO_ID_RE = re.compile(r"/(\d+)")

    def douyin(self, url):
        """Look up the video behind a Douyin share URL.

        Args:
            url: Share link; redirects are followed to discover the video ID.

        Returns:
            ``{"code": 400, ...}`` when no video ID can be extracted,
            ``{"code": 201, ...}`` when the share page cannot be parsed, and
            ``{"code": 200, ..., "data": {...}}`` on success.

        Raises:
            requests.RequestException: If fetching the share page or
                resolving the video address fails at the network level.
        """
        video_id = self.extract_id(url)
        if not video_id:
            return {"code": 400, "msg": "Unable to parse video ID"}

        response = requests.get(
            f"https://www.iesdouyin.com/share/video/{video_id}",
            headers={"User-Agent": self._USER_AGENT},
            timeout=self._TIMEOUT,
        )
        item = self._first_item(response.text)
        if item is None:
            return {"code": 201, "msg": "Parsing failed"}

        try:
            play_url = item["video"]["play_addr"]["url_list"][0]
            # Swap the "playwm" path segment for "play" (presumably the
            # variant without a watermark -- TODO confirm).
            play_url = play_url.replace("playwm", "play")
            author = item["author"]
            music = item["music"]
            data = {
                "author": author["nickname"],
                "uid": author["unique_id"],
                "avatar": proxy_url + author["avatar_medium"]["url_list"][0],
                "like": item["statistics"]["digg_count"],
                "time": item["create_time"],
                "title": item["desc"],
                "cover": proxy_url + item["video"]["cover"]["url_list"][0],
                # Placeholder keeps the key order stable; the real value is
                # filled in once the redirects have been resolved.
                "url": None,
                "music": {
                    "author": music["author"],
                    "avatar": proxy_url + music["cover_large"]["url_list"][0],
                },
            }
        except (AttributeError, KeyError, IndexError, TypeError):
            # A payload missing any expected field is a parse failure, in
            # line with the other "Parsing failed" exits above.
            return {"code": 201, "msg": "Parsing failed"}

        # Handle redirects: expose the address the play URL finally lands on.
        data["url"] = proxy_url + self._resolve_redirects(play_url)
        return {"code": 200, "msg": "Parsing successful", "data": data}

    def _first_item(self, html):
        """Return the first ``item_list`` entry embedded in *html*, or None."""
        match = self._ROUTER_DATA_RE.search(html)
        if not match:
            return None
        try:
            router_data = json.loads(match.group(1).strip())
            video_info_res = router_data["loaderData"]["video_(id)/page"][
                "videoInfoRes"
            ]
            return video_info_res["item_list"][0]
        except (ValueError, KeyError, IndexError, TypeError):
            # ValueError covers json.JSONDecodeError.
            return None

    def _resolve_redirects(self, url):
        """Return the final URL reached after following redirects from *url*."""
        # stream=True defers the body download: only the final URL is needed,
        # and the body is presumably the video file itself.
        response = requests.get(
            url, allow_redirects=True, stream=True, timeout=self._TIMEOUT
        )
        try:
            return response.url
        finally:
            response.close()

    def extract_id(self, url):
        """Follow redirects from *url* and pull the video ID out of the result.

        Args:
            url: Share link to resolve; may be empty or None.

        Returns:
            The first run of digits that directly follows a slash in the
            final URL, as a string, or None when *url* is empty, the request
            fails, or no such digits exist.
        """
        if not url:
            return None
        try:
            response = requests.head(
                url, allow_redirects=True, timeout=self._TIMEOUT
            )
        except (requests.RequestException, ValueError):
            # Best effort: an unreachable or malformed link means "no ID".
            return None
        id_match = self._VIDEO_ID_RE.search(response.url)
        return id_match.group(1) if id_match else None
def get_raw_url(share_url):
    """Pull the first http(s) URL out of free-form share text.

    Args:
        share_url: Text that may contain a link surrounded by other words.

    Returns:
        The matched URL string, or None when the text contains no URL.
    """
    found = re.search(
        r"http[s]?://[\w.]+[\w\/]*[\w.]*\??[\w=&:\-\+\%]*[/]*", share_url
    )
    return None if found is None else found.group(0)
def clean_mask(share_url):
    """Resolve the link found in share text and report the outcome.

    The URL is extracted with get_raw_url and handed to Video.douyin; the
    resulting dict is printed to stdout and also returned to the caller.
    """
    link = get_raw_url(share_url)
    outcome = Video().douyin(link)
    print(outcome)
    return outcome