# qushuiyin/qushuiyin.py

import re
import json

import requests


class Video:
    def pipixia(self, url):
        """Resolve a Pipixia share link and return watermark-free video info, or None."""
        # The share link is a short URL; the redirect's Location header carries
        # the real item URL, which contains the cell id.
        loc = requests.head(url).headers.get("Location")
        if not loc:
            return None
        id_match = re.search(r"item/(.*)\?", loc)
        if not id_match:
            return None
        item_id = id_match.group(1)
        response = requests.get(
            f"https://is.snssdk.com/bds/cell/detail/?cell_type=1&aid=1319&app_name=super&cell_id={item_id}"
        )
        arr = response.json()
        item = arr["data"]["data"]["item"]
        video_url = item["origin_video_download"]["url_list"][0]["url"]
        if not video_url:
            return None
        return {
            "code": 200,
            "data": {
                "author": item["author"]["name"],
                "avatar": item["author"]["avatar"]["download_list"][0]["url"],
                "time": arr["data"]["data"]["display_time"],
                "title": item["content"],
                "cover": item["cover"]["url_list"][0]["url"],
                "url": video_url,
            },
        }
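
    # Example usage (a sketch; the share link below is a placeholder, not a real one):
    #   Video().pipixia("https://h5.pipix.com/s/XXXXXXX/")
    # On success it returns {"code": 200, "data": {...}}; otherwise None.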
    def douyin(self, url):
        """Resolve a Douyin share link and return watermark-free video info."""
        # Extract the numeric video ID from the (redirect-resolved) share URL.
        video_id = self.extract_id(url)
        if not video_id:
            return {"code": 400, "msg": "Unable to parse video ID"}
        # The share page is requested with a mobile User-Agent; its HTML embeds a
        # window._ROUTER_DATA JSON payload that carries the video metadata.
        headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1 Edg/122.0.0.0"
        }
        response = requests.get(
            f"https://www.iesdouyin.com/share/video/{video_id}", headers=headers
        )
        # Pull the embedded JSON out of the <script> tag.
        pattern = r"window\._ROUTER_DATA\s*=\s*(.*?)</script>"
        matches = re.search(pattern, response.text, re.DOTALL)
        if not matches:
            return {"code": 201, "msg": "Parsing failed"}
        video_info = json.loads(matches.group(1).strip())
        if "loaderData" not in video_info:
            return {"code": 201, "msg": "Parsing failed"}
        item = video_info["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]
        # Replacing "playwm" with "play" in the play address removes the watermark.
        video_res_url = item["video"]["play_addr"]["url_list"][0].replace("playwm", "play")
        return {
            "code": 200,
            "msg": "Parsing successful",
            "data": {
                "author": item["author"]["nickname"],
                "uid": item["author"]["unique_id"],
                "avatar": item["author"]["avatar_medium"]["url_list"][0],
                "like": item["statistics"]["digg_count"],
                "time": item["create_time"],
                "title": item["desc"],
                "cover": item["video"]["cover"]["url_list"][0],
                "url": video_res_url,
                "music": {
                    "author": item["music"]["author"],
                    "avatar": item["music"]["cover_large"]["url_list"][0],
                },
            },
        }
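
    # Sketch of what douyin() returns on success (field values are illustrative):
    #   {"code": 200, "msg": "Parsing successful",
    #    "data": {"author": ..., "uid": ..., "avatar": ..., "like": ..., "time": ...,
    #             "title": ..., "cover": ..., "url": ..., "music": {"author": ..., "avatar": ...}}}
    # On failure it returns {"code": 400 or 201, "msg": "..."} instead.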
    def extract_id(self, url):
        """Follow the share link's redirects and pull the numeric video ID from the final URL."""
        try:
            response = requests.head(url, allow_redirects=True)
            final_url = response.url
            id_match = re.search(r"/(\d+)", final_url)
            return id_match.group(1) if id_match else None
        except Exception:
            # Network errors or an unresolvable link are treated as "no ID found".
            return None

def get_raw_url(share_url):
    """Pull the first http(s) URL out of the surrounding share text."""
    regex = r"http[s]?://[\w.]+[\w\/]*[\w.]*\??[\w=&:\-\+\%]*[/]*"
    match = re.search(regex, share_url)
    if match:
        return match.group(0)
    return None
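
# Example (a sketch; the share text below is made up): Douyin share messages wrap the
# link in extra text, e.g. "Check this out https://v.douyin.com/XXXXXXX/ copy to open",
# and get_raw_url() reduces that to the bare "https://v.douyin.com/XXXXXXX/" URL.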

def clean_mask(share_url):
    """Take raw share text, isolate the URL, and resolve it to watermark-free video info."""
    raw_url = get_raw_url(share_url)
    video = Video()
    result = video.douyin(raw_url)
    print(result)
    return result
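

# Minimal manual test (a sketch; the share text is a placeholder, not a real link):
if __name__ == "__main__":
    sample_share = "Check out this video https://v.douyin.com/XXXXXXX/ copied from the Douyin app"
    clean_mask(sample_share)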