# box/02/py/荐片.py
# 2025-02-05 22:03:12 +08:00
#
# 369 lines
# 12 KiB
# Python

"""
作者 凯悦宾馆 🚓 内容均从互联网收集而来 仅供交流学习使用 版权归原创者所有 如侵犯了您的权益 请通知作者 将及时删除侵权内容
====================kaiyuebinguan====================
"""
from Crypto.Util.Padding import unpad
from urllib.parse import unquote
from Crypto.Cipher import ARC4
from base.spider import Spider
from bs4 import BeautifulSoup
import urllib.request
import urllib.parse
import binascii
import requests
import base64
import json
import time
import sys
import re
import os
# NOTE(review): this append runs after `from base.spider import Spider` above,
# so it cannot help that import resolve; it only affects later imports.
sys.path.append('..')

# Base address that Spider.extract_middle_text prepends to relative links.
xurl = "http://img.shifen.me"

# Headers sent with every HTTP request issued by this module.
headerx = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.87 Safari/537.36'
    ),
}

# Module-level placeholder; nothing in the visible code assigns to it.
pm = ''
class Spider(Spider):
    """Spider backed by the HTTP JSON API at ``42.194.235.17:20000``.

    The public methods return plain dicts (``class``, ``list``, ``vod_*``
    keys).  All HTTP access goes through :meth:`_get_json`, which never
    raises: network errors, timeouts, non-200 responses and undecodable
    bodies all yield ``None``, so every fetching method degrades to an empty
    ``list`` instead of propagating an exception or returning ``None``.
    """

    _API_ROOT = "http://42.194.235.17:20000/api"
    # Seconds before an HTTP request is abandoned; without a timeout
    # ``requests`` may block indefinitely on an unresponsive server.
    _REQUEST_TIMEOUT = 15

    def getName(self):
        """Return the display name of this spider."""
        return "首页"

    def init(self, extend):
        """Accept the ``extend`` configuration value; nothing is initialised."""
        pass

    def isVideoFormat(self, url):
        """Not implemented; always returns ``None``."""
        pass

    def manualVideoCheck(self):
        """Not implemented; always returns ``None``."""
        pass

    # ------------------------------------------------------------------
    # Text extraction helper
    # ------------------------------------------------------------------
    def extract_middle_text(self, text, start_str, end_str, pl, start_index1: str = '', end_index2: str = ''):
        """Extract text found between ``start_str`` and ``end_str``.

        Args:
            text: Source text to search.
            start_str: Literal marker that opens a segment.
            end_str: Literal marker that closes a segment.
            pl: Extraction mode:
                0 - first segment, with backslashes removed;
                1 - regex matches inside the first segment joined by spaces;
                2 - regex matches inside the first segment, each prefixed
                    with a tag and joined by ``$$$``;
                3 - every segment in ``text``; each is scanned with a
                    two-group regex (link, title) and rendered as a
                    ``#``-separated entry list, segments joined by ``$$$``.
            start_index1: Regular expression used by modes 1, 2 and 3.
            end_index2: Unused; kept for interface compatibility.

        Returns:
            The extracted string.  An empty string is returned whenever
            nothing is found or ``pl`` is not a known mode, so callers can
            chain ``str`` methods on the result safely.
        """
        if pl == 3:
            return self._extract_play_groups(text, start_str, end_str, start_index1)

        start_index = text.find(start_str)
        if start_index == -1:
            return ""
        body_start = start_index + len(start_str)
        end_index = text.find(end_str, body_start)
        if end_index == -1:
            return ""
        middle_text = text[body_start:end_index]

        if pl == 0:
            return middle_text.replace("\\", "")
        if pl in (1, 2):
            matches = re.findall(start_index1, middle_text)
            if not matches:
                return ""
            if pl == 1:
                return ' '.join(matches)
            return '$$$'.join(f'✨丢丢👉{item}' for item in matches)
        return ""

    def _extract_play_groups(self, text, start_str, end_str, pattern):
        """Implement mode 3 of :meth:`extract_middle_text`."""
        # With two empty markers every search succeeds at offset 0 and the
        # removal below is a no-op, so the scan would never terminate.
        if not start_str and not end_str:
            return ""

        segments = []
        while True:
            start_index = text.find(start_str)
            if start_index == -1:
                break
            body_start = start_index + len(start_str)
            end_index = text.find(end_str, body_start)
            if end_index == -1:
                break
            segment = text[body_start:end_index]
            segments.append(segment)
            # Drop the consumed segment (and any identical copies) so the
            # next iteration finds the following one.
            text = text.replace(start_str + segment + end_str, '')

        groups = []
        for segment in segments:
            entries = []
            for match in re.findall(pattern, segment):
                # First standalone run of digits in the title, if any.
                found = re.search(r'(?:^|[^0-9])(\d+)(?:[^0-9]|$)', match[1])
                number = found.group(1) if found else 0
                # Relative links are prefixed with the module-level base URL.
                base = '' if 'http' in match[0] else xurl
                entries.append(f"{'📽️丢丢👉' + match[1]}${number}{base}{match[0]}")
            groups.append('#'.join(entries))
        return '$$$'.join(groups)

    # ------------------------------------------------------------------
    # HTTP helpers
    # ------------------------------------------------------------------
    def _get_json(self, url):
        """GET ``url`` and return the decoded JSON body, or ``None`` on failure."""
        try:
            response = requests.get(url=url, headers=headerx, timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException:
            return None
        if response.status_code != 200:
            return None
        response.encoding = "utf-8"
        try:
            return response.json()
        except ValueError:
            # Body was not valid JSON.
            return None

    @staticmethod
    def _rows(payload):
        """Return ``payload['data']`` when it is a list, otherwise ``[]``."""
        rows = payload.get('data') if isinstance(payload, dict) else None
        return rows if isinstance(rows, list) else []

    def _detail_url(self, vod_id):
        """Build the detail endpoint URL that doubles as ``vod_id``."""
        return f"{self._API_ROOT}/node/detail?channel=wandoujia&id={vod_id}"

    def _bt_list_videos(self, category_id, page):
        """Fetch one page of the ``bt/list`` endpoint as a list of video dicts.

        Entries missing any of the expected fields are skipped individually
        instead of discarding the rest of the page.
        """
        url = (
            f"{self._API_ROOT}/bt/list?genere_id&order&lang&keywords"
            f"&code=unknownec1280db12795506&category_id={category_id}"
            f"&limit=24&channel=wandoujia&page={page}&sort=update"
        )
        videos = []
        for vod in self._rows(self._get_json(url)):
            try:
                videos.append({
                    "vod_id": self._detail_url(vod['id']),
                    "vod_name": vod['title'],
                    "vod_pic": vod['images']['poster'],
                    "vod_remarks": vod['torrents']['zh'][0]['title'],
                })
            except (KeyError, IndexError, TypeError):
                continue
        return videos

    # ------------------------------------------------------------------
    # Public spider interface
    # ------------------------------------------------------------------
    def homeContent(self, filter):
        """Return the fixed list of top-level categories."""
        return {
            "class": [
                {"type_id": "5", "type_name": "电影🌠"},
                {"type_id": "14", "type_name": "剧集🌠"},
                {"type_id": "19", "type_name": "动漫🌠"},
                {"type_id": "23", "type_name": "综艺🌠"},
            ]
        }

    def homeVideoContent(self):
        """Return the first page of category ``1`` as ``{'list': [...]}``.

        The list is empty when the request fails.
        """
        return {'list': self._bt_list_videos(1, 1)}

    def categoryContent(self, cid, pg, filter, ext):
        """Return one page of videos for category ``cid``.

        Args:
            cid: Category id sent as ``category_id``.
            pg: Page number (int or numeric string); falsy means page 1.
            filter: Unused.
            ext: Unused.

        Returns:
            Dict with ``list``, ``page``, ``pagecount``, ``limit``, ``total``.
            ``page`` echoes ``pg`` unchanged when it was provided and is
            ``1`` otherwise.
        """
        page = int(pg) if pg else 1
        return {
            'list': self._bt_list_videos(cid, page),
            'page': pg or page,
            'pagecount': 9999,
            'limit': 90,
            'total': 999999,
        }

    def detailContent(self, ids):
        """Return the detail record for the first id in ``ids``.

        Args:
            ids: Sequence whose first element is the detail URL produced as
                ``vod_id`` by the listing methods.

        Returns:
            ``{'list': [detail]}``, or ``{'list': []}`` when the request
            fails or the response carries no ``data`` object.
        """
        did = ids[0]
        videos = []
        payload = self._get_json(did)
        info = payload.get('data') if isinstance(payload, dict) else None
        if isinstance(info, dict):
            content = str(info.get('description') or '')
            # Strip ideographic spaces, spaces and line breaks.
            for junk in ('\u3000', ' ', '\r', '\n'):
                content = content.replace(junk, '')

            episodes = []
            for item in info.get('btbo_downlist') or []:
                try:
                    episodes.append(f"{item['title']}${item['url']}")
                except (KeyError, TypeError):
                    continue

            videos.append({
                "vod_id": did,
                "vod_actor": '😸皮皮 😸灰灰',
                "vod_director": '😸丢丢',
                "vod_content": content,
                "vod_play_from": '丢丢专线',
                "vod_play_url": '#'.join(episodes),
            })
        return {'list': videos}

    def playerContent(self, flag, id, vipFlags):
        """Return ``id`` as a directly playable URL (no further parsing)."""
        return {
            "parse": 0,
            "playUrl": '',
            "url": id,
            "header": headerx,
        }

    def searchContentPage(self, key, quick, page):
        """Search videos by keyword.

        Args:
            key: Search keyword; percent-encoded before being placed in the
                query string so characters such as ``&`` or ``#`` cannot
                corrupt the request.
            quick: Unused.
            page: Page number; falsy means ``'1'``.

        Returns:
            Dict with ``list``, ``page``, ``pagecount``, ``limit``, ``total``.
            ``list`` is empty when the request fails.
        """
        if not page:
            page = '1'
        keyword = urllib.parse.quote(str(key), safe='')
        url = f"{self._API_ROOT}/video/search?page={page}&key={keyword}"

        videos = []
        for vod in self._rows(self._get_json(url)):
            try:
                videos.append({
                    "vod_id": self._detail_url(vod['id']),
                    "vod_name": vod['title'],
                    "vod_pic": vod['thumbnail'],
                    "vod_remarks": vod['mask'],
                })
            except (KeyError, TypeError):
                continue

        return {
            'list': videos,
            'page': page,
            'pagecount': 9999,
            'limit': 90,
            'total': 999999,
        }

    def searchContent(self, key, quick):
        """Search videos by keyword, first page only."""
        return self.searchContentPage(key, quick, '1')

    def localProxy(self, params):
        """Dispatch a local proxy request according to ``params['type']``.

        The ``proxyM3u8`` / ``proxyMedia`` / ``proxyTs`` handlers are not
        defined in this class; presumably the base class provides them -
        TODO confirm.  Unknown types return ``None``.
        """
        kind = params['type']
        if kind == "m3u8":
            return self.proxyM3u8(params)
        if kind == "media":
            return self.proxyMedia(params)
        if kind == "ts":
            return self.proxyTs(params)
        return None
"""
=======================================
换行 \n 零个或者多个空格 \s+ 数字型 int 文本型 str 分页{} '年代':'2021'
性能要求高"lxml" 处理不规范的HTML"html5lib" 简单应用"html.parser" 解析XML"xml"
=======================================
/rss/index.xml?wd=爱情&page=1 搜索有验证
/index.php/ajax/suggest?mid=1&wd=爱情&page=1&limit=30 搜索有验证
/index.php/ajax/data?mid=1&tid={cateId}&class={class}&area={area}&page={catePg}&limit=30 分类有验证
/index.php/vod/type/class/{cid}/id/41/page/{str(page)}/year/{NdType}.html 隐藏分类
/{cateId}-{area}-{by}-{class}-{lang}-{letter}---{catePg}---{year}.html
短剧 穿越 古装 仙侠 女频 恋爱 反转 现代 都市 剧情 玄幻 脑洞 悬疑
=======================================
aaa = self.extract_middle_text(res, 'bbb', 'ccc', 0)
aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
取头 取尾 (不循环) 截取项 (不循环) 长用于直链 二次截取 0号子程序
aaa =self.extract_middle_text(res, 'bbb', 'ccc',1,'html">(.*?)<')
aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
取头 取尾 (不循环) 截取项 (循环) 长用于详情 和2号区别没有$$$ 1号子程序
aaa = self.extract_middle_text(res, 'bbb','ccc', 2,'<span class=".*?" id=".*?">(.*?)</span>')
aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
取头 取尾 (不循环) 截取项 (循环) 只能用于线路数组 里面包含$$$ 2号子程序
aaa = self.extract_middle_text(res, 'bbb', 'ccc', 3,'href="(.*?)" class=".*?">(.*?)</a>')
aaa = aaa.replace('aaa', '').replace('bbb', '') 替换多余
取头 取尾 (循环) 截取项 (循环) 长用于播放数组 3号子程序
=======================================
"""
if __name__ == '__main__':
    # Manual smoke test: enable exactly one call below. Each fetching call
    # talks to the live API.
    spider = Spider()
    # outcome = spider.homeContent('filter')  # categories
    # outcome = spider.homeVideoContent()  # home page
    # outcome = spider.categoryContent('5', 1, 'filter', {})  # category page
    outcome = spider.detailContent(
        ['http://42.194.235.17:20000/api/node/detail?channel=wandoujia&id=559366']
    )  # detail page
    # outcome = spider.playerContent('1', 'http://42.194.235.17:20000/api/nUser/commentList?url_id=176366&page=1&token=', 'vipFlags')  # player page
    # outcome = spider.searchContentPage('爱情', 'quick', '1')  # search page
    print(outcome)