dr_py/txt/hipy/两个BT.py
2024-01-13 22:06:57 +08:00

535 lines
21 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File : 两个BT.py
# Author: DaShenHan&道长-----先苦后甜,任凭晚风拂柳颜------
# Author's Blog: https://blog.csdn.net/qq_32394351
# Date : 2024/1/8
import os.path
import sys
sys.path.append('..')
try:
# from base.spider import Spider as BaseSpider
from base.spider import BaseSpider
except ImportError:
from t4.base.spider import BaseSpider
import json
import time
import base64
import re
from pathlib import Path
import io
import tokenize
from urllib.parse import quote
from Crypto.Cipher import AES, PKCS1_v1_5 as PKCS1_cipher
from Crypto.Util.Padding import unpad
"""
配置示例:
t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式比如./开头或者.json结尾
api里会自动含有ext参数是base64编码后的选中的筛选条件
{
"key":"hipy_t4_两个BT",
"name":"两个BT(hipy_t4)",
"type":4,
"api":"http://192.168.31.49:5707/api/v1/vod/两个BT?api_ext={{host}}/txt/hipy/两个BT.json",
"searchable":1,
"quickSearch":0,
"filterable":1,
"ext":"两个BT"
},
{
"key": "hipy_t3_两个BT",
"name": "两个BT(hipy_t3)",
"type": 3,
"api": "{{host}}/txt/hipy/两个BT.py",
"searchable": 1,
"quickSearch": 0,
"filterable": 1,
"ext": "{{host}}/txt/hipy/两个BT.json"
},
"""
class Spider(BaseSpider): # 元类 默认的元类 type
# Root URL of the scraped site; the request URLs in this class are built from it.
api: str = 'https://www.bttwo.net'
# Listing page that init_api_ext_file() fetches to collect the filter options.
api_ext_file: str = api + '/movie_bt/'
def getName(self):
    """
    Return the name reported for this spider.

    @return: the name string
    """
    # NOTE(review): the text reads like the template placeholder
    # ("rule name, e.g. basic example") - confirm whether the site name was intended.
    spider_name = "规则名称如:基础示例"
    return spider_name
def init_api_ext_file(self):
    """
    Scrape the site's filter options and save them as this spider's JSON filter file.

    Fetches ``self.api_ext_file``, collects the option links of the region,
    year and genre filters and writes ``{"movie_bt": [...]}`` to a ``.json``
    file that sits next to this ``.py`` file and shares its base name.

    @return: None
    """
    # Swap only the extension: str.replace('.py', '.json') would also rewrite
    # a ".py" that occurs earlier in the path (e.g. inside a ".pyenv" directory).
    ext_file = os.path.splitext(__file__)[0] + '.json'
    print(f'ext_file:{ext_file}')
    # Listing page for all movies: https://www.bttwo.net/movie_bt/
    r = self.fetch(self.api_ext_file)
    html = self.html(r.text)

    # (filter key, display name, id of the element that holds the option links)
    filter_specs = (
        ('cat', '地区', 'beautiful-taxonomy-filters-tax-movie_bt_cat'),
        ('year', '年份', 'beautiful-taxonomy-filters-tax-movie_bt_year'),
        ('tags', '影片类型', 'beautiful-taxonomy-filters-tax-movie_bt_tags'),
    )
    filter_movie_bt = []
    for key, name, element_id in filter_specs:
        options = [
            {
                'n': ''.join(link.xpath('./text()')),
                # Keep the option value relative to the site root.
                'v': ''.join(link.xpath('@cat-url')).replace(self.api, ''),
            }
            for link in html.xpath(f'//*[@id="{element_id}"]/a')
        ]
        filter_movie_bt.append({
            "key": key,
            "name": name,
            "value": options,
        })
    print(filter_movie_bt)

    ext_file_dict = {
        "movie_bt": filter_movie_bt,
    }
    with open(ext_file, mode='w', encoding='utf-8') as f:
        f.write(json.dumps(ext_file_dict, ensure_ascii=False))
def init(self, extend=""):
    """
    Load extra filter definitions and remember a helper spider module.

    The filter source is read from ``self.extend``: a path starting with
    ``./`` (relative to this file), an ``http`` URL returning JSON, or a bare
    name that is resolved to ``./<name>.json`` next to this file. Whatever is
    loaded is merged into ``self.config['filter']``.

    @param extend: printed for diagnostics; when it is a list, the first item
                   whose type name contains '.Spider' is stored as self.module
    @return: None
    """

    def load_filter_file(json_path):
        """Merge the JSON file at json_path into self.config['filter'] if it exists."""
        json_path = Path(json_path).as_posix()
        if not os.path.exists(json_path):
            return
        with open(json_path, mode='r', encoding='utf-8') as fp:
            try:
                self.config['filter'].update(json.loads(fp.read()))
            except Exception as e:
                print(f'更新扩展筛选条件发生错误:{e}')

    source = self.extend
    print(f"============{extend}============")
    if isinstance(source, str):
        if source.startswith('./'):
            load_filter_file(os.path.join(os.path.dirname(__file__), source))
        elif source.startswith('http'):
            try:
                response = self.fetch(source)
                self.config['filter'].update(response.json())
            except Exception as e:
                print(f'更新扩展筛选条件发生错误:{e}')
        else:
            # Neither a relative path nor a URL: treat it as the base name of a
            # JSON file that lives next to this module.
            load_filter_file(os.path.join(os.path.dirname(__file__), './' + source + '.json'))

    # Attach a module; the first matching one is enough.
    if isinstance(extend, list):
        spiders = [lib for lib in extend if '.Spider' in str(type(lib))]
        if spiders:
            self.module = spiders[0]
def isVideo(self):
    """
    Return the matcher string used to recognise video URLs.

    @return: None/empty, 'reg:<regular expression>' or 'js:<JavaScript using input>'
    """
    # JavaScript alternative: 'js:input.includes("https://zf.13to.com/")?true:false'
    # Raw string: '\.' in a normal literal is an invalid escape sequence, which
    # newer Python versions warn about; the resulting value is unchanged.
    return r'reg:zf\.13to\.com'
def isVideoFormat(self, url):
    """
    Placeholder hook; this spider performs no check here.

    @param url: ignored
    @return: None
    """
    return None
def manualVideoCheck(self):
    """
    Placeholder hook; this spider performs no check here.

    @return: None
    """
    return None
def homeContent(self, filterable=False):
    """
    Return the home-page categories and, on request, the filter definitions.

    @param filterable: whether filters are wanted; mirrors the filterable
                       option of the t3/t4 configuration
    @return: dict with 'class' (list of type_name/type_id pairs) and, when
             filterable is truthy, 'filters' (self.config['filter'])
    """
    # Static category names and ids, paired by position.
    names = '影片库&最新电影&热门下载&本月热门&国产剧&美剧&日韩剧'.split('&')
    ids = 'movie_bt&new-movie&hot&hot-month&zgjun&meiju&jpsrtv'.split('&')
    result = {
        'class': [
            {'type_name': type_name, 'type_id': type_id}
            for type_name, type_id in zip(names, ids)
        ],
    }
    if filterable:
        result['filters'] = self.config['filter']
    return result
def homeVideoContent(self):
    """
    Return the recommendation list scraped from the site's home page.

    @return: {'list': [...]} with vod_name, vod_id, vod_pic and vod_remarks per entry
    """
    page = self.html(self.fetch(self.api).text)
    entries = page.xpath('//*[contains(@class,"leibox")]/ul/li')
    print(len(entries))
    return {
        'list': [
            {
                'vod_name': ''.join(entry.xpath('h3//text()')),
                'vod_id': ''.join(entry.xpath('a/@href')),
                'vod_pic': ''.join(entry.xpath('.//img//@data-original')),
                'vod_remarks': ''.join(entry.xpath('.//*[contains(@class,"jidi")]//text()')),
            }
            for entry in entries
        ],
    }
def categoryContent(self, tid, pg, filterable, extend):
    """
    Return one page of the first-level (category) listing.

    @param tid: category id
    @param pg: current page number
    @param filterable: whether filtering is enabled (not used here)
    @param extend: currently selected filter values; only read for the
                   'movie_bt' category, where None is treated like {}
    @return: dict with 'list', 'page', 'pagecount', 'limit' and 'total'
    """
    page_count = 24  # a full listing page is taken to hold 24 entries
    if tid != 'movie_bt':
        url = self.api + f'/{tid}/page/{pg}'
    else:
        # Tolerate a missing filter selection instead of failing on None.keys().
        extend = extend or {}
        url = self.api + f'/{tid}'
        # Selected filter values are path fragments, appended in a fixed order.
        for key in ('cat', 'year', 'tags'):
            if key in extend:
                url += extend[key]
        url += f'/page/{pg}'
    print(url)

    html = self.html(self.fetch(url).text)
    d = [
        {
            'vod_name': ''.join(li.xpath('h3//text()')),
            'vod_id': ''.join(li.xpath('a/@href')),
            'vod_pic': ''.join(li.xpath('.//img//@data-original')),
            'vod_remarks': ''.join(li.xpath('.//*[contains(@class,"hdinfo")]//text()')),
        }
        for li in html.xpath('//*[contains(@class,"bt_img")]/ul/li')
    ]
    return {
        'list': d,
        'page': pg,
        # A full page suggests more pages follow; a short page is the last one.
        'pagecount': 9999 if len(d) >= page_count else pg,
        'limit': 90,
        'total': 999999,
    }
def detailContent(self, ids):
    """
    Return the second-level (detail page) data for one title.

    @param ids: list of vod_id values from the listing; only the first is
                used and it is fetched as the page URL
    @return: {'list': [vod]} where vod holds the scraped metadata and the
             '#'-joined 'name$url' play entries
    """
    vod_id = ids[0]
    page = self.html(self.fetch(vod_id).text)
    info_rows = page.xpath('//*[contains(@class,"dytext")]/ul/li')
    play_links = page.xpath('//*[contains(@class,"paly_list_btn")]/a')

    def row_text(index):
        """Joined text of the info row at index, or '' when the page has fewer rows."""
        if len(info_rows) > index:
            return ''.join(info_rows[index].xpath('.//text()'))
        return ''

    episodes = [
        ''.join(link.xpath('./text()')) + '$' + ''.join(link.xpath('./@href'))
        for link in play_links
    ]
    vod = {
        "vod_id": vod_id,
        "vod_name": ''.join(page.xpath('//*[contains(@class,"dytext")]//h1//text()')),
        "vod_pic": ''.join(page.xpath('//*[contains(@class,"dyimg")]/img/@src')),
        # The metadata fields are picked from the info list by fixed position.
        "type_name": row_text(0),
        "vod_year": row_text(2),
        "vod_area": row_text(1),
        "vod_remarks": row_text(4),
        "vod_actor": row_text(7),
        "vod_director": row_text(5),
        "vod_content": ''.join(page.xpath('//*[contains(@class,"yp_context")]/p//text()')),
        "vod_play_from": '在线播放',
        "vod_play_url": '#'.join(episodes),
    }
    return {'list': [vod]}
def searchContent(self, wd, quick=False, pg=1):
    """
    Return the search result list for a keyword.

    The search page is requested once to collect its cookies and captcha
    text, the captcha answer computed by self.eval_computer is posted back,
    and the same URL is then requested again for the result list.

    @param wd: search keyword
    @param quick: whether the call comes from quick search (not used here)
    @param pg: page number (not used here)
    @return: {'list': [...]}; a single placeholder entry when nothing is found
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
        "Host": "www.bttwo.net",
        "Referer": self.api
    }
    url = f'{self.api}/xssearch?q={quote(wd)}'
    first = self.fetch(url, headers=headers)

    # Send back the name=value part of every Set-Cookie header plus a fixed cookie.
    cookies = ['myannoun=1'] + [
        value.split(';')[0]
        for key, value in first.headers.items()
        if str(key).lower() == 'set-cookie'
    ]
    headers.update({'Cookie': ';'.join(cookies)})

    captcha_nodes = self.html(first.text).xpath('//*[@class="erphp-search-captcha"]/form/text()')
    captcha = ''.join(captcha_nodes).strip()
    answer = self.eval_computer(captcha)
    self.post(url, data={'result': str(answer)}, headers=headers, cookies=None)

    second = self.fetch(url, headers=headers)
    hits = self.html(second.text).xpath('//*[contains(@class,"search_list")]/ul/li')
    print('搜索结果数:', len(hits))
    if len(hits) < 1:
        found = [{
            'vod_name': wd,
            'vod_id': 'index.html',
            'vod_pic': 'https://gitee.com/CherishRx/imagewarehouse/raw/master/image/13096725fe56ce9cf643a0e4cd0c159c.gif',
            'vod_remarks': '测试搜索',
        }]
    else:
        found = [
            {
                'vod_name': ''.join(hit.xpath('h3//text()')),
                'vod_id': ''.join(hit.xpath('a/@href')),
                'vod_pic': ''.join(hit.xpath('a/img/@data-original')),
                'vod_remarks': ''.join(hit.xpath('p//text()')),
            }
            for hit in hits
        ]
    return {'list': found}
def playerContent(self, flag, id, vipFlags):
    """
    Build the play response; the client plays a direct link or sniffs the page.

    The play page URL is handed back unchanged with parse=1 (sniffing).

    @param flag: vod_play_from, the play source/line (not used here)
    @param id: vod_play_url, the play page link
    @param vipFlags: vip markers (not used here)
    @return: dict with 'parse', 'playUrl', 'url' and 'header'
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux Android 11 M2007J3SC Build/RKQ1.200826.002 wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/99.0.4844.48 Mobile Safari/537.36',
        'Referer': id,
    }
    return {
        'parse': 1,  # 1 = sniff, 0 = play directly
        'playUrl': '',  # resolver link
        'url': id,  # direct link or address to sniff
        'header': headers,  # headers used for playback
    }


def _player_content_direct(self, id):
    """
    Resolve a direct play link by decrypting the play page (not called at present).

    This is the code that used to sit, unreachable, after the return
    statement of playerContent(); it is kept as a helper so the logic is not
    lost. It raises IndexError when the page lacks the 'window.wp_nonce='
    marker.

    @param id: play page link
    @return: dict with 'parse' (0), 'playUrl', 'url' and 'header'
    """
    r = self.fetch(id)
    # The encrypted payload and its key/iv sit between these two markers.
    text = r.text.split('window.wp_nonce=')[1].split('eval')[0]
    code = self.regStr(text, 'var .*?=.*?"(.*?)"')
    key = self.regStr(text, 'var .*?=md5.enc.Utf8.parse\\("(.*?)"')
    iv = self.regStr(text, 'var iv=.*?\\((\\d+)')
    text = self.aes_cbs_decode(code, key, iv)
    url = self.regStr(text, 'url: "(.*?)"')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux Android 11 M2007J3SC Build/RKQ1.200826.002 wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/99.0.4844.48 Mobile Safari/537.36',
        'Referer': url,
    }
    return {
        'parse': 0,  # 1 = sniff, 0 = play directly
        'playUrl': '',  # resolver link
        'url': url,  # direct link or address to sniff
        'header': headers,  # headers used for playback
    }
# Spider configuration. init() merges the loaded filter definitions into
# config['filter'] and homeContent() returns that dict as result['filters'].
# NOTE(review): this dict is defined on the class and updated in place, so it
# looks shared between all instances - confirm that this is intended.
config = {
"player": {},
"filter": {}
}
# Request headers for the site.
# NOTE(review): not referenced by the code visible in this file (searchContent
# builds its own headers) - presumably read by the base class; confirm.
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
"Host": "www.bttwo.net",
"Referer": "https://www.bttwo.net/"
}
def localProxy(self, params):
    """
    Return a fixed proxy response; params is ignored.

    @param params: not used
    @return: [200, "video/MP2T", ""] - presumably status code, content type and body
    """
    status, content_type, body = 200, "video/MP2T", ""
    return [status, content_type, body]
# -----------------------------------------------自定义函数-----------------------------------------------
def eval_computer(self, text):
    """
    Evaluate a short arithmetic expression given as text.

    Every '=' is removed from text and the remainder is evaluated through
    self.safe_eval as the right-hand side of ``ret=...``.

    @param text: the expression, e.g. '3+4='
    @return: the computed value, or None when nothing was computed
    """
    # NOTE(review): searchContent passes text taken from a remote page, and it
    # ends up in exec() via safe_eval. The restricted-builtins sandbox is not a
    # hard security boundary; consider a dedicated arithmetic parser.
    localdict = {}
    self.safe_eval(f'ret={text.replace("=", "")}', localdict)
    # Plain get(): the former "get('ret') or None" turned a valid result of 0
    # (e.g. '3-3=') into None.
    return localdict.get('ret')
def safe_eval(self, code: str = '', localdict: dict = None):
    """
    Execute code with a reduced set of builtins and return its local namespace.

    The code is first screened by self.check_unsafe_attributes and then run
    with exec(). NOTE(review): removing a few builtins is not a real sandbox;
    do not rely on this for hostile input.

    @param code: Python source to execute; surrounding whitespace is stripped
    @param localdict: dict used as the local namespace; a new one is created when None
    @return: {} for empty code, localdict after a successful run, or
             {'error': message} when the check or the execution raised
    """
    code = code.strip()
    if not code:
        return {}
    if localdict is None:
        localdict = {}
    # __builtins__ may be the builtins module or its dict; work on a copy of
    # the dict either way so the real builtins stay untouched.
    builtins = __builtins__
    if not isinstance(builtins, dict):
        builtins = builtins.__dict__.copy()
    else:
        builtins = builtins.copy()
    for key in ['__import__', 'eval', 'exec', 'globals', 'dir', 'copyright', 'open', 'quit']:
        # pop() rather than del: 'copyright' and 'quit' are installed by the
        # site module and may be missing, which made del raise KeyError.
        builtins.pop(key, None)  # drop the unsafe names
    global_dict = {'__builtins__': builtins,
                   'json': json, 'print': print,
                   're': re, 'time': time, 'base64': base64
                   }  # restricted builtins, no way to import packages
    try:
        self.check_unsafe_attributes(code)
        exec(code, global_dict, localdict)
        return localdict
    except Exception as e:
        return {'error': f'执行报错:{e}'}
# ==================== 静态函数 ======================
@staticmethod
def aes_cbs_decode(ciphertext, key, iv):
    """
    Decrypt Base64-encoded AES-CBC ciphertext and return it as text.

    @param ciphertext: Base64 text of the encrypted data
    @param key: AES key as str (encoded to bytes before use)
    @param iv: initialisation vector as str (encoded to bytes before use)
    @return: the decrypted, unpadded plaintext decoded as UTF-8
    """
    encrypted = base64.b64decode(ciphertext)
    cipher = AES.new(key.encode(), AES.MODE_CBC, iv.encode())
    padded = cipher.decrypt(encrypted)
    # Strip the block padding before decoding.
    return unpad(padded, AES.block_size).decode('utf-8')
@staticmethod
def check_unsafe_attributes(string):
    """
    Screen Python code that is about to be passed to exec().

    The code is tokenized; a name starting with '_' is rejected when the most
    recent operator token seen before it is '.'.

    :param string: Python source text
    :return: None when nothing was rejected
    :raises AttributeError: on the first rejected name
    """
    tokens = tokenize.tokenize(io.BytesIO(string.encode('utf-8')).readline)
    last_op = ''
    for token in tokens:
        kind, text = token[0], token[1]
        if kind == tokenize.NAME and last_op == '.' and text.startswith('_'):
            raise AttributeError("access to attribute '{0}' is unsafe.".format(text))
        if kind == tokenize.OP:
            # Only operator tokens update the remembered operator.
            last_op = text
if __name__ == '__main__':
    # Manual entry point: bootstrap a spider through the t4 loader and
    # regenerate the JSON filter file that belongs to this module.
    import t4.core.loader as t4_loader

    spider = Spider()
    t4_loader.t4_spider_init(spider)
    spider.init_api_ext_file()
# print(spider.homeVideoContent())
# print(spider.categoryContent('movie_bt', 1, True, {}))
# print(spider.searchContent('斗罗大陆'))
# print(spider.detailContent(['https://www.bttwo.net/movie/20107.html']))
# print(spider.playerContent('在线播放', spider.decodeStr('https%3A%2F%2Fwww.bttwo.net%2Fv_play%2FbXZfMzY4Nzgtbm1fMQ%3D%3D.html','utf-8'), None))
# print(spider.playerContent('在线播放', spider.decodeStr('https://www.bttwo.net/v_play/bXZfMTMyNjkwLW5tXzE=.html','utf-8'), None))
# print(spider.playerContent('在线播放', 'https://www.bttwo.net/v_play/bXZfMTMyNjA2LW5tXzE=.html', None))
# ciphertext = '+T77kORPkp6wtgdzcqQgPmUXomqshgO6IfTIGE8/40Iht0nDYW9pcGGUk/1157KS876b7FW1m6JMjPY2G+pwtscUjTcCq2G2NTnAX+1iMIexjK+nfTobgi2qYMtke/sWWe51RH/9IxqvoosAhH4dlN+QT/TIHKFFa6OyFiFp2hlUvPNpukbtZcHHshHMolQc9JmW3av+Js9AcyKDLuoFg9N38jrBidnUadw/9Pog/lsoRXUp7JFhdiVujAIkxTJjabvQXT2jGQS88MY7/kiem5SikAh/D+zVPnwO3E7z87o3GIC4agtWKbjTCfeRsUCGg20fEiEl79YoJAaBofZ67cHYNvjcvu6DPSE1Nf29keNMoZlSCLvJPOzSv1+nBi4aVz4s5M2puSDczFyFPPE6aW4Zpr1tVRstr/RuMPLZoDu2D/p6Znxrvwcgj8N6g997Y8P6jNGhdSdmLaFQNgjJT/4cBV1X8W3UzohaapewK3Zum6lmyzcNRlXHHdoCyM4WNYoEOTjln0oKexGIXEBoGijjTzVpng9eGAjMyjYoPKAC0ZCAPTMv94UlLRruUbEtCxlMN0AYzNB2mC/otT6bu/063/ECzCvBS7LjJuamYX+2zsSomIUMiNzfx4S4/ZY9M8tGdVclNKKCzCQ+ovWUPMvEtKDW+g/qUdfx8a/cXMYkEeR66D5ChMGlEVwayytjjJDn4a0/4SxpcOkNVwRMFfhyuFNAPyS65m7ieJe+r5QuwlMa67DwQdBRkw4t2bmt3CXU+qPvfeCchNcVKjHPAwWaHbI3NGN+/4sZ5aa9aLV/r0jIwL8ThWHwbbvox/VCfCLtrtNX1JW7VPnqHudvuqDb2VE5nYPU96VdNGUoGSNUJraXPQ2J1YG0x6DKOznfPiwrK6pD0emY3mtCQcN1UB62q0nTvavI3GBpFKd5y9w4idS+pjHBpdedL4lFc9ynq9oYNgd4xuGNj35a+SgZfdR7DqiaxIU9kDA1yW5nzOw05ui0h8TbPWJX9YypLm/CZu5AQxkS92gbzxXYGwjBrEqqgrAoWFxAUb1FsU5WZZl4+soOYbbKUwSe4zXj+agwpSQs6XuV+b4OKB9GOLYlxSxrLMPnGGBObl8qHmren1Drdw3UtF55MEgV402fvj/ClPCeWIlgUaZdD2c802qd8cc9lzTEwyuLUVvtfrMGCxJV1tbe0w4i+WFVaxXX/cIfzQ7QNxUHfYNDW/zp80f5jaL9zbbPo3aKUroWrhlsM7ecT1M78PG4orVC3stAoNRo3mURlHQepkjVvaiufvxb2Zf/ofao9ou1vlHN0+CFyM8vCRLnH1zY3E3gyCGHMJCPAiRyZGOMIsECw5w/+K+FkcLWBTz9CnYCcIsyIaQGUyoMecYE+RZSbYYoC5xhI18xzZZZ1UJCjnKJRhdAumb5y3aAnOOX5Hj2KL6CD3PmPbSzE08ihcwxaRbME+2/zIxErr1j0MJmSvHBi9L1KCfGhizwFtJmu0MG0laGskYJflJUsIJE9BmuG7GCvCl4CKHYueKgpGn0ogd5QVDg5F/R3/tinEcw4n1Re0qlhKKyKhg8rCnOigAZCgET68/EOSMLxTlP4wY3Jtts12Zc5bL1MB6HkANlbwGryiiej4I8HmoH13AaS65cWmfZw9bJ4PffJYdhyns0qScbzGxQBiwJHZn7/mO6Yc7c0bfrevUeM4HogAHZTZYd7QIeH5ehmEUnPHv11GXtVJcN4sHhaaxDA4RVV5aN+4vRA3OgUhbuqebYcB5rVuMx7t3fw5kwQzQP7lnkPcXjjCLrLueCYyWJgUAKHi5TrAS9YtgHaIOA1lH0dIKAq+V8SoZPBxjxPr7AywT0d8qZc321NCbavu4voMZfh5ylrAuP7hYe1n9qGCFwZ/mQUoYLhPW0T6t3zmLEJgI9S0vm8SE0Z7BHam8O1P4xD9gFk/O1AumNs9rxFQT+exE+pZKJPKDXAgfEG11o
UuB8sW/cgEwRZeLy3J543uWVS/LWY08SbVovKVWaTzm8JVGlwz2puLt5amzTLKUc'
# key = 'ae05c73de8a193cf'
# iv = '1234567890983456'
# print(spider.aes_cbs_decode(ciphertext, key, iv))