# dr_py/txt/hipy/两个BT.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File  : 两个BT.py
# Author: DaShenHan&道长-----先苦后甜，任凭晚风拂柳颜------
# Author's Blog: https://blog.csdn.net/qq_32394351
# Date  : 2024/1/8

import os.path
import sys

sys.path.append('..')
try:
    # from base.spider import Spider as BaseSpider
    from base.spider import BaseSpider
except ImportError:
    from t4.base.spider import BaseSpider
import json
import time
import base64
import re
from pathlib import Path
import io
import tokenize
from urllib.parse import quote
from Crypto.Cipher import AES, PKCS1_v1_5 as PKCS1_cipher
from Crypto.Util.Padding import unpad

"""
配置示例:
t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式，比如./开头或者.json结尾
api里会自动含有ext参数是base64编码后的选中的筛选条件
 {
    "key":"hipy_t4_两个BT",
    "name":"两个BT(hipy_t4)",
    "type":4,
    "api":"http://192.168.31.49:5707/api/v1/vod/两个BT?api_ext={{host}}/txt/hipy/两个BT.json",
    "searchable":1,
    "quickSearch":0,
    "filterable":1,
    "ext":"两个BT"
},
{
    "key": "hipy_t3_两个BT",
    "name": "两个BT(hipy_t3)",
    "type": 3,
    "api": "{{host}}/txt/hipy/两个BT.py",
    "searchable": 1,
    "quickSearch": 0,
    "filterable": 1,
    "ext": "{{host}}/txt/hipy/两个BT.json"
},
"""


class Spider(BaseSpider):  # metaclass: the default metaclass, type
    """Spider rule for the bttwo.net site, built on the project's BaseSpider."""

    # Root URL of the target site; the request URLs in this class are built from it.
    api: str = 'https://www.bttwo.net'
    # Listing page that init_api_ext_file() fetches to extract the filter options.
    api_ext_file: str = api + '/movie_bt/'

    def getName(self):
        """
        Return the display name of this rule.

        NOTE(review): the value is still the template placeholder text rather
        than a site-specific name - confirm whether callers rely on it before
        changing it.
        @return: the rule name string
        """
        rule_name = "规则名称如:基础示例"
        return rule_name

    def init_api_ext_file(self):
        """
        Generate the JSON filter file that belongs to this .py file.

        Fetches the "all movies" page (``self.api_ext_file``), reads the three
        taxonomy filter groups (region, year, genre) from it and writes them as
        ``{"movie_bt": [...]}`` to a file with the same path as this module but
        a ``.json`` extension.
        @return: None
        """
        # Swap only the extension: str.replace('.py', '.json') would also
        # rewrite any other '.py' that happens to occur earlier in the path.
        ext_file = os.path.splitext(__file__)[0] + '.json'
        print(f'ext_file:{ext_file}')

        # All-movies page: https://www.bttwo.net/movie_bt/
        r = self.fetch(self.api_ext_file)
        html = self.html(r.text)

        # (filter key, display name). The key is also the suffix of the id of
        # the page element that holds the options for that filter.
        filter_groups = (
            ('cat', '地区'),
            ('year', '年份'),
            ('tags', '影片类型'),
        )

        filter_movie_bt = []
        for key, name in filter_groups:
            links = html.xpath(f'//*[@id="beautiful-taxonomy-filters-tax-movie_bt_{key}"]/a')
            filter_movie_bt.append({
                "key": key,
                "name": name,
                "value": [
                    {
                        'n': ''.join(link.xpath('./text()')),
                        # Keep the option URL relative to the site root.
                        'v': ''.join(link.xpath('@cat-url')).replace(self.api, ''),
                    }
                    for link in links
                ],
            })

        print(filter_movie_bt)

        ext_file_dict = {
            "movie_bt": filter_movie_bt,
        }
        with open(ext_file, mode='w', encoding='utf-8') as f:
            f.write(json.dumps(ext_file_dict, ensure_ascii=False))

    def init(self, extend=""):
        """
        初始化加载extend，一般与py文件名同名的json文件作为扩展筛选
        @param extend:
        @return:
        """

        def init_file(ext_file):
            """
            根据与py对应的json文件去扩展规则的筛选条件
            """
            ext_file = Path(ext_file).as_posix()
            if os.path.exists(ext_file):
                with open(ext_file, mode='r', encoding='utf-8') as f:
                    try:
                        ext_dict = json.loads(f.read())
                        self.config['filter'].update(ext_dict)
                    except Exception as e:
                        print(f'更新扩展筛选条件发生错误:{e}')

        ext = self.extend
        print(f"============{extend}============")
        if isinstance(ext, str):
            if ext.startswith('./'):
                ext_file = os.path.join(os.path.dirname(__file__), ext)
                init_file(ext_file)
            elif ext.startswith('http'):
                try:
                    r = self.fetch(ext)
                    self.config['filter'].update(r.json())
                except Exception as e:
                    print(f'更新扩展筛选条件发生错误:{e}')
            elif not ext.startswith('./') and not ext.startswith('http'):
                ext_file = os.path.join(os.path.dirname(__file__), './' + ext + '.json')
                init_file(ext_file)

        # 装载模块，这里只要一个就够了
        if isinstance(extend, list):
            for lib in extend:
                if '.Spider' in str(type(lib)):
                    self.module = lib
                    break

    def isVideo(self):
        """
        Return the matcher string used to decide whether a URL is a video.

        Example of the JS form:
        'js:input.includes("https://zf.13to.com/")?true:false'
        @return: None/empty for no matcher, 'reg:<regular expression>', or
                 'js:<JS code evaluated against input>'
        """
        # Raw string: '\.' in a normal literal is an invalid escape sequence and
        # triggers a warning on recent Python versions. The value is unchanged.
        return r'reg:zf\.13to\.com'

    def isVideoFormat(self, url):
        """
        Placeholder hook; no format check is implemented here.
        @param url: unused
        @return: None
        """
        return None

    def manualVideoCheck(self):
        """
        Placeholder hook; no manual video check is implemented here.
        @return: None
        """
        return None

    def homeContent(self, filterable=False):
        """
        获取首页分类及筛选数据
        @param filterable: 能否筛选，跟t3/t4配置里的filterable参数一致
        @return:
        """
        class_name = '影片库&最新电影&热门下载&本月热门&国产剧&美剧&日韩剧'  # 静态分类名称拼接
        class_url = 'movie_bt&new-movie&hot&hot-month&zgjun&meiju&jpsrtv'  # 静态分类标识拼接

        result = {}
        classes = []

        if all([class_name, class_url]):
            class_names = class_name.split('&')
            class_urls = class_url.split('&')
            cnt = min(len(class_urls), len(class_names))
            for i in range(cnt):
                classes.append({
                    'type_name': class_names[i],
                    'type_id': class_urls[i]
                })

        result['class'] = classes
        if filterable:
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        """
        Return the recommendation list shown on the home page.

        Fetches the site root and turns every list item under an element whose
        class contains "leibox" into one vod entry.
        @return: {'list': [{'vod_name', 'vod_id', 'vod_pic', 'vod_remarks'}, ...]}
        """
        page = self.html(self.fetch(self.api).text)
        items = page.xpath('//*[contains(@class,"leibox")]/ul/li')
        print(len(items))
        return {
            'list': [
                {
                    'vod_name': ''.join(item.xpath('h3//text()')),
                    'vod_id': ''.join(item.xpath('a/@href')),
                    'vod_pic': ''.join(item.xpath('.//img//@data-original')),
                    'vod_remarks': ''.join(item.xpath('.//*[contains(@class,"jidi")]//text()')),
                }
                for item in items
            ]
        }

    def categoryContent(self, tid, pg, filterable, extend):
        """
        Return one page of the first-level (category) listing.
        @param tid: category id
        @param pg: current page number
        @param filterable: whether filtering is enabled (not used here)
        @param extend: currently selected filter values; only read for the
                       'movie_bt' category (keys 'cat', 'year', 'tags')
        @return: dict with 'list', 'page', 'pagecount', 'limit' and 'total'
        """
        page_size = 24  # a full listing page is assumed to hold 24 entries
        if tid == 'movie_bt':
            # Only this category supports filters; append the selected ones in a fixed order.
            selected = extend.keys()
            url = self.api + f'/{tid}'
            for filter_key in ('cat', 'year', 'tags'):
                if filter_key in selected:
                    url += extend[filter_key]
            url += f'/page/{pg}'
        else:
            url = self.api + f'/{tid}/page/{pg}'
        print(url)

        page = self.html(self.fetch(url).text)
        videos = [
            {
                'vod_name': ''.join(item.xpath('h3//text()')),
                'vod_id': ''.join(item.xpath('a/@href')),
                'vod_pic': ''.join(item.xpath('.//img//@data-original')),
                'vod_remarks': ''.join(item.xpath('.//*[contains(@class,"hdinfo")]//text()')),
            }
            for item in page.xpath('//*[contains(@class,"bt_img")]/ul/li')
        ]

        # A full page means there may be more; otherwise the current page is the last one.
        is_full_page = len(videos) >= page_size
        return {
            'list': videos,
            'page': pg,
            'pagecount': 9999 if is_full_page else pg,
            'limit': 90,
            'total': 999999,
        }

    def detailContent(self, ids):
        """
        Return the second-level (detail page) data for one video.

        Only the first id is used; it is fetched as a URL. Metadata is read
        from the info rows under the "dytext" element by position, and every
        link under "paly_list_btn" becomes one "name$url" play entry.
        @param ids: list of vod_id values passed on from the listing
        @return: {'list': [vod]}, or {'list': []} when ids is empty
        """
        if not ids:
            # Nothing to look up; ids[0] would raise IndexError.
            return {'list': []}

        vod_id = ids[0]
        html = self.html(self.fetch(vod_id).text)
        lis = html.xpath('//*[contains(@class,"dytext")]/ul/li')
        plis = html.xpath('//*[contains(@class,"paly_list_btn")]/a')

        def li_text(index):
            """Joined text of the index-th info row, or '' when the page has fewer rows."""
            return ''.join(lis[index].xpath('.//text()')) if len(lis) > index else ''

        # Episodes are joined as "name$url#name$url...".
        vod_play_url = '#'.join(
            ''.join(pli.xpath('./text()')) + '$' + ''.join(pli.xpath('./@href'))
            for pli in plis
        )

        vod = {"vod_id": vod_id,
               "vod_name": ''.join(html.xpath('//*[contains(@class,"dytext")]//h1//text()')),
               "vod_pic": ''.join(html.xpath('//*[contains(@class,"dyimg")]/img/@src')),
               "type_name": li_text(0),
               "vod_year": li_text(2),
               "vod_area": li_text(1),
               "vod_remarks": li_text(4),
               "vod_actor": li_text(7),
               "vod_director": li_text(5),
               "vod_content": ''.join(html.xpath('//*[contains(@class,"yp_context")]/p//text()')),
               "vod_play_from": '在线播放',
               "vod_play_url": vod_play_url}
        return {
            'list': [vod]
        }

    def searchContent(self, wd, quick=False, pg=1):
        """
        Return the search result list.

        The flow is: request the search URL, collect the cookies it sets, read
        the arithmetic captcha text from the page, post the computed answer
        back, then request the search URL again and parse the results. When
        nothing is found a single placeholder entry is returned instead.
        @param wd: search keyword
        @param quick: whether the call comes from quick search (not used here)
        @param pg: page number (not used here)
        @return: {'list': [{'vod_name', 'vod_id', 'vod_pic', 'vod_remarks'}, ...]}
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
            "Host": "www.bttwo.net",
            "Referer": self.api
        }
        url = f'{self.api}/xssearch?q={quote(wd)}'

        # First request: yields the captcha page and the cookies to send back.
        r = self.fetch(url, headers=headers)
        cookies = ['myannoun=1'] + [
            value.split(';')[0]
            for key, value in r.headers.items()
            if str(key).lower() == 'set-cookie'
        ]
        headers.update({'Cookie': ';'.join(cookies)})

        # Solve the captcha and submit the answer under the same cookies.
        captcha_page = self.html(r.text)
        captcha = ''.join(captcha_page.xpath('//*[@class="erphp-search-captcha"]/form/text()')).strip()
        answer = self.eval_computer(captcha)
        self.post(url, data={'result': str(answer)}, headers=headers, cookies=None)

        # Second request: the actual result page.
        r = self.fetch(url, headers=headers)
        lis = self.html(r.text).xpath('//*[contains(@class,"search_list")]/ul/li')
        print('搜索结果数:', len(lis))

        if len(lis) < 1:
            # No hits: return a single placeholder entry.
            d = [{
                'vod_name': wd,
                'vod_id': 'index.html',
                'vod_pic': 'https://gitee.com/CherishRx/imagewarehouse/raw/master/image/13096725fe56ce9cf643a0e4cd0c159c.gif',
                'vod_remarks': '测试搜索',
            }]
        else:
            d = [
                {
                    'vod_name': ''.join(li.xpath('h3//text()')),
                    'vod_id': ''.join(li.xpath('a/@href')),
                    'vod_pic': ''.join(li.xpath('a/img/@data-original')),
                    'vod_remarks': ''.join(li.xpath('p//text()')),
                }
                for li in lis
            ]
        return {
            'list': d
        }

    def playerContent(self, flag, id, vipFlags):
        """
        Resolve playback and return the JSON the player shell expects.

        The play page URL is handed back unchanged with ``parse`` set to 1, so
        the shell sniffs the real stream from it.
        @param flag: vod_play_from, the playback source line (not used here)
        @param id: vod_play_url, the link to play
        @param vipFlags: vip flags (not used here)
        @return: dict with 'parse', 'playUrl', 'url' and 'header'
        """
        # NOTE: a direct-link variant (fetch the page, AES-CBC-decrypt the
        # embedded player config, return parse=0) used to follow the return
        # statement here. It was unreachable and has been removed.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Linux；； Android 11；； M2007J3SC Build/RKQ1.200826.002；； wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/99.0.4844.48 Mobile Safari/537.36',
            'Referer': id,
        }
        return {
            'parse': 1,  # 1 = sniff, 0 = play directly
            'playUrl': '',  # parser link
            'url': id,  # direct link or address to sniff
            'header': headers,  # headers used for playback
        }

    # Class-level settings. NOTE: this dict is defined on the class, so it is
    # shared by every instance that does not assign its own.
    config = {
        "player": {},
        # Filter definitions: init() merges loaded JSON into this dict and
        # homeContent() returns it under 'filters'.
        "filter": {}
    }
    # Browser-like request headers for the site.
    # NOTE(review): not referenced anywhere in this file - presumably read by
    # BaseSpider; confirm.
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36",
        "Host": "www.bttwo.net",
        "Referer": "https://www.bttwo.net/"
    }

    def localProxy(self, params):
        """
        Return a fixed local-proxy response.
        @param params: unused
        @return: list of [status code, content type, body]
        """
        status, content_type, body = 200, "video/MP2T", ""
        return [status, content_type, body]

    # -----------------------------------------------自定义函数-----------------------------------------------
    def eval_computer(self, text):
        """
        自定义的字符串安全计算器
        @param text:字符串的加减乘除
        @return:计算后得到的值
        """
        localdict = {}
        self.safe_eval(f'ret={text.replace("=", "")}', localdict)
        ret = localdict.get('ret') or None
        return ret

    def safe_eval(self, code: str = '', localdict: dict = None):
        code = code.strip()
        if not code:
            return {}
        if localdict is None:
            localdict = {}
        builtins = __builtins__
        if not isinstance(builtins, dict):
            builtins = builtins.__dict__.copy()
        else:
            builtins = builtins.copy()
        for key in ['__import__', 'eval', 'exec', 'globals', 'dir', 'copyright', 'open', 'quit']:
            del builtins[key]  # 删除不安全的关键字
        # print(builtins)
        global_dict = {'__builtins__': builtins,
                       'json': json, 'print': print,
                       're': re, 'time': time, 'base64': base64
                       }  # 禁用内置函数,不允许导入包
        try:
            self.check_unsafe_attributes(code)
            exec(code, global_dict, localdict)
            return localdict
        except Exception as e:
            return {'error': f'执行报错:{e}'}

    # ==================== 静态函数 ======================
    @staticmethod
    def aes_cbs_decode(ciphertext, key, iv):
        """
        Decrypt a base64-encoded AES-CBC ciphertext and return it as text.
        @param ciphertext: base64 string of the encrypted data
        @param key: AES key as a string (encoded to bytes before use)
        @param iv: initialisation vector as a string (encoded to bytes before use)
        @return: the decrypted plaintext decoded as UTF-8
        """
        raw = base64.b64decode(ciphertext)
        cipher = AES.new(key.encode(), AES.MODE_CBC, iv.encode())
        padded = cipher.decrypt(raw)
        # Strip the block padding before decoding.
        return unpad(padded, AES.block_size).decode('utf-8')

    @staticmethod
    def check_unsafe_attributes(string):
        """
        安全检测需要exec执行的python代码
        :param string:
        :return:
        """
        g = tokenize.tokenize(io.BytesIO(string.encode('utf-8')).readline)
        pre_op = ''
        for toktype, tokval, _, _, _ in g:
            if toktype == tokenize.NAME and pre_op == '.' and tokval.startswith('_'):
                attr = tokval
                msg = "access to attribute '{0}' is unsafe.".format(attr)
                raise AttributeError(msg)
            elif toktype == tokenize.OP:
                pre_op = tokval


if __name__ == '__main__':
    # Manual debugging entry point; the initialiser comes from the project's t4 loader.
    from t4.core.loader import t4_spider_init

    spider = Spider()
    t4_spider_init(spider)
    spider.init_api_ext_file()  # generate the JSON filter file that belongs to this module

    # Sample calls for manual testing; uncomment as needed.
    # print(spider.homeVideoContent())
    # print(spider.categoryContent('movie_bt', 1, True, {}))
    # print(spider.searchContent('斗罗大陆'))
    # print(spider.detailContent(['https://www.bttwo.net/movie/20107.html']))
    # print(spider.playerContent('在线播放', spider.decodeStr('https%3A%2F%2Fwww.bttwo.net%2Fv_play%2FbXZfMzY4Nzgtbm1fMQ%3D%3D.html','utf-8'), None))
    # print(spider.playerContent('在线播放', spider.decodeStr('https://www.bttwo.net/v_play/bXZfMTMyNjkwLW5tXzE=.html','utf-8'), None))
    # print(spider.playerContent('在线播放', 'https://www.bttwo.net/v_play/bXZfMTMyNjA2LW5tXzE=.html', None))

    # ciphertext = '+T77kORPkp6wtgdzcqQgPmUXomqshgO6IfTIGE8/40Iht0nDYW9pcGGUk/1157KS876b7FW1m6JMjPY2G+pwtscUjTcCq2G2NTnAX+1iMIexjK+nfTobgi2qYMtke/sWWe51RH/9IxqvoosAhH4dlN+QT/TIHKFFa6OyFiFp2hlUvPNpukbtZcHHshHMolQc9JmW3av+Js9AcyKDLuoFg9N38jrBidnUadw/9Pog/lsoRXUp7JFhdiVujAIkxTJjabvQXT2jGQS88MY7/kiem5SikAh/D+zVPnwO3E7z87o3GIC4agtWKbjTCfeRsUCGg20fEiEl79YoJAaBofZ67cHYNvjcvu6DPSE1Nf29keNMoZlSCLvJPOzSv1+nBi4aVz4s5M2puSDczFyFPPE6aW4Zpr1tVRstr/RuMPLZoDu2D/p6Znxrvwcgj8N6g997Y8P6jNGhdSdmLaFQNgjJT/4cBV1X8W3UzohaapewK3Zum6lmyzcNRlXHHdoCyM4WNYoEOTjln0oKexGIXEBoGijjTzVpng9eGAjMyjYoPKAC0ZCAPTMv94UlLRruUbEtCxlMN0AYzNB2mC/otT6bu/063/ECzCvBS7LjJuamYX+2zsSomIUMiNzfx4S4/ZY9M8tGdVclNKKCzCQ+ovWUPMvEtKDW+g/qUdfx8a/cXMYkEeR66D5ChMGlEVwayytjjJDn4a0/4SxpcOkNVwRMFfhyuFNAPyS65m7ieJe+r5QuwlMa67DwQdBRkw4t2bmt3CXU+qPvfeCchNcVKjHPAwWaHbI3NGN+/4sZ5aa9aLV/r0jIwL8ThWHwbbvox/VCfCLtrtNX1JW7VPnqHudvuqDb2VE5nYPU96VdNGUoGSNUJraXPQ2J1YG0x6DKOznfPiwrK6pD0emY3mtCQcN1UB62q0nTvavI3GBpFKd5y9w4idS+pjHBpdedL4lFc9ynq9oYNgd4xuGNj35a+SgZfdR7DqiaxIU9kDA1yW5nzOw05ui0h8TbPWJX9YypLm/CZu5AQxkS92gbzxXYGwjBrEqqgrAoWFxAUb1FsU5WZZl4+soOYbbKUwSe4zXj+agwpSQs6XuV+b4OKB9GOLYlxSxrLMPnGGBObl8qHmren1Drdw3UtF55MEgV402fvj/ClPCeWIlgUaZdD2c802qd8cc9lzTEwyuLUVvtfrMGCxJV1tbe0w4i+WFVaxXX/cIfzQ7QNxUHfYNDW/zp80f5jaL9zbbPo3aKUroWrhlsM7ecT1M78PG4orVC3stAoNRo3mURlHQepkjVvaiufvxb2Zf/ofao9ou1vlHN0+CFyM8vCRLnH1zY3E3gyCGHMJCPAiRyZGOMIsECw5w/+K+FkcLWBTz9CnYCcIsyIaQGUyoMecYE+RZSbYYoC5xhI18xzZZZ1UJCjnKJRhdAumb5y3aAnOOX5Hj2KL6CD3PmPbSzE08ihcwxaRbME+2/zIxErr1j0MJmSvHBi9L1KCfGhizwFtJmu0MG0laGskYJflJUsIJE9BmuG7GCvCl4CKHYueKgpGn0ogd5QVDg5F/R3/tinEcw4n1Re0qlhKKyKhg8rCnOigAZCgET68/EOSMLxTlP4wY3Jtts12Zc5bL1MB6HkANlbwGryiiej4I8HmoH13AaS65cWmfZw9bJ4PffJYdhyns0qScbzGxQBiwJHZn7/mO6Yc7c0bfrevUeM4HogAHZTZYd7QIeH5ehmEUnPHv11GXtVJcN4sHhaaxDA4RVV5aN+4vRA3OgUhbuqebYcB5rVuMx7t3fw5kwQzQP7lnkPcXjjCLrLueCYyWJgUAKHi5TrAS9YtgHaIOA1lH0dIKAq+V8SoZPBxjxPr7AywT0d8qZc321NCbavu4voMZfh5ylrAuP7hYe1n9qGCFwZ/mQUoYLhPW0T6t3zmLEJgI9S0vm8SE0Z7BHam8O1P4xD9gFk/O1AumNs9rxFQT+exE+pZKJPKDXAgfE
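    # NOTE: the next comment line continues the base64 ciphertext above; join the two when uncommenting.
    # G11oUuB8sW/cgEwRZeLy3J543uWVS/LWY08SbVovKVWaTzm8JVGlwz2puLt5amzTLKUc'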
    # key = 'ae05c73de8a193cf'
    # iv = '1234567890983456'
    # print(spider.aes_cbs_decode(ciphertext, key, iv))