# dr_py/txt/hipy/新浪资源.py

# coding=utf-8
# !/usr/bin/python
import sys

sys.path.append('..')
try:
    # from base.spider import Spider as BaseSpider
    from base.spider import BaseSpider
except ImportError:
    from t4.base.spider import BaseSpider
import time
import re
from urllib import request, parse
import urllib
import urllib.request
from xml.etree.ElementTree import fromstring, ElementTree as et

"""
配置示例:
t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式，比如./开头或者.json结尾
api里会自动含有ext参数是base64编码后的选中的筛选条件
 {
    "key":"hipy_t4_新浪资源",
    "name":"新浪资源(hipy_t4)",
    "type":4,
    "api":"http://192.168.31.49:5707/api/v1/vod/新浪资源",
    "searchable":1,
    "quickSearch":0,
    "filterable":1,
    "ext":""
},
{
    "key": "hipy_t3_新浪资源",
    "name": "新浪资源(hipy_t3)",
    "type": 3,
    "api": "{{host}}/txt/hipy/新浪资源.py",
    "searchable": 1,
    "quickSearch": 0,
    "filterable": 1,
    "ext": ""
},
"""


class Spider(BaseSpider):  # 元类 默认的元类 type
    def getName(self):
        return "新浪资源"  # 除去少儿不宜的内容

    # Content-filter switch: when True, detailContent and custom_list drop
    # entries whose type name matches 伦理 / 倫理 / 福利.
    filterate = False

    def init(self, extend=""):
        print("============{0}============".format(extend))
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def homeContent(self, filter):
        result = {}
        timeClass = time.localtime(time.time())
        cateManual = {
            '动漫': '3',
            '动漫电影': '17',
            '综艺': '4',
            '纪录片': '5',
            '动作片': '6',
            '爱情片': '7',
            '科幻片': '8',
            '战争片': '9',
            '剧情片': '10',
            '恐怖片': '11',
            '喜剧片': '12',
            '大陆剧': '13',
            '港澳剧': '14',
            '台湾剧': '15',
            '欧美剧': '16',
            '韩剧': '18',
            '日剧': '20',
            '泰剧': '21',
            '体育': '23'
        }
        # if timeClass.tm_hour>22:
        # 	cateManual['伦理片']='22'
        # 	self.filterate=False
        classes = []
        for k in cateManual:
            classes.append({
                'type_name': k,
                'type_id': cateManual[k]
            })
        result['class'] = classes
        if (filter):
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        xmlTxt = self.custom_webReadFile(
            urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&h=24')
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        listXml = root.iter('list')
        videos = self.custom_list(html=listXml)
        result = {
            'list': videos
        }
        return result

    def categoryContent(self, tid, pg, filter, extend):
        result = {}
        videos = []
        pagecount = 1
        limit = 20
        total = 9999
        Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&t={0}&pg={1}'.format(
            tid, pg)
        xmlTxt = self.custom_webReadFile(urlStr=Url)
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        listXml = root.iter('list')
        for vod in listXml:
            pagecount = vod.attrib['pagecount']
            limit = vod.attrib['pagesize']
            total = vod.attrib['recordcount']
        videos = self.custom_list(html=root.iter('list'))
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = pagecount
        result['limit'] = limit
        result['total'] = total
        return result

    def detailContent(self, array):
        result = {}
        aid = array[0].split('###')
        id = aid[1]
        logo = aid[2]
        title = aid[0]
        vod_play_from = ['播放线路', ]
        vod_year = ''
        vod_actor = ''
        vod_content = ''
        vod_director = ''
        type_name = ''
        vod_area = ''
        vod_lang = ''
        vodItems = []
        vod_play_url = []
        try:
            url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + id
            xmlTxt = self.custom_webReadFile(urlStr=url)
            jRoot = et(fromstring(xmlTxt))
            xmlList = jRoot.iter('list')
            for vod in xmlList:
                for x in vod:
                    for v in x:
                        if v.tag == 'actor':
                            vod_actor = v.text
                        if v.tag == 'director':
                            vod_director = v.text
                        if v.tag == 'des':
                            vod_content = v.text
                        if v.tag == 'area':
                            vod_area = v.text
                        if v.tag == 'year':
                            vod_year = v.text
                        if v.tag == 'type':
                            type_name = v.text
                        if v.tag == 'lang':
                            vod_lang = v.text

            temporary = self.custom_RegexGetText(Text=xmlTxt, RegexText=r'<dd flag="xlyun">(.+?)</dd>', Index=1)
            temporary = temporary.replace('<![CDATA[', '').replace(']]>', '')
            vodItems = self.custom_EpisodesList(temporary)
            joinStr = "#".join(vodItems)
            vod_play_url.append(joinStr)
        except:
            pass
        vod = {
            "vod_id": array[0],
            "vod_name": title,
            "vod_pic": logo,
            "type_name": type_name,
            "vod_year": vod_year,
            "vod_area": vod_area,
            "vod_remarks": vod_lang,
            "vod_actor": vod_actor,
            "vod_director": vod_director,
            "vod_content": vod_content
        }
        vod['vod_play_from'] = "$$$".join(vod_play_from)
        vod['vod_play_url'] = "$$$".join(vod_play_url)
        result = {
            'list': [
                vod
            ]
        }
        if self.filterate == True and self.custom_RegexGetText(Text=type_name, RegexText=r'(伦理|倫理|福利)',
                                                               Index=1) != '':
            result = {'list': []}
        return result

    def searchContent(self, key, quick, pg=1):
        Url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=list&wd={0}&pg={1}'.format(
            urllib.parse.quote(key), '1')
        xmlTxt = self.custom_webReadFile(urlStr=Url)
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        listXml = root.iter('list')
        videos = self.custom_list(html=listXml)
        result = {
            'list': videos
        }
        return result

    def playerContent(self, flag, id, vipFlags):
        result = {}
        parse = 1
        url = id
        htmlTxt = self.custom_webReadFile(urlStr=url, header=self.header)
        url = self.custom_RegexGetText(Text=htmlTxt, RegexText=r'(https{0,1}://.+?\.m3u8)', Index=1)
        if url.find('.m3u8') < 1:
            url = id
            parse = 0
        result["parse"] = parse  # 0=直接播放、1=嗅探
        result["playUrl"] = ''
        result["url"] = url
        result['jx'] = 0  # VIP解析,0=不解析、1=解析
        result["header"] = ''
        return result

    # Class-level settings; homeContent returns config['filter'] under the
    # 'filters' key when filters are requested.
    config = {
        "player": {},
        "filter": {}
    }
    # Request headers that playerContent passes to custom_webReadFile
    # (empty by default).
    header = {}

    def localProxy(self, params):
        return [200, "video/MP2T", ""]

    # -----------------------------------------------自定义函数-----------------------------------------------
    # 正则取文本
    def custom_RegexGetText(self, Text, RegexText, Index):
        returnTxt = ""
        Regex = re.search(RegexText, Text, re.M | re.S)
        if Regex is None:
            returnTxt = ""
        else:
            returnTxt = Regex.group(Index)
        return returnTxt

    # 分类取结果
    def custom_list(self, html):
        ListRe = html
        videos = []
        temporary = []
        for vod in ListRe:
            for value in vod:
                for x in value:

                    if x.tag == 'name':
                        title = x.text
                    if x.tag == 'id':
                        id = x.text
                    if x.tag == 'type':
                        tid = x.text
                    if x.tag == 'last':
                        last = x.text
                temporary.append({
                    "name": title,
                    "id": id,
                    "last": last
                })

        if len(temporary) > 0:
            idTxt = ''
            for vod in temporary:
                idTxt = idTxt + vod['id'] + ','
            if len(idTxt) > 1:
                idTxt = idTxt[0:-1]
                url = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/?ac=detail&ids=' + idTxt
                xmlTxt = self.custom_webReadFile(urlStr=url)
                jRoot = et(fromstring(xmlTxt))
                xmlList = jRoot.iter('list')
                for vod in xmlList:
                    for x in vod:
                        for v in x:
                            if v.tag == 'name':
                                title = v.text
                            if v.tag == 'id':
                                vod_id = v.text
                            if v.tag == 'pic':
                                img = v.text
                            if v.tag == 'note':
                                remarks = v.text
                            if v.tag == 'year':
                                vod_year = v.text
                            if v.tag == 'type':
                                type_name = v.text
                        if self.filterate == True and self.custom_RegexGetText(Text=type_name,
                                                                               RegexText=r'(伦理|倫理|福利)',
                                                                               Index=1) != '':
                            continue
                        vod_id = '{0}###{1}###{2}'.format(title, vod_id, img)
                        # vod_id='{0}###{1}###{2}###{3}###{4}###{5}###{6}###{7}###{8}###{9}###{10}'.format(title,vod_id,img,vod_actor,vod_director,'/'.join(type_name),'/'.join(vod_time),'/'.join(vod_area),vod_lang,vod_content,vod_play_url)
                        # print(vod_id)
                        videos.append({
                            "vod_id": vod_id,
                            "vod_name": title,
                            "vod_pic": img,
                            "vod_year": vod_year,
                            "vod_remarks": remarks
                        })
        return videos

    # 访问网页
    def custom_webReadFile(self, urlStr, header=None, codeName='utf-8'):
        html = ''
        if header == None:
            header = {
                "Referer": urlStr,
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
                "Host": self.custom_RegexGetText(Text=urlStr, RegexText='https*://(.*?)(/|$)', Index=1)
            }
        # import ssl
        # ssl._create_default_https_context = ssl._create_unverified_context#全局取消证书验证
        req = urllib.request.Request(url=urlStr, headers=header)  # ,headers=header
        with  urllib.request.urlopen(req) as response:
            html = response.read().decode(codeName)
        return html

    # 取剧集区
    def custom_lineList(self, Txt, mark, after):
        circuit = []
        origin = Txt.find(mark)
        while origin > 8:
            end = Txt.find(after, origin)
            circuit.append(Txt[origin:end])
            origin = Txt.find(mark, end)
        return circuit

    # 正则取文本,返回数组
    def custom_RegexGetTextLine(self, Text, RegexText, Index):
        returnTxt = []
        pattern = re.compile(RegexText, re.M | re.S)
        ListRe = pattern.findall(Text)
        if len(ListRe) < 1:
            return returnTxt
        for value in ListRe:
            returnTxt.append(value)
        return returnTxt

    # 取集数
    def custom_EpisodesList(self, html):
        ListRe = html.split('#')
        videos = []
        for vod in ListRe:
            t = vod.split('$')
            url = t[1]
            title = t[0]
            if len(url) == 0:
                continue
            videos.append(title + "$" + url)
        return videos

    # 取分类
    def custom_classification(self):
        xmlTxt = self.custom_webReadFile(
            urlStr='https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/')
        tree = et(fromstring(xmlTxt))
        root = tree.getroot()
        classXml = root.iter('class')
        temporaryClass = {}
        for vod in classXml:
            for value in vod:
                if self.custom_RegexGetText(Text=value.text, RegexText=r'(福利|倫理片|伦理片)', Index=1) != '':
                    continue
                temporaryClass[value.text] = value.attrib['id']
                print("'{0}':'{1}',".format(value.text, value.attrib['id']))
        return temporaryClass


if __name__ == '__main__':
    # Manual smoke test: initialise the spider through the t4 loader, then
    # print the home payload followed by the home video list.
    from t4.core.loader import t4_spider_init

    spider = Spider()
    t4_spider_init(spider)
    for produce in (lambda: spider.homeContent(True), spider.homeVideoContent):
        print(produce())

# T=Spider()
# T. homeContent(filter=False)
# T.custom_classification()
# l=T.homeVideoContent()
# l=T.searchContent(key='柯南',quick='')
# l=T.categoryContent(tid='22',pg='1',filter=False,extend={})
# for x in l['list']:
# 	print(x['vod_name'])
# mubiao= l['list'][2]['vod_id']
# # print(mubiao)
# playTabulation=T.detailContent(array=[mubiao,])
# # print(playTabulation)
# vod_play_from=playTabulation['list'][0]['vod_play_from']
# vod_play_url=playTabulation['list'][0]['vod_play_url']
# url=vod_play_url.split('$$$')
# vod_play_from=vod_play_from.split('$$$')[0]
# url=url[0].split('$')
# url=url[1].split('#')[0]
# # print(url)
# m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
# print(m3u8)