# coding=utf-8
# !/usr/bin/python
import sys

sys.path.append('..')
try:
    # from base.spider import Spider as BaseSpider
    from base.spider import BaseSpider
except ImportError:
    from t4.base.spider import BaseSpider
import time
import re
from urllib import request, parse
import urllib
import urllib.request
from xml.etree.ElementTree import fromstring, ElementTree as et

"""
Configuration example:
The `ext` node of a t4 config entry is automatically turned into the `extend`
query parameter of the api, but the t4 `ext` string does not support path
formats, e.g. values starting with ./ or ending with .json.
The api automatically carries an `ext` parameter holding the base64-encoded
selected filter conditions.
{
    "key":"hipy_t4_新浪资源",
    "name":"新浪资源(hipy_t4)",
    "type":4,
    "api":"http://192.168.31.49:5707/api/v1/vod/新浪资源",
    "searchable":1,
    "quickSearch":0,
    "filterable":1,
    "ext":""
},
{
    "key": "hipy_t3_新浪资源",
    "name": "新浪资源(hipy_t3)",
    "type": 3,
    "api": "{{host}}/txt/hipy/新浪资源.py",
    "searchable": 1,
    "quickSearch": 0,
    "filterable": 1,
    "ext": ""
},
"""


class Spider(BaseSpider):  # metaclass: the default metaclass, type
    """Spider for the xinlangapi.com XML video feed.

    Every public method returns plain dicts/lists built from the XML
    responses of ``API_URL``.
    """

    # Single endpoint used by every request; query strings are appended to it.
    API_URL = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/'

    # Type names matching this pattern are hidden when `filterate` is True.
    ADULT_PATTERN = r'(伦理|倫理|福利)'

    # Filter out content that is not suitable for children.
    filterate = False

    config = {
        "player": {},
        "filter": {}
    }
    header = {}

    def getName(self):
        """Return the display name of this source."""
        return "新浪资源"

    def init(self, extend=""):
        """Initialise the spider; only echoes the `extend` argument."""
        print("============{0}============".format(extend))

    def isVideoFormat(self, url):
        """Not implemented for this source."""
        pass

    def manualVideoCheck(self):
        """Not implemented for this source."""
        pass

    def homeContent(self, filter):
        """Return the category list, plus filter config when `filter` is truthy."""
        cateManual = {
            '动漫': '3',
            '动漫电影': '17',
            '综艺': '4',
            '纪录片': '5',
            '动作片': '6',
            '爱情片': '7',
            '科幻片': '8',
            '战争片': '9',
            '剧情片': '10',
            '恐怖片': '11',
            '喜剧片': '12',
            '大陆剧': '13',
            '港澳剧': '14',
            '台湾剧': '15',
            '欧美剧': '16',
            '韩剧': '18',
            '日剧': '20',
            '泰剧': '21',
            '体育': '23'
        }
        # NOTE: the original carried a disabled experiment that added category
        # '伦理片' (id '22') after 22:00 local time; it stays disabled.
        result = {
            'class': [
                {'type_name': name, 'type_id': type_id}
                for name, type_id in cateManual.items()
            ]
        }
        if filter:
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        """Return the videos listed by the feed for the last 24 hours (`h=24`)."""
        root = self._fetch_root(self.API_URL + '?ac=list&h=24')
        return {'list': self.custom_list(html=root.iter('list'))}

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of videos for category `tid`.

        `pagecount`, `limit` and `total` are read from the attributes of the
        `<list>` element and always returned as ints; the defaults are used
        when an attribute is missing or not numeric.
        """
        root = self._fetch_root(
            self.API_URL + '?ac=list&t={0}&pg={1}'.format(tid, pg))
        pagecount, limit, total = 1, 20, 9999
        for list_node in root.iter('list'):
            pagecount = self._to_int(list_node.attrib.get('pagecount'), pagecount)
            limit = self._to_int(list_node.attrib.get('pagesize'), limit)
            total = self._to_int(list_node.attrib.get('recordcount'), total)
        return {
            'list': self.custom_list(html=root.iter('list')),
            'page': pg,
            'pagecount': pagecount,
            'limit': limit,
            'total': total
        }

    def detailContent(self, array):
        """Return the detail record for the composite id in ``array[0]``.

        The composite id has the form ``title###id###picture`` (as built by
        `custom_list`). A missing or malformed id yields ``{'list': []}``.
        Network/parse failures are reported and the record is still returned
        with whatever could be filled in.
        """
        parts = array[0].split('###') if array else []
        if len(parts) < 3:
            return {'list': []}
        title, vod_id, logo = parts[0], parts[1], parts[2]

        vod_play_from = ['播放线路', ]
        vod_play_url = []
        info = {}
        try:
            root = self._fetch_root(self.API_URL + '?ac=detail&ids=' + vod_id)
            for video in self._iter_videos(root.iter('list')):
                info.update(self._video_fields(video))
                # NOTE(review): the regex literal that extracted the episode
                # string was corrupted in this copy of the file (its markup
                # was stripped). The episode string is read from the first
                # non-empty <dd> node instead, presumably the element the
                # lost pattern targeted -- confirm against a live response.
                if not vod_play_url:
                    for dd in video.iter('dd'):
                        episodes = self.custom_EpisodesList(dd.text or '')
                        if episodes:
                            vod_play_url.append("#".join(episodes))
                            break
        except Exception as exc:  # best effort: keep the basic record
            print("detailContent failed for id {0}: {1!r}".format(vod_id, exc))

        type_name = info.get('type', '')
        vod = {
            "vod_id": array[0],
            "vod_name": title,
            "vod_pic": logo,
            "type_name": type_name,
            "vod_year": info.get('year', ''),
            "vod_area": info.get('area', ''),
            "vod_remarks": info.get('lang', ''),
            "vod_actor": info.get('actor', ''),
            "vod_director": info.get('director', ''),
            "vod_content": info.get('des', '')
        }
        vod['vod_play_from'] = "$$$".join(vod_play_from)
        vod['vod_play_url'] = "$$$".join(vod_play_url)
        if self.filterate and self.custom_RegexGetText(
                Text=type_name, RegexText=self.ADULT_PATTERN, Index=1) != '':
            return {'list': []}
        return {'list': [vod]}

    def searchContent(self, key, quick, pg=1):
        """Return the videos matching keyword `key` on result page `pg`."""
        url = self.API_URL + '?ac=list&wd={0}&pg={1}'.format(
            urllib.parse.quote(key), pg)
        root = self._fetch_root(url)
        return {'list': self.custom_list(html=root.iter('list'))}

    def playerContent(self, flag, id, vipFlags):
        """Resolve the playable URL for episode URL `id`.

        The page at `id` is downloaded and searched for an ``.m3u8`` URL.
        If one is found it is returned with ``parse=1``; otherwise, or when
        the download fails, `id` itself is returned with ``parse=0``.
        """
        url = ''
        try:
            htmlTxt = self.custom_webReadFile(urlStr=id, header=self.header)
            url = self.custom_RegexGetText(
                Text=htmlTxt, RegexText=r'(https{0,1}://.+?\.m3u8)', Index=1)
        except Exception as exc:  # fall back to playing `id` directly
            print("playerContent could not read {0}: {1!r}".format(id, exc))
        sniff = 1
        if url.find('.m3u8') < 1:
            url = id
            sniff = 0
        return {
            "parse": sniff,  # 0 = play directly, 1 = sniff
            "playUrl": '',
            "url": url,
            'jx': 0,  # VIP parsing: 0 = do not parse, 1 = parse
            "header": ''
        }

    def localProxy(self, params):
        """Return a fixed empty MPEG-TS response."""
        return [200, "video/MP2T", ""]

    # ------------------------------ private helpers ------------------------------

    def _fetch_root(self, url):
        """Download `url` and return the root element of the parsed XML."""
        return fromstring(self.custom_webReadFile(urlStr=url))

    @staticmethod
    def _iter_videos(list_nodes):
        """Yield every direct child of each `<list>` element in `list_nodes`."""
        for list_node in list_nodes:
            for video in list_node:
                yield video

    @staticmethod
    def _video_fields(video):
        """Map each child tag of `video` to its text ('' when the text is None)."""
        return {child.tag: (child.text or '') for child in video}

    @staticmethod
    def _to_int(value, default):
        """Return ``int(value)``, or `default` when that is not possible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    # ------------------------------ custom functions ------------------------------
    # Regex: extract text
    def custom_RegexGetText(self, Text, RegexText, Index):
        """Return group `Index` of the first match of `RegexText` in `Text`.

        Returns ``""`` when `Text` is None/empty, nothing matches, or the
        group did not participate in the match.
        """
        match = re.search(RegexText, Text or '', re.M | re.S)
        if match is None:
            return ""
        return match.group(Index) or ""

    # Build the result list for a category / listing
    def custom_list(self, html):
        """Return video summaries for the `<list>` elements in `html`.

        The ids found in `html` are collected, then one `ac=detail` request
        is made for all of them to obtain picture, year and remarks. Each
        record is built from its own tags only, so a missing tag yields an
        empty string rather than a value from the previous video.
        """
        ids = []
        for video in self._iter_videos(html):
            video_id = self._video_fields(video).get('id', '').strip()
            if video_id:
                ids.append(video_id)
        if not ids:
            return []

        root = self._fetch_root(
            self.API_URL + '?ac=detail&ids=' + ','.join(ids))
        videos = []
        for video in self._iter_videos(root.iter('list')):
            fields = self._video_fields(video)
            video_id = fields.get('id', '')
            if not video_id:
                continue
            if self.filterate and self.custom_RegexGetText(
                    Text=fields.get('type', ''),
                    RegexText=self.ADULT_PATTERN, Index=1) != '':
                continue
            title = fields.get('name', '')
            img = fields.get('pic', '')
            videos.append({
                # Composite id consumed by detailContent.
                "vod_id": '{0}###{1}###{2}'.format(title, video_id, img),
                "vod_name": title,
                "vod_pic": img,
                "vod_year": fields.get('year', ''),
                "vod_remarks": fields.get('note', '')
            })
        return videos

    # Fetch a web page
    def custom_webReadFile(self, urlStr, header=None, codeName='utf-8', timeout=30):
        """Download `urlStr` and return the body decoded with `codeName`.

        When `header` is None a default Referer / User-Agent / Host set is
        sent; any other value (including an empty dict) is used as given.
        `timeout` is the socket timeout in seconds passed to ``urlopen``.
        """
        if header is None:
            header = {
                "Referer": urlStr,
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
                "Host": self.custom_RegexGetText(Text=urlStr, RegexText='https*://(.*?)(/|$)', Index=1)
            }
        # import ssl
        # ssl._create_default_https_context = ssl._create_unverified_context  # globally disable certificate verification
        req = urllib.request.Request(url=urlStr, headers=header)
        with urllib.request.urlopen(req, timeout=timeout) as response:
            return response.read().decode(codeName)

    # Extract the episode sections
    def custom_lineList(self, Txt, mark, after):
        """Return every slice of `Txt` running from `mark` up to the next `after`.

        Each slice starts with `mark` and excludes `after`. A `mark` with no
        following `after` yields the remainder of `Txt`.
        """
        circuit = []
        if not mark:
            return circuit
        origin = Txt.find(mark)
        while origin >= 0:
            end = Txt.find(after, origin)
            if end < 0:
                circuit.append(Txt[origin:])
                break
            circuit.append(Txt[origin:end])
            # Always move forward so overlapping markers cannot loop forever.
            origin = Txt.find(mark, max(end, origin + 1))
        return circuit

    # Regex: extract text, returning a list
    def custom_RegexGetTextLine(self, Text, RegexText, Index):
        """Return all matches of `RegexText` in `Text` (`Index` is unused)."""
        return re.compile(RegexText, re.M | re.S).findall(Text)

    # Extract the episodes
    def custom_EpisodesList(self, html):
        """Split ``title$url#title$url...`` into a list of ``title$url`` items.

        Segments without a ``$`` separator or with an empty URL are skipped.
        """
        videos = []
        for entry in html.split('#'):
            pieces = entry.split('$')
            if len(pieces) < 2 or not pieces[1]:
                continue
            videos.append(pieces[0] + "$" + pieces[1])
        return videos

    # Fetch the categories
    def custom_classification(self):
        """Fetch the category list, print it, and return it as name -> id.

        Categories whose name matches the adult pattern, has no text, or has
        no `id` attribute are skipped.
        """
        root = self._fetch_root(self.API_URL)
        temporaryClass = {}
        for class_node in root.iter('class'):
            for value in class_node:
                name = value.text or ''
                type_id = value.attrib.get('id')
                if not name or type_id is None:
                    continue
                if self.custom_RegexGetText(Text=name, RegexText=r'(福利|倫理片|伦理片)', Index=1) != '':
                    continue
                temporaryClass[name] = type_id
                print("'{0}':'{1}',".format(name, type_id))
        return temporaryClass


if __name__ == '__main__':
    from t4.core.loader import t4_spider_init

    spider = Spider()
    t4_spider_init(spider)
    print(spider.homeContent(True))
    print(spider.homeVideoContent())

    # Manual smoke test kept from the original (disabled):
    # T=Spider()
    # T.homeContent(filter=False)
    # T.custom_classification()
    # l=T.homeVideoContent()
    # l=T.searchContent(key='柯南',quick='')
    # l=T.categoryContent(tid='22',pg='1',filter=False,extend={})
    # for x in l['list']:
    #     print(x['vod_name'])
    # mubiao= l['list'][2]['vod_id']
    # # print(mubiao)
    # playTabulation=T.detailContent(array=[mubiao,])
    # # print(playTabulation)
    # vod_play_from=playTabulation['list'][0]['vod_play_from']
    # vod_play_url=playTabulation['list'][0]['vod_play_url']
    # url=vod_play_url.split('$$$')
    # vod_play_from=vod_play_from.split('$$$')[0]
    # url=url[0].split('$')
    # url=url[1].split('#')[0]
    # # print(url)
    # m3u8=T.playerContent(flag=vod_play_from,id=url,vipFlags=True)
    # print(m3u8)