dr_py/txt/hipy/新浪资源.py
晚风拂柳颜 96e85aefd8 更新py
2024-01-11 23:42:19 +08:00

414 lines
14 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
# !/usr/bin/python
import re
import sys
import time
import urllib
import urllib.parse
import urllib.request
from urllib import request, parse
from xml.etree.ElementTree import fromstring, ElementTree as et

# The project package lives one directory up when the file is run in place.
sys.path.append('..')
try:
    # from base.spider import Spider as BaseSpider
    from base.spider import BaseSpider
except ImportError:
    from t4.base.spider import BaseSpider
"""
配置示例:
t4的配置里ext节点会自动变成api对应query参数extend,但t4的ext字符串不支持路径格式比如./开头或者.json结尾
api里会自动含有ext参数是base64编码后的选中的筛选条件
{
"key":"hipy_t4_新浪资源",
"name":"新浪资源(hipy_t4)",
"type":4,
"api":"http://192.168.31.49:5707/api/v1/vod/新浪资源",
"searchable":1,
"quickSearch":0,
"filterable":1,
"ext":""
},
{
"key": "hipy_t3_新浪资源",
"name": "新浪资源(hipy_t3)",
"type": 3,
"api": "{{host}}/txt/hipy/新浪资源.py",
"searchable": 1,
"quickSearch": 0,
"filterable": 1,
"ext": ""
},
"""
class Spider(BaseSpider):  # metaclass: the default metaclass, ``type``
    """Spider for the XML VOD feed served at api.xinlangapi.com ("新浪资源").

    Every public ``*Content`` method issues one or more HTTP GET requests
    through :meth:`custom_webReadFile` and converts the XML answer into the
    plain ``dict``/``list`` structures returned to the caller.
    """

    # Endpoint shared by every request; callers append the query string.
    API_URL = 'https://api.xinlangapi.com/xinlangapi.php/provide/vod/from/xlyun/at/xml/'

    def getName(self):
        """Return the display name of this source."""
        return "新浪资源"

    # When True, entries whose type name matches the adult-content pattern
    # (see _is_restricted) are removed from list and detail results.
    filterate = False

    def init(self, extend=""):
        """Print the ``extend`` value handed over by the host; no other setup."""
        print("============{0}============".format(extend))

    def isVideoFormat(self, url):
        """Not implemented for this source; returns None."""
        pass

    def manualVideoCheck(self):
        """Not implemented for this source; returns None."""
        pass

    def homeContent(self, filter):
        """Return the static category list.

        Args:
            filter: when truthy, ``self.config['filter']`` is added under the
                ``'filters'`` key.

        Returns:
            dict with ``'class'`` (list of ``type_name``/``type_id`` dicts)
            and optionally ``'filters'``.
        """
        # Display name -> category id understood by the API's ``t`` parameter.
        # Category id '22' (伦理片) is intentionally not listed.
        cateManual = {
            '动漫': '3',
            '动漫电影': '17',
            '综艺': '4',
            '纪录片': '5',
            '动作片': '6',
            '爱情片': '7',
            '科幻片': '8',
            '战争片': '9',
            '剧情片': '10',
            '恐怖片': '11',
            '喜剧片': '12',
            '大陆剧': '13',
            '港澳剧': '14',
            '台湾剧': '15',
            '欧美剧': '16',
            '韩剧': '18',
            '日剧': '20',
            '泰剧': '21',
            '体育': '23'
        }
        result = {
            'class': [
                {'type_name': name, 'type_id': type_id}
                for name, type_id in cateManual.items()
            ]
        }
        if filter:
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        """Return the entries the API reports for ``h=24`` as ``{'list': [...]}``."""
        xmlTxt = self.custom_webReadFile(urlStr=self.API_URL + '?ac=list&h=24')
        return {'list': self.custom_list(html=fromstring(xmlTxt).iter('list'))}

    def categoryContent(self, tid, pg, filter, extend):
        """Return one page of a category.

        Args:
            tid: category id sent as the ``t`` query parameter.
            pg: page number sent as the ``pg`` query parameter; echoed back
                unchanged under ``'page'``.
            filter: unused.
            extend: unused.

        Returns:
            dict with ``list``, ``page``, ``pagecount``, ``limit`` and
            ``total``. The three counters are always ints; when the response
            lacks an attribute (or it is not numeric) the defaults 1, 20 and
            9999 are kept.
        """
        url = '{0}?ac=list&t={1}&pg={2}'.format(self.API_URL, tid, pg)
        root = fromstring(self.custom_webReadFile(urlStr=url))
        pagecount, limit, total = 1, 20, 9999
        for node in root.iter('list'):
            pagecount = self._to_int(node.attrib.get('pagecount'), pagecount)
            limit = self._to_int(node.attrib.get('pagesize'), limit)
            total = self._to_int(node.attrib.get('recordcount'), total)
        return {
            # A fresh iterator is required: the loop above exhausted the first.
            'list': self.custom_list(html=root.iter('list')),
            'page': pg,
            'pagecount': pagecount,
            'limit': limit,
            'total': total
        }

    def detailContent(self, array):
        """Return the detail record for one video.

        Args:
            array: list whose first item is the composite id
                ``"<title>###<id>###<picture url>"`` built by
                :meth:`custom_list`.

        Returns:
            ``{'list': [vod]}``; ``{'list': []}`` when ``array`` is empty or
            the entry is filtered out. Failures while fetching or parsing the
            detail page are reported with ``print`` and leave the
            corresponding fields empty instead of raising.
        """
        if not array:
            return {'list': []}
        parts = array[0].split('###')
        # Pad so that a malformed id yields empty fields instead of IndexError.
        parts += [''] * (3 - len(parts))
        title, vod_num, logo = parts[0], parts[1], parts[2]

        info = {}
        vod_play_url = []
        try:
            xmlTxt = self.custom_webReadFile(urlStr=self.API_URL + '?ac=detail&ids=' + vod_num)
            for node in fromstring(xmlTxt).iter('list'):
                for video in node:
                    info.update(self._child_texts(video))
            # The episode list is taken from the raw text of the xlyun <dd> node.
            temporary = self.custom_RegexGetText(Text=xmlTxt, RegexText=r'<dd flag="xlyun">(.+?)</dd>', Index=1)
            temporary = temporary.replace('<![CDATA[', '').replace(']]>', '')
            vod_play_url.append("#".join(self.custom_EpisodesList(temporary)))
        except Exception as exc:
            # Best effort: keep whatever was collected, but do not hide the cause.
            print("detailContent failed for id {0}: {1}".format(vod_num, exc))

        type_name = info.get('type', '')
        vod = {
            "vod_id": array[0],
            "vod_name": title,
            "vod_pic": logo,
            "type_name": type_name,
            "vod_year": info.get('year', ''),
            "vod_area": info.get('area', ''),
            "vod_remarks": info.get('lang', ''),
            "vod_actor": info.get('actor', ''),
            "vod_director": info.get('director', ''),
            "vod_content": info.get('des', '')
        }
        vod['vod_play_from'] = "$$$".join(['播放线路'])
        vod['vod_play_url'] = "$$$".join(vod_play_url)
        if self._is_restricted(type_name):
            return {'list': []}
        return {'list': [vod]}

    def searchContent(self, key, quick, pg=1):
        """Search by keyword.

        Args:
            key: search term; URL-quoted before being sent as ``wd``.
            quick: unused.
            pg: result page to request (defaults to the first page).

        Returns:
            ``{'list': [...]}`` with the matching entries.
        """
        url = '{0}?ac=list&wd={1}&pg={2}'.format(self.API_URL, urllib.parse.quote(key), pg)
        xmlTxt = self.custom_webReadFile(urlStr=url)
        return {'list': self.custom_list(html=fromstring(xmlTxt).iter('list'))}

    def playerContent(self, flag, id, vipFlags):
        """Resolve the play address for one episode.

        The page at ``id`` is downloaded and searched for the first
        ``http(s)://….m3u8`` address.

        Args:
            flag: unused.
            id: episode URL taken from ``vod_play_url``.
            vipFlags: unused.

        Returns:
            dict with ``parse`` (1 when an m3u8 address was found in the
            page, otherwise 0 and ``url`` falls back to ``id``), ``playUrl``,
            ``url``, ``jx`` and ``header``.
        """
        page = self.custom_webReadFile(urlStr=id, header=self.header)
        url = self.custom_RegexGetText(Text=page, RegexText=r'(https{0,1}://.+?\.m3u8)', Index=1)
        sniff = 1
        if not url:
            # Nothing matched: hand the original address back unchanged.
            url = id
            sniff = 0
        return {
            "parse": sniff,  # 0 = play directly, 1 = sniff
            "playUrl": '',
            "url": url,
            'jx': 0,  # VIP resolving: 0 = off, 1 = on
            "header": ''
        }

    config = {
        "player": {},
        "filter": {}
    }
    # Headers used by playerContent; empty means no extra headers are sent.
    header = {}

    def localProxy(self, params):
        """Return a fixed ``[200, "video/MP2T", ""]`` answer."""
        return [200, "video/MP2T", ""]

    # ----------------------------- custom helpers -----------------------------

    @staticmethod
    def _to_int(value, default):
        """Return ``int(value)``, or ``default`` when it cannot be converted."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _child_texts(element):
        """Map each child tag of ``element`` to its text ('' when empty).

        When a tag occurs more than once, the last occurrence wins.
        """
        return {child.tag: (child.text or '') for child in element}

    def _is_restricted(self, type_name):
        """Tell whether filtering is on and ``type_name`` matches the adult pattern."""
        if not self.filterate:
            return False
        return self.custom_RegexGetText(Text=type_name, RegexText=r'(伦理|倫理|福利)', Index=1) != ''

    def custom_RegexGetText(self, Text, RegexText, Index):
        """Return group ``Index`` of the first match, or '' when nothing matches.

        The search runs with ``re.M | re.S``. A ``None`` ``Text`` is treated
        as an empty string.
        """
        match = re.search(RegexText, Text or '', re.M | re.S)
        if match is None:
            return ""
        return match.group(Index) or ""

    def custom_list(self, html):
        """Turn ``<list>`` elements of a listing response into video dicts.

        The listing only provides ids, so a second request (``ac=detail``)
        is made for all collected ids to obtain picture, note, year and type.

        Args:
            html: iterable of ``<list>`` elements from a listing response.

        Returns:
            list of dicts with ``vod_id`` (``"<title>###<id>###<pic>"``),
            ``vod_name``, ``vod_pic``, ``vod_year`` and ``vod_remarks``.
            Empty when the listing holds no ids; no request is made then.
        """
        ids = []
        for node in html:
            for video in node:
                vod_num = self._child_texts(video).get('id', '')
                if vod_num:
                    ids.append(vod_num)
        if not ids:
            return []

        xmlTxt = self.custom_webReadFile(urlStr=self.API_URL + '?ac=detail&ids=' + ','.join(ids))
        videos = []
        for node in fromstring(xmlTxt).iter('list'):
            for video in node:
                # Built per video so a missing tag never inherits a stale value.
                fields = self._child_texts(video)
                if self._is_restricted(fields.get('type', '')):
                    continue
                title = fields.get('name', '')
                img = fields.get('pic', '')
                videos.append({
                    "vod_id": '{0}###{1}###{2}'.format(title, fields.get('id', ''), img),
                    "vod_name": title,
                    "vod_pic": img,
                    "vod_year": fields.get('year', ''),
                    "vod_remarks": fields.get('note', '')
                })
        return videos

    def custom_webReadFile(self, urlStr, header=None, codeName='utf-8'):
        """Download ``urlStr`` with GET and return the decoded body.

        Args:
            urlStr: address to fetch.
            header: request headers; when None, a Referer, a desktop
                User-Agent and a Host header derived from ``urlStr`` are sent.
            codeName: encoding used to decode the response bytes.

        Raises:
            Whatever ``urllib.request.urlopen`` or ``bytes.decode`` raise;
            nothing is caught here.
        """
        if header is None:
            header = {
                "Referer": urlStr,
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.54 Safari/537.36',
                "Host": self.custom_RegexGetText(Text=urlStr, RegexText='https*://(.*?)(/|$)', Index=1)
            }
        req = urllib.request.Request(url=urlStr, headers=header)
        with urllib.request.urlopen(req) as response:
            return response.read().decode(codeName)

    def custom_lineList(self, Txt, mark, after):
        """Cut ``Txt`` into pieces that start at ``mark`` and stop before ``after``.

        NOTE(review): a ``mark`` found at index 8 or lower ends the scan; this
        threshold is kept from the original code - confirm it is intentional.

        Returns:
            list of substrings. When ``after`` is missing behind a mark, the
            rest of the text is taken as the final piece.
        """
        circuit = []
        origin = Txt.find(mark)
        while origin > 8:
            end = Txt.find(after, origin)
            if end == -1:
                circuit.append(Txt[origin:])
                break
            circuit.append(Txt[origin:end])
            # Always move past the current mark, otherwise a terminator that
            # sits exactly at ``origin`` would make the loop spin forever.
            origin = Txt.find(mark, max(end, origin + 1))
        return circuit

    def custom_RegexGetTextLine(self, Text, RegexText, Index):
        """Return every match of ``RegexText`` in ``Text`` (``re.M | re.S``).

        ``Index`` is accepted for signature compatibility and not used.
        """
        return re.compile(RegexText, re.M | re.S).findall(Text)

    def custom_EpisodesList(self, html):
        """Parse ``"title$url#title$url…"`` into a list of ``"title$url"``.

        Entries without a ``$`` separator or with an empty URL are skipped,
        so a trailing ``#`` or an empty input yields no bogus entry.
        """
        videos = []
        for entry in html.split('#'):
            pieces = entry.split('$')
            if len(pieces) < 2 or not pieces[1]:
                continue
            videos.append(pieces[0] + "$" + pieces[1])
        return videos

    def custom_classification(self):
        """Fetch the category list from the API, print it and return it.

        Names matching ``福利|倫理片|伦理片`` and unnamed entries are skipped.

        Returns:
            dict mapping category name to the element's ``id`` attribute
            ('' when the attribute is absent).
        """
        xmlTxt = self.custom_webReadFile(urlStr=self.API_URL)
        temporaryClass = {}
        for node in fromstring(xmlTxt).iter('class'):
            for value in node:
                name = value.text or ''
                if not name:
                    continue
                if self.custom_RegexGetText(Text=name, RegexText=r'(福利|倫理片|伦理片)', Index=1) != '':
                    continue
                type_id = value.attrib.get('id', '')
                temporaryClass[name] = type_id
                # Printed in dict-literal form, matching the table in homeContent.
                print("'{0}':'{1}',".format(name, type_id))
        return temporaryClass
if __name__ == '__main__':
    # Manual smoke test: initialise the spider through the t4 loader and
    # print the home page data. Requires the t4 package and network access.
    from t4.core.loader import t4_spider_init

    spider = Spider()
    t4_spider_init(spider)
    print(spider.homeContent(True))
    print(spider.homeVideoContent())

    # Further manual checks (uncomment as needed):
    # T = Spider()
    # T.homeContent(filter=False)
    # T.custom_classification()
    # l = T.homeVideoContent()
    # l = T.searchContent(key='柯南', quick='')
    # l = T.categoryContent(tid='22', pg='1', filter=False, extend={})
    # for x in l['list']:
    #     print(x['vod_name'])
    # mubiao = l['list'][2]['vod_id']
    # # print(mubiao)
    # playTabulation = T.detailContent(array=[mubiao, ])
    # # print(playTabulation)
    # vod_play_from = playTabulation['list'][0]['vod_play_from']
    # vod_play_url = playTabulation['list'][0]['vod_play_url']
    # url = vod_play_url.split('$$$')
    # vod_play_from = vod_play_from.split('$$$')[0]
    # url = url[0].split('$')
    # url = url[1].split('#')[0]
    # # print(url)
    # m3u8 = T.playerContent(flag=vod_play_from, id=url, vipFlags=True)
    # print(m3u8)