# coding=utf-8 # !/usr/bin/python import sys import requests from bs4 import BeautifulSoup import re import base64 from base.spider import Spider import random sys.path.append('..') xurl = "https://heiliaowang-44.buzz" headerx = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36', } class Spider(Spider): global xurl global headerx def getName(self): return "首页" def init(self, extend): pass def destroy(self): pass def isVideoFormat(self, url): pass def manualVideoCheck(self): pass def homeContent(self, filter): res = requests.get(xurl, headers=headerx) res.encoding = "utf-8" doc = BeautifulSoup(res.text, "html.parser") sourcediv = doc.find('div', class_='nav') vod = sourcediv.find_all('dd') string_list = ["首页", "激情图漫", "激情小说", "情色小说", "随机推荐", "顶级资源"] result = {} result['class'] = [] result['class'].append({'type_id': "/type/328", 'type_name': "国产视频"}) result['class'].append({'type_id': "/type/329", 'type_name': "中文字幕"}) result['class'].append({'type_id': "/type/331", 'type_name': "日本有码"}) result['class'].append({'type_id': "/type/332", 'type_name': "日本无码"}) result['class'].append({'type_id': "/type/333", 'type_name': "欧美无码"}) result['class'].append({'type_id': "/type/334", 'type_name': "强奸乱轮"}) result['class'].append({'type_id': "/type/335", 'type_name': "制服诱惑"}) result['class'].append({'type_id': "/type/336", 'type_name': "直播主播"}) result['class'].append({'type_id': "/type/338", 'type_name': "明星换脸"}) result['class'].append({'type_id': "/type/339", 'type_name': "抖阴视频"}) result['class'].append({'type_id': "/type/340", 'type_name': "女优明星"}) result['class'].append({'type_id': "/type/343", 'type_name': "网爆门"}) result['class'].append({'type_id': "/type/345", 'type_name': "伦理三级"}) result['class'].append({'type_id': "/type/346", 'type_name': "AV解说"}) result['class'].append({'type_id': "/type/347", 'type_name': "SM调教"}) result['class'].append({'type_id': "/type/348", 'type_name': "萝莉少女"}) result['class'].append({'type_id': "/type/349", 'type_name': "极品媚黑"}) result['class'].append({'type_id': "/type/350", 'type_name': "女同性恋"}) result['class'].append({'type_id': "/type/351", 'type_name': "玩偶姐姐"}) result['class'].append({'type_id': "/type/353", 'type_name': "人妖系列"}) result['class'].append({'type_id': "/type/373", 'type_name': "韩国主播"}) result['class'].append({'type_id': "/type/378", 'type_name': "VR视角"}) for item in vod: name = item.find('a').text if name in string_list: continue id = item.find('a')['href'] id = id.replace(".html", "") result['class'].append({'type_id': id, 'type_name': name}) return result def homeVideoContent(self): videos = [] try: res = requests.get(xurl, headers=headerx) res.encoding = "utf-8" doc = BeautifulSoup(res.text, "html.parser") sourcediv = doc.find_all('div', class_='pic') for vod in sourcediv: ul_elements = vod.find_all('ul') for item in ul_elements: name = item.select_one("li a")['title'] pic = item.select_one("li a img")["data-src"] remark = item.select_one("li a span").text id = item.select_one("li a")['href'] video = { "vod_id": id, "vod_name": name, "vod_pic": pic, "vod_remarks": remark } videos.append(video) except: pass result = {'list': videos} return result def categoryContent(self, cid, pg, filter, ext): result = {} videos = [] if not pg: pg = 1 url = xurl +cid + "/" + str(pg) + ".html" detail = requests.get(url=url, headers=headerx) detail.encoding = "utf-8" doc = BeautifulSoup(detail.text, "html.parser") sourcediv = doc.find_all('div', class_='pic') for vod in sourcediv: ul_elements = vod.find_all('ul') for item in ul_elements: name = item.select_one("li a")['title'] pic = item.select_one("li a img")["src"] remark = item.select_one("li a span").text id = item.select_one("li a")['href'] video = { "vod_id": id, "vod_name": name, "vod_pic": pic, "vod_remarks": remark } videos.append(video) result['list'] = videos result['page'] = pg result['pagecount'] = 9999 result['limit'] = 90 result['total'] = 999999 return result def detailContent(self, ids): did = ids[0] videos = [] result = {} res = requests.get(url=xurl + did, headers=headerx) res.encoding = "utf-8" doc = BeautifulSoup(res.text, "html.parser") sourcediv = doc.find('div', style='padding-bottom: 10px;') vod = sourcediv.find_all('a') play_from = "" play_url = "" for item in vod: play_from = play_from + item.text + "$$$" play_url = play_url + item['href'] + "$$$" while play_url[-1] == "#" or play_url[-1] == "$": play_url = play_url[:-1] while play_from[-1] == "#" or play_from[-1] == "$": play_from = play_from[:-1] source_match = re.search(r"