feat: extend_base_urls

This commit is contained in:
guorong.zheng 2024-04-25 18:28:39 +08:00
parent 485a1abcbb
commit b7db411135
3 changed files with 63 additions and 5 deletions

@@ -24,3 +24,4 @@ recent_days = 30
ipv_type = "ipv4"
domain_blacklist = ["epg.pw"]
url_keywords_blacklist = []
extend_base_urls = []

13
main.py

@@ -14,13 +14,14 @@ from utils import (
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
compareSpeedAndResolution,
sortUrlsBySpeedAndResolution,
getTotalUrls,
checkUrlIPVType,
checkByDomainBlacklist,
checkByURLKeywordsBlacklist,
filterUrlsByPatterns,
useAccessibleUrl,
getChannelsByExtendBaseUrls,
)
import logging
from logging.handlers import RotatingFileHandler
@@ -61,7 +62,11 @@ class UpdateSource:
self.driver = self.setup_driver()
async def visitPage(self, channelItems):
total_channels = sum(len(channelObj) for _, channelObj in channelItems.items())
channelNames = [
name for _, channelObj in channelItems.items() for name in channelObj.keys()
]
extendResults = await getChannelsByExtendBaseUrls(channelNames)
total_channels = len(channelNames)
pbar = tqdm(total=total_channels)
pageUrl = await useAccessibleUrl()
for cate, channelObj in channelItems.items():
@@ -84,6 +89,8 @@ class UpdateSource:
config.favorite_page_num if isFavorite else config.default_page_num
)
infoList = []
for url in extendResults.get(name, []):
infoList.append((url, None, None))
if pageUrl:
for page in range(1, pageNum + 1):
try:
@@ -118,7 +125,7 @@ class UpdateSource:
if not github_actions or (
pbar.n <= 200 and github_actions == "true"
):
sorted_data = await compareSpeedAndResolution(infoList)
sorted_data = await sortUrlsBySpeedAndResolution(infoList)
if sorted_data:
channelUrls[name] = getTotalUrls(sorted_data)
for (url, date, resolution), response_time in sorted_data:

@@ -11,6 +11,7 @@ import os
import urllib.parse
import ipaddress
from urllib.parse import urlparse
import requests
def getChannelItems():
@@ -44,7 +45,11 @@ def getChannelItems():
if match:
if match.group(1) not in channels[current_category]:
channels[current_category][match.group(1)] = [match.group(2)]
else:
elif (
match.group(2)
and match.group(2)
not in channels[current_category][match.group(1)]
):
channels[current_category][match.group(1)].append(
match.group(2)
)
@@ -53,6 +58,51 @@ def getChannelItems():
f.close()
async def getChannelsByExtendBaseUrls(channel_names):
    """
    Get the channels by extending the base urls.

    Each configured extend base url is fetched once (10s timeout); every line
    of the response is matched against the "<name>,<url>" pattern (lines whose
    url part is a "#genre#" category header are excluded by the negative
    lookahead) and urls are kept only when they pass the ip-type,
    domain-blacklist and url-keywords-blacklist filters.

    :param channel_names: iterable of channel names to look up
    :return: dict mapping channel name -> list of unique matching urls
    """
    channels = {}
    # Compile once; "<name>,<url>" with "#genre#" headers rejected.
    pattern = re.compile(r"^(.*?),(?!#genre#)(.*?)$")
    for base_url in config.extend_base_urls:
        try:
            print(f"Processing extend base url: {base_url}")
            try:
                response = requests.get(base_url, timeout=10)
            except requests.exceptions.Timeout:
                print(f"Timeout on {base_url}")
                continue
            content = response.text
            if content:
                # Split and strip once per base url instead of once per
                # channel name.
                lines = [line.strip() for line in content.split("\n")]
                for channel_name in channel_names:
                    urls = []
                    for line in lines:
                        match = pattern.search(line)
                        # Guard BEFORE accessing groups: previously
                        # match.group(2) was read unconditionally, so any
                        # non-matching line (blank line, genre header) raised
                        # AttributeError and aborted the whole base url.
                        if not match or match.group(1) != channel_name:
                            continue
                        url = match.group(2)
                        if (
                            url
                            and url not in urls
                            and checkUrlIPVType(url)
                            and checkByDomainBlacklist(url)
                            and checkByURLKeywordsBlacklist(url)
                        ):
                            urls.append(url)
                    if urls:
                        if channel_name in channels:
                            channels[channel_name] += urls
                        else:
                            channels[channel_name] = urls
        except Exception as e:
            print(f"Error on {base_url}: {e}")
            continue
    print("Finished processing extend base urls")
    return channels
def updateChannelUrlsTxt(cate, channelUrls):
"""
Update the category and channel urls to the final file
@@ -122,7 +172,7 @@ async def getSpeed(url, urlTimeout=5):
return float("inf")
async def compareSpeedAndResolution(infoList):
async def sortUrlsBySpeedAndResolution(infoList):
"""
Sort by speed and resolution
"""