
Merge pull request from Guovin/master

feat
Govin 2024-06-27 17:06:46 +08:00 committed by GitHub
commit dc26cfda13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 86 additions and 67 deletions

@@ -19,7 +19,7 @@ bs4 = "*"
 tqdm = "*"
 async-timeout = "*"
 pyinstaller = "*"
-aiohttp-retry = "*"
+aiohttp = "*"

 [requires]
 python_version = "3.8"

Pipfile.lock (generated), 37 changed lines

@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "7ba6998730a27a7cc92698bd503c672eb5a0bcce3a7412a7e044e50d915240ef"
+            "sha256": "4bfb309bc7d7f25a5cb114e48b398480e2ff489ff884e3d4c07d4b726d58875a"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -99,15 +99,6 @@
             "markers": "python_version >= '3.8'",
             "version": "==3.9.5"
         },
-        "aiohttp-retry": {
-            "hashes": [
-                "sha256:3aeeead8f6afe48272db93ced9440cf4eda8b6fd7ee2abb25357b7eb28525b45",
-                "sha256:9a8e637e31682ad36e1ff9f8bcba912fcfc7d7041722bc901a4b948da4d71ea9"
-            ],
-            "index": "pypi",
-            "markers": "python_version >= '3.7'",
-            "version": "==2.8.3"
-        },
         "aiosignal": {
             "hashes": [
                 "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc",
@@ -444,11 +435,11 @@
         },
         "importlib-metadata": {
             "hashes": [
-                "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570",
-                "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"
+                "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f",
+                "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"
             ],
             "markers": "python_version < '3.10'",
-            "version": "==7.1.0"
+            "version": "==8.0.0"
         },
         "multidict": {
             "hashes": [
@@ -640,12 +631,12 @@
         },
         "selenium": {
             "hashes": [
-                "sha256:4770ffe5a5264e609de7dc914be6b89987512040d5a8efb2abb181330d097993",
-                "sha256:650dbfa5159895ff00ad16e5ddb6ceecb86b90c7ed2012b3f041f64e6e4904fe"
+                "sha256:903c8c9d61b3eea6fcc9809dc7d9377e04e2ac87709876542cc8f863e482c4ce",
+                "sha256:e424991196e9857e19bf04fe5c1c0a4aac076794ff5e74615b1124e729d93104"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.8'",
-            "version": "==4.21.0"
+            "version": "==4.22.0"
         },
         "selenium-stealth": {
             "hashes": [
@@ -657,11 +648,11 @@
         },
         "setuptools": {
             "hashes": [
-                "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4",
-                "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"
+                "sha256:937a48c7cdb7a21eb53cd7f9b59e525503aa8abaf3584c730dc5f7a5bec3a650",
+                "sha256:a58a8fde0541dab0419750bcc521fbdf8585f6e5cb41909df3a472ef7b81ca95"
             ],
             "markers": "python_version >= '3.8'",
-            "version": "==70.0.0"
+            "version": "==70.1.1"
         },
         "sgmllib3k": {
             "hashes": [
@@ -736,6 +727,14 @@
             "markers": "python_version >= '3.8'",
             "version": "==2.2.2"
         },
+        "websocket-client": {
+            "hashes": [
+                "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526",
+                "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==1.8.0"
+        },
         "wsproto": {
             "hashes": [
                 "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065",

@@ -32,3 +32,4 @@ subscribe_urls = [
 ]
 open_multicast = True
 region_list = ["all"]
+open_proxy = True

main.py, 19 changed lines

@@ -93,19 +93,34 @@ class UpdateSource:
             self.append_data_to_info_data(
                 cate, name, self.results["open_subscribe"].get(formatName, [])
             )
+            print(
+                name,
+                "subscribe num:",
+                len(self.results["open_subscribe"].get(formatName, [])),
+            )
         if config.open_multicast:
             self.append_data_to_info_data(
                 cate, name, self.results["open_multicast"].get(formatName, [])
             )
+            print(
+                name,
+                "multicast num:",
+                len(self.results["open_multicast"].get(formatName, [])),
+            )
         if config.open_online_search:
             self.append_data_to_info_data(
                 cate,
                 name,
                 self.results["open_online_search"].get(formatName, []),
             )
+            print(
+                name,
+                "online search num:",
+                len(self.results["open_online_search"].get(formatName, [])),
+            )
         print(
             name,
-            "total len:",
+            "total num:",
             len(self.channel_data.get(cate, {}).get(name, [])),
         )
         if len(self.channel_data.get(cate, {}).get(name, [])) == 0:
@@ -122,7 +137,7 @@ class UpdateSource:
         info_list = self.channel_data.get(cate, {}).get(name, [])
         try:
             channel_urls = get_total_urls_from_info_list(info_list)
-            print("write:", cate, name, len(channel_urls))
+            print("write:", cate, name, "num:", len(channel_urls))
             update_channel_urls_txt(cate, name, channel_urls)
         finally:
             self.pbar.update()

@@ -1,5 +1,5 @@
 from selenium import webdriver
-from aiohttp_retry import RetryClient, ExponentialRetry
+import aiohttp
 import asyncio
 from time import time
 import re
@@ -134,7 +134,7 @@ def get_proxy_list(page_count=1):
         for pattern in url_pattern:
             url = pattern.format(page_index)
             retry_func(lambda: driver.get(url), name=url)
-            sleep(3)
+            sleep(1)
             source = re.sub(
                 r"<!--.*?-->",
                 "",
@@ -148,7 +148,7 @@ def get_proxy_list(page_count=1):
             tds = tr.find_all("td")
             ip = tds[0].get_text().strip()
             port = tds[1].get_text().strip()
-            proxy = f"{ip}:{port}"
+            proxy = f"http://{ip}:{port}"
             proxy_list.append(proxy)
             pbar.update()
             pbar.set_description(
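
The scheme prefix added above matters because aiohttp (introduced in this commit for the speed test below) expects its proxy argument to be a full URL including the scheme, and it only supports HTTP proxies. A minimal sketch of that call, with a placeholder target and proxy:

    import asyncio
    import aiohttp

    async def fetch_via_proxy():
        async with aiohttp.ClientSession() as session:
            async with session.get(
                "https://example.com",        # placeholder target
                proxy="http://1.2.3.4:8080",  # placeholder proxy; scheme required
            ) as resp:
                print(resp.status)

    asyncio.run(fetch_via_proxy())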
@@ -158,30 +158,28 @@
     return proxy_list


-async def get_best_proxy(base_url, proxy_list):
+async def get_proxy_list_with_test(base_url, proxy_list):
     """
-    Get the best proxy from the proxy list
+    Get the proxy list with speed test
     """
     if not proxy_list:
-        return None
+        return []
     response_times = await tqdm_asyncio.gather(
-        *(get_speed(base_url, proxy=url) for url in proxy_list),
+        *(get_speed(base_url, timeout=30, proxy=url) for url in proxy_list),
         desc="Testing proxy speed",
     )
-    print(f"Response times: {response_times}")
-    proxy_list_with_speed = [
+    proxy_list_with_test = [
         (proxy, response_time)
         for proxy, response_time in zip(proxy_list, response_times)
         if response_time != float("inf")
     ]
-    if not proxy_list_with_speed:
-        print("No valid proxy found, using default proxy")
-        return None
-    proxy_list_with_speed.sort(key=lambda x: x[1])
-    print(f"Proxy list with speed: {proxy_list_with_speed}")
-    best_proxy = proxy_list_with_speed[0][0]
-    print(f"Using proxy: {best_proxy}, response time: {proxy_list_with_speed[0][1]}ms")
-    return best_proxy
+    if not proxy_list_with_test:
+        print("No valid proxy found")
+        return []
+    proxy_list_with_test.sort(key=lambda x: x[1])
+    proxy_urls = [url for url, _ in proxy_list_with_test]
+    print(f"{len(proxy_urls)} valid proxy found")
+    return proxy_urls


 def format_channel_name(name):
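
For orientation: the reworked helper returns every proxy that passed the speed test, sorted fastest first, rather than a single best proxy. A rough standalone sketch of calling it, assuming get_proxy_list_with_test from this file is in scope (candidate proxies and test URL are placeholders):

    import asyncio

    async def demo():
        candidates = ["http://1.2.3.4:8080", "http://5.6.7.8:3128"]  # placeholders
        # Returns only the candidates that answered HTTP 200, fastest first.
        usable = await get_proxy_list_with_test("https://example.com", candidates)
        print(usable)

    asyncio.run(demo())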
@@ -297,7 +295,7 @@ async def get_channels_by_subscribe_urls(callback):
                 name=subscribe_url,
             )
         except requests.exceptions.Timeout:
-            print(f"Timeout on {subscribe_url}")
+            print(f"Timeout on subscribe: {subscribe_url}")
         if response:
             content = response.text
             lines = content.split("\n")
@@ -354,15 +352,16 @@ async def get_channels_by_online_search(names, callback):
     pageUrl = await use_accessible_url(callback)
     if not pageUrl:
         return channels
-    github_actions = os.environ.get("GITHUB_ACTIONS")
-    if github_actions:
+    if config.open_proxy:
         proxy_list = get_proxy_list(3)
-        print(f"Proxy list: {proxy_list}")
-        proxy = await get_best_proxy(pageUrl, proxy_list) if proxy_list else None
-    start_time = time()
+        proxy_list_test = (
+            await get_proxy_list_with_test(pageUrl, proxy_list) if proxy_list else []
+        )
+        proxy_index = 0
+    start_time = time()

-    def process_channel_by_online_search(name):
-        driver = setup_driver(proxy if github_actions else None)
+    def process_channel_by_online_search(name, proxy=None):
+        driver = setup_driver(proxy)
         wait = WebDriverWait(driver, timeout)
         info_list = []
         try:
@@ -391,6 +390,7 @@ async def get_channels_by_online_search(names, callback):
             for page in range(1, pageNum + 1):
                 try:
                     if page > 1:
+                        sleep(1)
                         page_link = retry_func(
                             lambda: wait.until(
                                 EC.element_to_be_clickable(
@@ -413,7 +413,7 @@ async def get_channels_by_online_search(names, callback):
                     soup = BeautifulSoup(source, "html.parser")
                     if soup:
                         results = get_results_from_soup(soup, name)
-                        print(name, "page:", page, "results len:", len(results))
+                        print(name, "page:", page, "results num:", len(results))
                         for result in results:
                             url, date, resolution = result
                             if url and check_url_by_patterns(url):
@@ -450,7 +450,14 @@ async def get_channels_by_online_search(names, callback):
         while not names_queue.empty():
             loop = asyncio.get_running_loop()
             name = await names_queue.get()
-            loop.run_in_executor(pool, process_channel_by_online_search, name)
+            proxy = (
+                proxy_list_test[proxy_index]
+                if config.open_proxy and proxy_list_test
+                else None
+            )
+            if config.open_proxy and proxy_list_test:
+                proxy_index = (proxy_index + 1) % len(proxy_list_test)
+            loop.run_in_executor(pool, process_channel_by_online_search, name, proxy)
     print("Finished processing online search")
     pbar.close()
     return channels
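
The queue loop above hands each channel the next proxy in the tested list, wrapping around with a modulo. The same round-robin pattern in isolation (channel names and proxy values are illustrative):

    # Round-robin assignment: each task takes the next proxy, wrapping at the end.
    proxies = ["http://1.2.3.4:8080", "http://5.6.7.8:3128"]  # illustrative
    index = 0
    for channel in ["CCTV-1", "CCTV-5", "CCTV-13"]:
        proxy = proxies[index] if proxies else None
        if proxies:
            index = (index + 1) % len(proxies)
        print(channel, "->", proxy)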
@@ -547,24 +554,21 @@ async def get_speed(url, timeout=timeout, proxy=None):
     """
     Get the speed of the url
     """
-    retry_options = ExponentialRetry(attempts=1, max_timeout=timeout)
-    retry_client = RetryClient(raise_for_status=False, retry_options=retry_options)
-    start = time()
-    total = float("inf")
-    try:
-        async with retry_client.get(url, proxy=proxy) as response:
-            resStatus = response.status
-            print(f"{url} {resStatus}")
-            end = time()
-            if resStatus == 200:
-                total = int(round((end - start) * 1000))
-            else:
-                total = float("inf")
-    except Exception as e:
-        print(f"Error on {url}: {e}")
-        total = float("inf")
-    await retry_client.close()
-    return total
+    async with aiohttp.ClientSession(
+        connector=aiohttp.TCPConnector(verify_ssl=False), trust_env=True
+    ) as session:
+        start = time()
+        end = None
+        try:
+            async with session.get(url, timeout=timeout, proxy=proxy) as response:
+                resStatus = response.status
+                if resStatus == 200:
+                    end = time()
+                else:
+                    return float("inf")
+        except Exception as e:
+            return float("inf")
+        return int(round((end - start) * 1000)) if end else float("inf")


 async def sort_urls_by_speed_and_resolution(infoList):
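
As a reference point, the rewritten get_speed above can be driven like this, assuming it is in scope (a minimal sketch; the URL is a placeholder and the timeout value is arbitrary):

    import asyncio

    async def main():
        # get_speed returns elapsed milliseconds on HTTP 200, float("inf") otherwise.
        ms = await get_speed("https://example.com", timeout=10)
        print(f"response time: {ms}ms")

    asyncio.run(main())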