commit dc26cfda13

Pipfile (2 changes)
@@ -19,7 +19,7 @@ bs4 = "*"
 tqdm = "*"
 async-timeout = "*"
 pyinstaller = "*"
-aiohttp-retry = "*"
+aiohttp = "*"
 
 [requires]
 python_version = "3.8"
Pipfile.lock (37 changes, generated)
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "7ba6998730a27a7cc92698bd503c672eb5a0bcce3a7412a7e044e50d915240ef"
+            "sha256": "4bfb309bc7d7f25a5cb114e48b398480e2ff489ff884e3d4c07d4b726d58875a"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -99,15 +99,6 @@
             "markers": "python_version >= '3.8'",
             "version": "==3.9.5"
         },
-        "aiohttp-retry": {
-            "hashes": [
-                "sha256:3aeeead8f6afe48272db93ced9440cf4eda8b6fd7ee2abb25357b7eb28525b45",
-                "sha256:9a8e637e31682ad36e1ff9f8bcba912fcfc7d7041722bc901a4b948da4d71ea9"
-            ],
-            "index": "pypi",
-            "markers": "python_version >= '3.7'",
-            "version": "==2.8.3"
-        },
         "aiosignal": {
             "hashes": [
                 "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc",
@@ -444,11 +435,11 @@
         },
         "importlib-metadata": {
             "hashes": [
-                "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570",
-                "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"
+                "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f",
+                "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"
             ],
             "markers": "python_version < '3.10'",
-            "version": "==7.1.0"
+            "version": "==8.0.0"
         },
         "multidict": {
             "hashes": [
@@ -640,12 +631,12 @@
         },
         "selenium": {
             "hashes": [
-                "sha256:4770ffe5a5264e609de7dc914be6b89987512040d5a8efb2abb181330d097993",
-                "sha256:650dbfa5159895ff00ad16e5ddb6ceecb86b90c7ed2012b3f041f64e6e4904fe"
+                "sha256:903c8c9d61b3eea6fcc9809dc7d9377e04e2ac87709876542cc8f863e482c4ce",
+                "sha256:e424991196e9857e19bf04fe5c1c0a4aac076794ff5e74615b1124e729d93104"
             ],
             "index": "pypi",
             "markers": "python_version >= '3.8'",
-            "version": "==4.21.0"
+            "version": "==4.22.0"
         },
         "selenium-stealth": {
             "hashes": [
@@ -657,11 +648,11 @@
         },
         "setuptools": {
             "hashes": [
-                "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4",
-                "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"
+                "sha256:937a48c7cdb7a21eb53cd7f9b59e525503aa8abaf3584c730dc5f7a5bec3a650",
+                "sha256:a58a8fde0541dab0419750bcc521fbdf8585f6e5cb41909df3a472ef7b81ca95"
             ],
             "markers": "python_version >= '3.8'",
-            "version": "==70.0.0"
+            "version": "==70.1.1"
         },
         "sgmllib3k": {
             "hashes": [
@@ -736,6 +727,14 @@
             "markers": "python_version >= '3.8'",
             "version": "==2.2.2"
         },
+        "websocket-client": {
+            "hashes": [
+                "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526",
+                "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==1.8.0"
+        },
         "wsproto": {
             "hashes": [
                 "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065",
config.py
@@ -32,3 +32,4 @@ subscribe_urls = [
 ]
 open_multicast = True
 region_list = ["all"]
+open_proxy = True
main.py (19 changes)
@@ -93,19 +93,34 @@ class UpdateSource:
                    self.append_data_to_info_data(
                        cate, name, self.results["open_subscribe"].get(formatName, [])
                    )
+                    print(
+                        name,
+                        "subscribe num:",
+                        len(self.results["open_subscribe"].get(formatName, [])),
+                    )
                if config.open_multicast:
                    self.append_data_to_info_data(
                        cate, name, self.results["open_multicast"].get(formatName, [])
                    )
+                    print(
+                        name,
+                        "multicast num:",
+                        len(self.results["open_multicast"].get(formatName, [])),
+                    )
                if config.open_online_search:
                    self.append_data_to_info_data(
                        cate,
                        name,
                        self.results["open_online_search"].get(formatName, []),
                    )
+                    print(
+                        name,
+                        "online search num:",
+                        len(self.results["open_online_search"].get(formatName, [])),
+                    )
                print(
                    name,
-                    "total len:",
+                    "total num:",
                    len(self.channel_data.get(cate, {}).get(name, [])),
                )
                if len(self.channel_data.get(cate, {}).get(name, [])) == 0:
@@ -122,7 +137,7 @@ class UpdateSource:
         info_list = self.channel_data.get(cate, {}).get(name, [])
         try:
             channel_urls = get_total_urls_from_info_list(info_list)
-            print("write:", cate, name, len(channel_urls))
+            print("write:", cate, name, "num:", len(channel_urls))
             update_channel_urls_txt(cate, name, channel_urls)
         finally:
             self.pbar.update()
utils.py (94 changes)
@@ -1,5 +1,5 @@
 from selenium import webdriver
-from aiohttp_retry import RetryClient, ExponentialRetry
+import aiohttp
 import asyncio
 from time import time
 import re
@@ -134,7 +134,7 @@ def get_proxy_list(page_count=1):
         for pattern in url_pattern:
             url = pattern.format(page_index)
             retry_func(lambda: driver.get(url), name=url)
-            sleep(3)
+            sleep(1)
             source = re.sub(
                 r"<!--.*?-->",
                 "",
@@ -148,7 +148,7 @@ def get_proxy_list(page_count=1):
             tds = tr.find_all("td")
             ip = tds[0].get_text().strip()
             port = tds[1].get_text().strip()
-            proxy = f"{ip}:{port}"
+            proxy = f"http://{ip}:{port}"
             proxy_list.append(proxy)
             pbar.update()
             pbar.set_description(
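
The http:// prefix matters downstream: once these strings reach aiohttp (see the reworked get_speed below), the proxy= argument expects a full proxy URL, not a bare host:port pair. A minimal sketch of that contract, with a hypothetical proxy address:

import asyncio
import aiohttp

async def fetch_via_proxy(url, proxy):
    # aiohttp wants a full proxy URL such as "http://1.2.3.4:8080";
    # a bare "1.2.3.4:8080" is rejected as an invalid URL.
    async with aiohttp.ClientSession() as session:
        async with session.get(
            url, proxy=proxy, timeout=aiohttp.ClientTimeout(total=10)
        ) as response:
            return response.status

# Hypothetical proxy address, for illustration only:
# asyncio.run(fetch_via_proxy("http://example.com", "http://1.2.3.4:8080"))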
@@ -158,30 +158,28 @@ def get_proxy_list(page_count=1):
     return proxy_list
 
 
-async def get_best_proxy(base_url, proxy_list):
+async def get_proxy_list_with_test(base_url, proxy_list):
     """
-    Get the best proxy from the proxy list
+    Get the proxy list with speed test
     """
     if not proxy_list:
-        return None
+        return []
     response_times = await tqdm_asyncio.gather(
-        *(get_speed(base_url, proxy=url) for url in proxy_list),
+        *(get_speed(base_url, timeout=30, proxy=url) for url in proxy_list),
         desc="Testing proxy speed",
     )
-    print(f"Response times: {response_times}")
-    proxy_list_with_speed = [
+    proxy_list_with_test = [
         (proxy, response_time)
         for proxy, response_time in zip(proxy_list, response_times)
         if response_time != float("inf")
     ]
-    if not proxy_list_with_speed:
-        print("No valid proxy found, using default proxy")
-        return None
-    proxy_list_with_speed.sort(key=lambda x: x[1])
-    print(f"Proxy list with speed: {proxy_list_with_speed}")
-    best_proxy = proxy_list_with_speed[0][0]
-    print(f"Using proxy: {best_proxy}, response time: {proxy_list_with_speed[0][1]}ms")
-    return best_proxy
+    if not proxy_list_with_test:
+        print("No valid proxy found")
+        return []
+    proxy_list_with_test.sort(key=lambda x: x[1])
+    proxy_urls = [url for url, _ in proxy_list_with_test]
+    print(f"{len(proxy_urls)} valid proxy found")
+    return proxy_urls
 
 
 def format_channel_name(name):
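
The renamed helper now returns every working proxy sorted by latency rather than a single best one. The pattern: fan out one get_speed coroutine per proxy with tqdm_asyncio.gather, discard failures (reported as float("inf")), and sort the survivors by response time. A self-contained sketch of the same idea, with the speed test stubbed out:

import asyncio
import random
from tqdm.asyncio import tqdm_asyncio

async def stub_speed_test(proxy):
    # Stand-in for get_speed(): latency in ms, or inf on failure.
    await asyncio.sleep(0.01)
    return random.choice([random.randint(50, 500), float("inf")])

async def rank_proxies(proxy_list):
    response_times = await tqdm_asyncio.gather(
        *(stub_speed_test(proxy) for proxy in proxy_list),
        desc="Testing proxy speed",
    )
    # Keep only proxies that answered, then sort fastest-first.
    tested = [
        (proxy, rt)
        for proxy, rt in zip(proxy_list, response_times)
        if rt != float("inf")
    ]
    tested.sort(key=lambda pair: pair[1])
    return [proxy for proxy, _ in tested]

# Hypothetical addresses:
# print(asyncio.run(rank_proxies([f"http://10.0.0.{i}:8080" for i in range(1, 6)])))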
@@ -297,7 +295,7 @@ async def get_channels_by_subscribe_urls(callback):
                 name=subscribe_url,
             )
         except requests.exceptions.Timeout:
-            print(f"Timeout on {subscribe_url}")
+            print(f"Timeout on subscribe: {subscribe_url}")
         if response:
             content = response.text
             lines = content.split("\n")
@@ -354,15 +352,16 @@ async def get_channels_by_online_search(names, callback):
     pageUrl = await use_accessible_url(callback)
     if not pageUrl:
         return channels
-    github_actions = os.environ.get("GITHUB_ACTIONS")
-    if github_actions:
+    if config.open_proxy:
         proxy_list = get_proxy_list(3)
-        print(f"Proxy list: {proxy_list}")
-        proxy = await get_best_proxy(pageUrl, proxy_list) if proxy_list else None
-    start_time = time()
+        proxy_list_test = (
+            await get_proxy_list_with_test(pageUrl, proxy_list) if proxy_list else []
+        )
+        proxy_index = 0
+    start_time = time()
 
-    def process_channel_by_online_search(name):
-        driver = setup_driver(proxy if github_actions else None)
+    def process_channel_by_online_search(name, proxy=None):
+        driver = setup_driver(proxy)
         wait = WebDriverWait(driver, timeout)
         info_list = []
         try:
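
Threading the proxy into the worker as a parameter, rather than closing over one module-level value, leans on the fact that loop.run_in_executor forwards extra positional arguments to the callable (which is what the dispatch loop in a later hunk does). A minimal sketch with a hypothetical worker:

import asyncio
from concurrent.futures import ThreadPoolExecutor

def worker(name, proxy=None):
    # Hypothetical stand-in for process_channel_by_online_search.
    return f"{name} via {proxy or 'no proxy'}"

async def main():
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=2) as pool:
        # Extra positional args after the callable are passed through to it.
        results = await asyncio.gather(
            loop.run_in_executor(pool, worker, "CCTV-1", "http://10.0.0.1:8080"),
            loop.run_in_executor(pool, worker, "CCTV-2", None),
        )
    print(results)

asyncio.run(main())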
@@ -391,6 +390,7 @@ async def get_channels_by_online_search(names, callback):
         for page in range(1, pageNum + 1):
             try:
                 if page > 1:
+                    sleep(1)
                     page_link = retry_func(
                         lambda: wait.until(
                             EC.element_to_be_clickable(
@@ -413,7 +413,7 @@ async def get_channels_by_online_search(names, callback):
                     soup = BeautifulSoup(source, "html.parser")
                     if soup:
                         results = get_results_from_soup(soup, name)
-                        print(name, "page:", page, "results len:", len(results))
+                        print(name, "page:", page, "results num:", len(results))
                         for result in results:
                             url, date, resolution = result
                             if url and check_url_by_patterns(url):
@@ -450,7 +450,14 @@ async def get_channels_by_online_search(names, callback):
         while not names_queue.empty():
             loop = asyncio.get_running_loop()
             name = await names_queue.get()
-            loop.run_in_executor(pool, process_channel_by_online_search, name)
+            proxy = (
+                proxy_list_test[proxy_index]
+                if config.open_proxy and proxy_list_test
+                else None
+            )
+            if config.open_proxy and proxy_list_test:
+                proxy_index = (proxy_index + 1) % len(proxy_list_test)
+            loop.run_in_executor(pool, process_channel_by_online_search, name, proxy)
     print("Finished processing online search")
     pbar.close()
     return channels
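
Rotating through proxy_list_test round-robin spreads the Selenium sessions across every proxy that passed the speed test, instead of pinning all traffic to the single fastest proxy as the old get_best_proxy did. The rotation is plain modular index arithmetic; itertools.cycle is an equivalent alternative. A sketch with hypothetical values:

from itertools import cycle

# Hypothetical tested proxies, fastest first.
proxy_list_test = ["http://10.0.0.1:8080", "http://10.0.0.2:8080"]

# Manual rotation, as in the diff: the index wraps around via modulo.
proxy_index = 0
for name in ["CCTV-1", "CCTV-2", "CCTV-3"]:
    proxy = proxy_list_test[proxy_index]
    proxy_index = (proxy_index + 1) % len(proxy_list_test)
    print(name, "->", proxy)

# Equivalent with itertools.cycle:
proxies = cycle(proxy_list_test)
for name in ["CCTV-1", "CCTV-2", "CCTV-3"]:
    print(name, "->", next(proxies))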
@@ -547,24 +554,21 @@ async def get_speed(url, timeout=timeout, proxy=None):
     """
     Get the speed of the url
     """
-    retry_options = ExponentialRetry(attempts=1, max_timeout=timeout)
-    retry_client = RetryClient(raise_for_status=False, retry_options=retry_options)
-    start = time()
-    total = float("inf")
-    try:
-        async with retry_client.get(url, proxy=proxy) as response:
-            resStatus = response.status
-            print(f"{url} {resStatus}")
-            end = time()
-            if resStatus == 200:
-                total = int(round((end - start) * 1000))
-            else:
-                total = float("inf")
-    except Exception as e:
-        print(f"Error on {url}: {e}")
-        total = float("inf")
-    await retry_client.close()
-    return total
+    async with aiohttp.ClientSession(
+        connector=aiohttp.TCPConnector(verify_ssl=False), trust_env=True
+    ) as session:
+        start = time()
+        end = None
+        try:
+            async with session.get(url, timeout=timeout, proxy=proxy) as response:
+                resStatus = response.status
+                if resStatus == 200:
+                    end = time()
+                else:
+                    return float("inf")
+        except Exception as e:
+            return float("inf")
+        return int(round((end - start) * 1000)) if end else float("inf")
 
 
 async def sort_urls_by_speed_and_resolution(infoList):
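
With the retry client gone, "speed" is simply wall-clock milliseconds to a 200 response through the optional proxy, and float("inf") marks any failure so unusable endpoints sort last. A usage sketch, assuming the get_speed defined above (the URLs are placeholders):

import asyncio

async def main():
    urls = ["http://example.com", "http://10.255.255.1"]  # placeholders
    # get_speed returns milliseconds on success, float("inf") otherwise.
    times = await asyncio.gather(*(get_speed(url, timeout=5) for url in urls))
    for url, ms in zip(urls, times):
        print(url, "unreachable" if ms == float("inf") else f"{ms}ms")

# asyncio.run(main())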