Merge pull request from Guovin/dev

refactor
This commit is contained in:
Guovin 2024-07-08 14:35:23 +08:00 committed by GitHub
commit 3178865a6c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 81 additions and 29 deletions

@ -9,7 +9,7 @@ import re
from utils.retry import retry_func
from utils.channel import format_channel_name
from utils.tools import merge_objects, get_pbar_remaining
from proxy import get_proxy
from proxy import get_proxy, get_proxy_next
config = get_config()
timeout = 10
@ -44,21 +44,33 @@ async def get_channels_by_fofa(callback):
proxy = None
if config.open_proxy:
proxy = await get_proxy(fofa_urls[0], best=True, with_test=True)
driver = setup_driver(proxy)
def process_fofa_channels(fofa_url, fofa_urls_len):
def process_fofa_channels(fofa_url):
nonlocal proxy, fofa_urls_len
results = {}
try:
retry_func(lambda: driver.get(fofa_url), name=fofa_url)
driver = setup_driver(proxy)
try:
retry_func(lambda: driver.get(fofa_url), name=fofa_url)
except Exception as e:
if config.open_proxy:
proxy = get_proxy_next()
driver.close()
driver.quit()
driver = setup_driver(proxy)
driver.get(fofa_url)
fofa_source = re.sub(r"<!--.*?-->", "", driver.page_source, flags=re.DOTALL)
urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source))
with ThreadPoolExecutor(max_workers=100) as executor:
futures = [executor.submit(process_fofa_json_url, url) for url in urls]
for future in futures:
merge_objects(fofa_results, future.result())
merge_objects(results, future.result())
except Exception as e:
print(e)
finally:
driver.close()
driver.quit()
pbar.update()
remain = fofa_urls_len - pbar.n
callback(
@ -67,10 +79,12 @@ async def get_channels_by_fofa(callback):
)
if config.open_online_search and pbar.n / fofa_urls_len == 1:
callback("正在获取在线搜索结果, 请耐心等待", 0)
return results
for fofa_url in fofa_urls:
process_fofa_channels(fofa_url, fofa_urls_len)
driver.quit()
with ThreadPoolExecutor(max_workers=3) as executor:
futures = [executor.submit(process_fofa_channels, fofa_url) for fofa_url in fofa_urls]
for future in futures:
merge_objects(fofa_results, future.result())
pbar.close()
return fofa_results
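
The heart of this hunk is the new failure path in process_fofa_channels: when the first driver.get fails, rotate to the next proxy, discard the session, rebuild it, and retry once. A self-contained sketch of that shape, where connect() and its Resource are stand-ins I am assuming for setup_driver and the WebDriver, not the repo's actual API:

def connect(proxy):
    # stand-in for setup_driver(proxy): anything with get()/quit() will do
    class Resource:
        def get(self, url):
            if proxy == "bad":
                raise ConnectionError(f"cannot reach {url} via {proxy}")
            return f"page via {proxy}"
        def quit(self):
            pass
    return Resource()

def fetch(url, proxy, next_proxy):
    driver = connect(proxy)
    try:
        try:
            return driver.get(url)
        except Exception:
            proxy = next_proxy()     # the repo calls get_proxy_next() here
            driver.quit()            # discard the failed session
            driver = connect(proxy)  # rebuild with the fresh proxy
            return driver.get(url)   # retry once, unguarded on purpose
    finally:
        driver.quit()

print(fetch("http://example.invalid", "bad", lambda: "good"))

The second driver.get is deliberately unguarded: if the replacement proxy also fails, the exception propagates to the outer handler instead of looping forever.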

@ -3,7 +3,7 @@ from utils.speed import get_speed
from utils.channel import format_channel_name, get_results_from_soup
from utils.tools import check_url_by_patterns, get_pbar_remaining, get_soup
from utils.config import get_config
from proxy import get_proxy
from proxy import get_proxy, get_proxy_next
from time import time, sleep
from driver.setup import setup_driver
from utils.retry import (
@ -13,6 +13,7 @@ from utils.retry import (
)
from selenium.webdriver.common.by import By
from tqdm.asyncio import tqdm_asyncio
from concurrent.futures import ThreadPoolExecutor
config = get_config()
@ -50,7 +51,7 @@ def search_submit(driver, name):
)
if not submit_button:
return
sleep(3)
sleep(1)
driver.execute_script("arguments[0].click();", submit_button)
@ -66,12 +67,21 @@ async def get_channels_by_online_search(names, callback):
if config.open_proxy:
proxy = await get_proxy(pageUrl, best=True, with_test=True)
start_time = time()
driver = setup_driver(proxy)
def process_channel_by_online_search(name):
info_list = []
nonlocal proxy
try:
retry_func(lambda: driver.get(pageUrl), name=f"online search:{name}")
driver = setup_driver(proxy)
try:
retry_func(lambda: driver.get(pageUrl), name=f"online search:{name}")
except Exception as e:
if config.open_proxy:
proxy = get_proxy_next()
driver.close()
driver.quit()
driver = setup_driver(proxy)
driver.get(pageUrl)
search_submit(driver, name)
isFavorite = name in config.favorite_list
pageNum = (
@ -92,9 +102,9 @@ async def get_channels_by_online_search(names, callback):
)
if not page_link:
break
sleep(3)
sleep(1)
driver.execute_script("arguments[0].click();", page_link)
sleep(3)
sleep(1)
soup = get_soup(driver.page_source)
if soup:
results = get_results_from_soup(soup, name)
@ -116,6 +126,11 @@ async def get_channels_by_online_search(names, callback):
retries=1,
)
if next_page_link:
if config.open_proxy:
proxy = get_proxy_next()
driver.close()
driver.quit()
driver = setup_driver(proxy)
search_submit(driver, name)
retries += 1
continue
@ -140,18 +155,25 @@ async def get_channels_by_online_search(names, callback):
print(f"{name}:Error on search: {e}")
pass
finally:
channels[format_channel_name(name)] = info_list
driver.close()
driver.quit()
pbar.update()
callback(
f"正在线上查询更新, 剩余{names_len - pbar.n}个频道待查询, 预计剩余时间: {get_pbar_remaining(pbar, start_time)}",
int((pbar.n / names_len) * 100),
)
return {"name": format_channel_name(name), "data": info_list}
names_len = len(names)
pbar = tqdm_asyncio(total=names_len, desc="Online search")
callback(f"正在线上查询更新, 共{names_len}个频道", 0)
for name in names:
process_channel_by_online_search(name)
driver.quit()
with ThreadPoolExecutor(max_workers=3) as executor:
futures = [executor.submit(process_channel_by_online_search, name) for name in names]
for future in futures:
result = future.result()
name = result.get('name')
data = result.get('data', [])
if name:
channels[name] = data
pbar.close()
return channels
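
Because each worker now returns its results instead of writing into channels directly, the returned dict needs quoted "name"/"data" keys, since bare identifiers would be evaluated as variables. A runnable sketch of that contract, with name.upper() standing in for format_channel_name:

from concurrent.futures import ThreadPoolExecutor

def search_one(name):
    # worker-local results; the real worker fills info_list via Selenium
    info_list = [f"http://example.invalid/{name}"]
    return {"name": name.upper(), "data": info_list}  # note the quoted keys

channels = {}
with ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(search_one, n) for n in ["cctv1", "cctv2"]]
    for future in futures:
        result = future.result()
        if result.get("name"):
            channels[result["name"]] = result.get("data", [])
print(channels)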

@ -16,6 +16,18 @@ async def get_proxy(url=None, best=False, with_test=False):
return None
if best:
return proxy_list_test[0]
else:
proxy = proxy_list_test[proxy_index]
proxy_index = (proxy_index + 1) % len(proxy_list_test)
return proxy
def get_proxy_next():
"""
Get the next proxy
"""
global proxy_list_test, proxy_index
if not proxy_list_test:
return None
else:
proxy = proxy_list_test[proxy_index]
proxy_index = (proxy_index + 1) % len(proxy_list_test)
return proxy
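
get_proxy_next rotates a module-level index, and the hunks above call it from ThreadPoolExecutor workers; a hypothetical thread-safe variant of the same round-robin, where the Lock is my addition rather than anything in this diff:

from threading import Lock

_proxies = ["http://p1:8080", "http://p2:8080"]  # placeholder pool
_index = 0
_lock = Lock()

def next_proxy():
    global _index
    if not _proxies:
        return None
    with _lock:  # the index is shared state once callers run in a pool
        proxy = _proxies[_index]
        _index = (_index + 1) % len(_proxies)
    return proxy

print(next_proxy(), next_proxy(), next_proxy())  # p1, p2, p1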

@ -7,6 +7,7 @@ from driver.setup import setup_driver
from utils.retry import retry_func
from time import sleep
from utils.speed import get_speed
from concurrent.futures import ThreadPoolExecutor
def get_proxy_list(page_count=1):
@ -25,10 +26,11 @@ def get_proxy_list(page_count=1):
url = pattern.format(page_index)
urls.append(url)
pbar = tqdm(total=len(urls), desc="Getting proxy list")
driver = setup_driver()
def get_proxy(url):
proxys = []
try:
driver = setup_driver()
url = pattern.format(page_index)
retry_func(lambda: driver.get(url), name=url)
sleep(1)
@ -46,13 +48,17 @@ def get_proxy_list(page_count=1):
ip = tds[0].get_text().strip()
port = tds[1].get_text().strip()
proxy = f"http://{ip}:{port}"
proxy_list.append(proxy)
proxys.append(proxy)
finally:
driver.close()
driver.quit()
pbar.update()
return proxys
for url in urls:
get_proxy(url)
driver.quit()
with ThreadPoolExecutor(max_workers=3) as executor:
futures = [executor.submit(get_proxy, url) for url in urls]
for future in futures:
proxy_list.extend(future.result())
pbar.close()
return proxy_list
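
This is the same refactor shape as the fofa hunk, but with lists: every worker returns its own proxys list and only the main thread extends the shared proxy_list, so no collection is mutated across threads. A minimal self-contained sketch:

from concurrent.futures import ThreadPoolExecutor

def scrape_page(url):
    # worker-local list; the real worker fills it from table rows via Selenium
    return [f"http://10.0.0.{len(url)}:8080"]

urls = ["page1", "page22", "page333"]
proxy_list = []
with ThreadPoolExecutor(max_workers=3) as executor:
    futures = [executor.submit(scrape_page, u) for u in urls]
    for future in futures:
        proxy_list.extend(future.result())  # only the main thread touches proxy_list
print(proxy_list)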

@ -16,13 +16,11 @@ def retry_func(func, retries=max_retries + 1, name=""):
sleep(3)
return func()
except Exception as e:
count = retries - 1
if name and i < count:
if name and i < retries - 1:
print(f"Failed to connect to the {name}. Retrying {i+1}...")
if i == count:
return False
else:
continue
elif i == retries - 1:
raise Exception(f"Failed to connect to the {name} reached the maximum retries.")
raise Exception(f"Failed to connect to the {name} reached the maximum retries.")
def locate_element_with_retry(driver, locator, timeout=timeout, retries=max_retries):
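
The simplified retry loop reads most clearly in isolation: print on every non-final failure, fall through, and raise only once the loop exhausts all attempts. A runnable sketch with the same control flow (the short sleep and toy flaky() are mine):

from time import sleep

def retry_func(func, retries=3, name=""):
    for i in range(retries):
        try:
            sleep(0.1)  # the real code pauses 3s before each attempt
            return func()
        except Exception:
            if name and i < retries - 1:
                print(f"Failed to connect to {name}. Retrying {i + 1}...")
    raise Exception(f"Failed to connect to {name}: reached the maximum retries.")

attempts = {"n": 0}
def flaky():
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise ConnectionError("boom")
    return "ok"

print(retry_func(flaky, name="flaky endpoint"))  # succeeds on attempt 3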