commit
3178865a6c
@ -9,7 +9,7 @@ import re
|
||||
from utils.retry import retry_func
|
||||
from utils.channel import format_channel_name
|
||||
from utils.tools import merge_objects, get_pbar_remaining
|
||||
from proxy import get_proxy
|
||||
from proxy import get_proxy, get_proxy_next
|
||||
|
||||
config = get_config()
|
||||
timeout = 10
|
||||
@ -44,21 +44,33 @@ async def get_channels_by_fofa(callback):
|
||||
proxy = None
|
||||
if config.open_proxy:
|
||||
proxy = await get_proxy(fofa_urls[0], best=True, with_test=True)
|
||||
driver = setup_driver(proxy)
|
||||
|
||||
def process_fofa_channels(fofa_url, fofa_urls_len):
|
||||
def process_fofa_channels(fofa_url):
|
||||
nonlocal proxy, fofa_urls_len
|
||||
results = {}
|
||||
try:
|
||||
retry_func(lambda: driver.get(fofa_url), name=fofa_url)
|
||||
driver = setup_driver(proxy)
|
||||
try:
|
||||
retry_func(lambda: driver.get(fofa_url), name=fofa_url)
|
||||
except Exception as e:
|
||||
if config.open_proxy:
|
||||
proxy = get_proxy_next()
|
||||
driver.close()
|
||||
driver.quit()
|
||||
driver = setup_driver(proxy)
|
||||
driver.get(fofa_url)
|
||||
fofa_source = re.sub(r"<!--.*?-->", "", driver.page_source, flags=re.DOTALL)
|
||||
urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source))
|
||||
|
||||
with ThreadPoolExecutor(max_workers=100) as executor:
|
||||
futures = [executor.submit(process_fofa_json_url, url) for url in urls]
|
||||
for future in futures:
|
||||
merge_objects(fofa_results, future.result())
|
||||
merge_objects(results, future.result())
|
||||
except Exception as e:
|
||||
print(e)
|
||||
finally:
|
||||
driver.close()
|
||||
driver.quit()
|
||||
pbar.update()
|
||||
remain = fofa_urls_len - pbar.n
|
||||
callback(
|
||||
@ -67,10 +79,12 @@ async def get_channels_by_fofa(callback):
|
||||
)
|
||||
if config.open_online_search and pbar.n / fofa_urls_len == 1:
|
||||
callback("正在获取在线搜索结果, 请耐心等待", 0)
|
||||
return results
|
||||
|
||||
for fofa_url in fofa_urls:
|
||||
process_fofa_channels(fofa_url, fofa_urls_len)
|
||||
driver.quit()
|
||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||
futures = [executor.submit(process_fofa_channels, fofa_url) for fofa_url in fofa_urls]
|
||||
for future in futures:
|
||||
merge_objects(fofa_results, future.result())
|
||||
pbar.close()
|
||||
return fofa_results
|
||||
|
||||
|
@ -3,7 +3,7 @@ from utils.speed import get_speed
|
||||
from utils.channel import format_channel_name, get_results_from_soup
|
||||
from utils.tools import check_url_by_patterns, get_pbar_remaining, get_soup
|
||||
from utils.config import get_config
|
||||
from proxy import get_proxy
|
||||
from proxy import get_proxy, get_proxy_next
|
||||
from time import time, sleep
|
||||
from driver.setup import setup_driver
|
||||
from utils.retry import (
|
||||
@ -13,6 +13,7 @@ from utils.retry import (
|
||||
)
|
||||
from selenium.webdriver.common.by import By
|
||||
from tqdm.asyncio import tqdm_asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
config = get_config()
|
||||
|
||||
@ -50,7 +51,7 @@ def search_submit(driver, name):
|
||||
)
|
||||
if not submit_button:
|
||||
return
|
||||
sleep(3)
|
||||
sleep(1)
|
||||
driver.execute_script("arguments[0].click();", submit_button)
|
||||
|
||||
|
||||
@ -66,12 +67,21 @@ async def get_channels_by_online_search(names, callback):
|
||||
if config.open_proxy:
|
||||
proxy = await get_proxy(pageUrl, best=True, with_test=True)
|
||||
start_time = time()
|
||||
driver = setup_driver(proxy)
|
||||
|
||||
def process_channel_by_online_search(name):
|
||||
info_list = []
|
||||
nonlocal proxy
|
||||
try:
|
||||
retry_func(lambda: driver.get(pageUrl), name=f"online search:{name}")
|
||||
driver = setup_driver(proxy)
|
||||
try:
|
||||
retry_func(lambda: driver.get(pageUrl), name=f"online search:{name}")
|
||||
except Exception as e:
|
||||
if config.open_proxy:
|
||||
proxy = get_proxy_next()
|
||||
driver.close()
|
||||
driver.quit()
|
||||
driver = setup_driver(proxy)
|
||||
driver.get(pageUrl)
|
||||
search_submit(driver, name)
|
||||
isFavorite = name in config.favorite_list
|
||||
pageNum = (
|
||||
@ -92,9 +102,9 @@ async def get_channels_by_online_search(names, callback):
|
||||
)
|
||||
if not page_link:
|
||||
break
|
||||
sleep(3)
|
||||
sleep(1)
|
||||
driver.execute_script("arguments[0].click();", page_link)
|
||||
sleep(3)
|
||||
sleep(1)
|
||||
soup = get_soup(driver.page_source)
|
||||
if soup:
|
||||
results = get_results_from_soup(soup, name)
|
||||
@ -116,6 +126,11 @@ async def get_channels_by_online_search(names, callback):
|
||||
retries=1,
|
||||
)
|
||||
if next_page_link:
|
||||
if config.open_proxy:
|
||||
proxy = get_proxy_next()
|
||||
driver.close()
|
||||
driver.quit()
|
||||
driver = setup_driver(proxy)
|
||||
search_submit(driver, name)
|
||||
retries += 1
|
||||
continue
|
||||
@ -140,18 +155,25 @@ async def get_channels_by_online_search(names, callback):
|
||||
print(f"{name}:Error on search: {e}")
|
||||
pass
|
||||
finally:
|
||||
channels[format_channel_name(name)] = info_list
|
||||
driver.close()
|
||||
driver.quit()
|
||||
pbar.update()
|
||||
callback(
|
||||
f"正在线上查询更新, 剩余{names_len - pbar.n}个频道待查询, 预计剩余时间: {get_pbar_remaining(pbar, start_time)}",
|
||||
int((pbar.n / names_len) * 100),
|
||||
)
|
||||
return {name: format_channel_name(name), data: info_list}
|
||||
|
||||
names_len = len(names)
|
||||
pbar = tqdm_asyncio(total=names_len, desc="Online search")
|
||||
callback(f"正在线上查询更新, 共{names_len}个频道", 0)
|
||||
for name in names:
|
||||
process_channel_by_online_search(name)
|
||||
driver.quit()
|
||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||
futures = [executor.submit(process_channel_by_online_search, name) for name in names]
|
||||
for future in futures:
|
||||
result = future.result()
|
||||
name = result.get('name')
|
||||
data = result.get('data', [])
|
||||
if name:
|
||||
channels[name] = data
|
||||
pbar.close()
|
||||
return channels
|
||||
|
@ -16,6 +16,18 @@ async def get_proxy(url=None, best=False, with_test=False):
|
||||
return None
|
||||
if best:
|
||||
return proxy_list_test[0]
|
||||
else:
|
||||
proxy = proxy_list_test[proxy_index]
|
||||
proxy_index = (proxy_index + 1) % len(proxy_list_test)
|
||||
return proxy
|
||||
|
||||
def get_proxy_next():
    """
    Get the next proxy

    Reads the module-level ``proxy_list_test`` at the module-level
    ``proxy_index``, then advances ``proxy_index`` by one, wrapping around to
    0 at the end of the list.

    Returns:
        The proxy entry stored at the current index, or ``None`` when
        ``proxy_list_test`` is empty.
    """
    global proxy_list_test, proxy_index
    if not proxy_list_test:
        return None
    proxy = proxy_list_test[proxy_index]
    proxy_index = (proxy_index + 1) % len(proxy_list_test)
    # The selected proxy must be handed back explicitly; without this return
    # the function implicitly yields None even when proxies are available.
    return proxy
|
||||
|
@ -7,6 +7,7 @@ from driver.setup import setup_driver
|
||||
from utils.retry import retry_func
|
||||
from time import sleep
|
||||
from utils.speed import get_speed
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
||||
def get_proxy_list(page_count=1):
|
||||
@ -25,10 +26,11 @@ def get_proxy_list(page_count=1):
|
||||
url = pattern.format(page_index)
|
||||
urls.append(url)
|
||||
pbar = tqdm(total=len(urls), desc="Getting proxy list")
|
||||
driver = setup_driver()
|
||||
|
||||
def get_proxy(url):
|
||||
proxys = []
|
||||
try:
|
||||
driver = setup_driver()
|
||||
url = pattern.format(page_index)
|
||||
retry_func(lambda: driver.get(url), name=url)
|
||||
sleep(1)
|
||||
@ -46,13 +48,17 @@ def get_proxy_list(page_count=1):
|
||||
ip = tds[0].get_text().strip()
|
||||
port = tds[1].get_text().strip()
|
||||
proxy = f"http://{ip}:{port}"
|
||||
proxy_list.append(proxy)
|
||||
proxys.append(proxy)
|
||||
finally:
|
||||
driver.close()
|
||||
driver.quit()
|
||||
pbar.update()
|
||||
return proxys
|
||||
|
||||
for url in urls:
|
||||
get_proxy(url)
|
||||
driver.quit()
|
||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||
futures = [executor.submit(get_proxy, url) for url in urls]
|
||||
for future in futures:
|
||||
proxy_list.extend(future.result())
|
||||
pbar.close()
|
||||
return proxy_list
|
||||
|
||||
|
@ -16,13 +16,11 @@ def retry_func(func, retries=max_retries + 1, name=""):
|
||||
sleep(3)
|
||||
return func()
|
||||
except Exception as e:
|
||||
count = retries - 1
|
||||
if name and i < count:
|
||||
if name and i < retries - 1:
|
||||
print(f"Failed to connect to the {name}. Retrying {i+1}...")
|
||||
if i == count:
|
||||
return False
|
||||
else:
|
||||
continue
|
||||
elif i == retries - 1:
|
||||
raise Exception(f"Failed to connect to the {name} reached the maximum retries.")
|
||||
raise Exception(f"Failed to connect to the {name} reached the maximum retries.")
|
||||
|
||||
|
||||
def locate_element_with_retry(driver, locator, timeout=timeout, retries=max_retries):
|
||||
|
Loading…
x
Reference in New Issue
Block a user