
feat:requests retry

guorong.zheng 2024-07-09 11:44:51 +08:00
parent 0d66ba0765
commit 8c33a7da71
4 changed files with 25 additions and 11 deletions
fofa
online_search
proxy
requests_custom

@@ -10,7 +10,7 @@ from utils.retry import retry_func
 from utils.channel import format_channel_name
 from utils.tools import merge_objects, get_pbar_remaining
 from proxy import get_proxy, get_proxy_next
-from requests_custom.utils import get_source_requests, close_session
+from requests_custom.utils import get_source_requests, reset_user_agent, close_session
 config = get_config()
 timeout = 30
@@ -61,11 +61,15 @@ async def get_channels_by_fofa(callback):
                     driver.quit()
                     driver = setup_driver(proxy)
                     driver.get(fofa_url)
-            page_source = (
-                driver.page_source
-                if config.open_driver
-                else get_source_requests(fofa_url)
-            )
+                page_source = driver.page_source
+            else:
+                try:
+                    page_source = retry_func(
+                        lambda: get_source_requests(fofa_url), name=fofa_url
+                    )
+                except Exception as e:
+                    reset_user_agent()
+                    page_source = get_source_requests(fofa_url)
             fofa_source = re.sub(r"<!--.*?-->", "", page_source, flags=re.DOTALL)
             urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source))
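
The new else branch is the heart of this commit: when the driver is off, the request first goes through retry_func, and only after retries are exhausted does it rotate the User-Agent and try one last bare request. A minimal standalone sketch of that pattern follows; retry_func, reset_user_agent, and the session setup here are assumed stand-ins for the repo's helpers in utils/retry.py and requests_custom/utils.py, not their actual code.

import random
import time

import requests

# Assumed stand-ins for the project's helpers; bodies are illustrative only.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15",
]
session = requests.Session()


def reset_user_agent():
    # Pick a fresh random User-Agent for subsequent requests.
    session.headers["User-Agent"] = random.choice(USER_AGENTS)


def retry_func(func, name="", retries=3, delay=1):
    # Call func until it succeeds or the retries run out, pausing between attempts.
    for attempt in range(retries):
        try:
            return func()
        except Exception:
            if attempt == retries - 1:
                raise
            time.sleep(delay)


def get_page_source(url, timeout=30):
    # Retry first; if every attempt fails, rotate the User-Agent and try once more.
    try:
        return retry_func(lambda: session.get(url, timeout=timeout).text, name=url)
    except Exception:
        reset_user_agent()
        return session.get(url, timeout=timeout).text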

@@ -18,7 +18,7 @@ from utils.retry import (
 from selenium.webdriver.common.by import By
 from tqdm.asyncio import tqdm_asyncio
 from concurrent.futures import ThreadPoolExecutor
-from requests_custom.utils import get_soup_requests, close_session
+from requests_custom.utils import get_soup_requests, reset_user_agent, close_session
 config = get_config()
@@ -103,6 +103,7 @@ async def get_channels_by_online_search(names, callback):
             except Exception as e:
                 if config.open_proxy:
                     proxy = get_proxy_next()
+                reset_user_agent()
                 page_soup = get_soup_requests(request_url, proxy=proxy)
             if not page_soup:
                 print(f"{name}:Request fail.")
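
get_soup_requests itself is not shown in this diff; for context, a helper of that shape could look like the sketch below (an assumption based on how it is called here and on the get_source_requests hunk further down, not the repo's actual implementation).

import requests
from bs4 import BeautifulSoup

session = requests.Session()


def get_soup_requests(url, proxy=None, timeout=30):
    # Fetch the page, optionally through an HTTP proxy, and parse it with BeautifulSoup.
    proxies = {"http": proxy} if proxy else None
    response = session.get(url, proxies=proxies, timeout=timeout)
    return BeautifulSoup(response.text, "html.parser")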

@@ -5,7 +5,8 @@ from utils.speed import get_speed
 from concurrent.futures import ThreadPoolExecutor
 from utils.config import get_config
 from driver.utils import get_soup_driver
-from requests_custom.utils import get_soup_requests, close_session
+from requests_custom.utils import get_soup_requests, reset_user_agent, close_session
+from utils.retry import retry_func
 config = get_config()
@@ -30,9 +31,14 @@ def get_proxy_list(page_count=1):
     def get_proxy(url):
         proxys = []
         try:
-            soup = (
-                get_soup_driver(url) if config.open_driver else get_soup_requests(url)
-            )
+            if config.open_driver:
+                soup = retry_func(lambda: get_soup_driver(url), name=url)
+            else:
+                try:
+                    soup = retry_func(lambda: get_soup_requests(url), name=url)
+                except Exception as e:
+                    reset_user_agent()
+                    soup = get_soup_requests(url)
             table = soup.find("table")
             trs = table.find_all("tr") if table else []
             for tr in trs[1:]:
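
The loop the hunk ends on walks the proxy table row by row; a hedged sketch of that kind of scraping follows, with the ip/port column positions assumed rather than taken from the repo.

from bs4 import BeautifulSoup


def parse_proxy_table(soup: BeautifulSoup) -> list:
    # Collect "http://ip:port" strings from a typical free-proxy HTML table,
    # skipping the header row; the column order is an assumption.
    proxys = []
    table = soup.find("table")
    trs = table.find_all("tr") if table else []
    for tr in trs[1:]:
        tds = tr.find_all("td")
        if len(tds) >= 2:
            ip = tds[0].get_text(strip=True)
            port = tds[1].get_text(strip=True)
            proxys.append(f"http://{ip}:{port}")
    return proxys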
@@ -61,6 +67,7 @@ async def get_proxy_list_with_test(base_url, proxy_list):
     Get the proxy list with speed test
     """
     if not proxy_list:
+        print("No valid proxy found")
         return []
     semaphore = Semaphore(100)
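
Semaphore(100) caps how many proxies are speed-tested at once; the general shape of such a test is sketched below, with check standing in for the repo's get_speed-based logic (its real signature is not shown in this diff).

import asyncio
from asyncio import Semaphore


async def filter_working_proxies(base_url, proxy_list, check):
    # Run the checks concurrently, but never more than 100 at a time,
    # and keep only the proxies whose check succeeds.
    semaphore = Semaphore(100)

    async def test_one(proxy):
        async with semaphore:
            try:
                return proxy if await check(base_url, proxy) else None
            except Exception:
                return None

    results = await asyncio.gather(*(test_one(p) for p in proxy_list))
    return [proxy for proxy in results if proxy]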

@@ -2,6 +2,7 @@ import requests
 import re
 from bs4 import BeautifulSoup
 import random
+from time import sleep
 user_agents = [
     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
@@ -25,6 +26,7 @@ def get_source_requests(url, proxy=None, timeout=30):
     """
     proxies = {"http": proxy}
     response = session.get(url, headers=headers, proxies=proxies, timeout=timeout)
+    sleep(1)
     source = re.sub(
         r"<!--.*?-->",
         "",