guorong.zheng 2024-07-05 18:07:15 +08:00
parent 7c1a6f1a3e
commit d70b24b2f7
12 changed files with 32 additions and 28 deletions

@@ -14,7 +14,6 @@ requests = "*"
 feedparser = "*"
 pytz = "*"
 selenium = "*"
-selenium-stealth = "*"
 bs4 = "*"
 tqdm = "*"
 async-timeout = "*"

Pipfile.lock generated

@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "513ed2d5439904fe7a8043886b946726a4e704a202910667678927224f65e98e"
+            "sha256": "f2e414a925be2ea62fcf7660090d518ea80826d1e510d62732a75b5894b58730"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -860,14 +860,6 @@
             "markers": "python_version >= '3.8'",
             "version": "==4.22.0"
         },
-        "selenium-stealth": {
-            "hashes": [
-                "sha256:b62da5452aa4a84f29a4dfb21a9696aff20788a7c570dd0b81bc04a940848b97"
-            ],
-            "index": "aliyun",
-            "markers": "python_version >= '3' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
-            "version": "==1.0.6"
-        },
         "setuptools": {
             "hashes": [
                 "sha256:937a48c7cdb7a21eb53cd7f9b59e525503aa8abaf3584c730dc5f7a5bec3a650",

@@ -8,6 +8,7 @@ def setup_driver(proxy=None):
     options = webdriver.ChromeOptions()
     options.add_argument("start-maximized")
     options.add_argument("--headless")
+    options.add_argument("--disable-gpu")
     options.add_experimental_option("excludeSwitches", ["enable-logging"])
     options.add_experimental_option("useAutomationExtension", False)
     options.add_argument("blink-settings=imagesEnabled=false")
@@ -17,6 +18,7 @@ def setup_driver(proxy=None):
     options.add_argument("blink-settings=imagesEnabled=false")
     options.add_argument("--no-sandbox")
     options.add_argument("--disable-dev-shm-usage")
+    options.add_argument("--disable-extensions")
     if proxy:
         options.add_argument("--proxy-server=%s" % proxy)
     driver = webdriver.Chrome(options=options)
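
--disable-gpu and --disable-extensions are the usual hardening flags for headless Chrome in containers and CI: the first sidesteps GPU initialization failures, the second trims per-instance startup cost. A quick smoke test for the configured driver (a sketch only, assuming the setup_driver patched above):

from driver.setup import setup_driver  # the module patched in this hunk

driver = None
try:
    driver = setup_driver()
    driver.get("https://example.com")
    print(driver.title)  # expect "Example Domain" if the headless setup works
finally:
    if driver:
        driver.quit()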

@@ -0,0 +1 @@
+from .request import get_channels_by_fofa

@@ -2,7 +2,7 @@ from utils.config import get_config
 from tqdm.asyncio import tqdm_asyncio
 from time import time
 from asyncio import Queue, get_running_loop
-from request import get
+from requests import get
 from concurrent.futures import ThreadPoolExecutor
 import fofa_map
 from driver.setup import setup_driver
@@ -47,10 +47,10 @@ async def get_channels_by_fofa(callback):
         await fofa_queue.put(fofa_url)

     def process_fofa_channels(fofa_url, fofa_urls_len, callback):
-        driver = setup_driver()
+        driver = None
         try:
+            driver = setup_driver()
             retry_func(lambda: driver.get(fofa_url), name=fofa_url)
-            driver.get(fofa_url)
             fofa_source = re.sub(r"<!--.*?-->", "", driver.page_source, flags=re.DOTALL)
             urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source))
             channels = {}
@@ -92,6 +92,8 @@ async def get_channels_by_fofa(callback):
             # print(e)
             pass
         finally:
+            if driver:
+                driver.quit()
             fofa_queue.task_done()
             pbar.update()
             remain = fofa_urls_len - pbar.n
@@ -102,9 +104,8 @@ async def get_channels_by_fofa(callback):
             )
             if config.open_online_search and pbar.n / fofa_urls_len == 1:
                 callback("正在获取在线搜索结果, 请耐心等待", 0)
-            driver.quit()

-    with ThreadPoolExecutor(max_workers=10) as pool:
+    with ThreadPoolExecutor(max_workers=5) as pool:
         while not fofa_queue.empty():
             loop = get_running_loop()
             fofa_url = await fofa_queue.get()
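
The pattern behind these hunks: previously setup_driver() ran outside try, so an exception during driver construction skipped the cleanup path, and the lone driver.quit() after the progress callback could run on an already-closed or unbound driver. Initializing driver = None, constructing inside try, and guarding the quit in finally closes each task's driver exactly once. Distilled as a sketch (not repo code):

def process_one(url):
    driver = None
    try:
        driver = setup_driver()  # may raise; driver then stays None
        retry_func(lambda: driver.get(url), name=url)
        # ... scrape driver.page_source ...
    finally:
        if driver:  # quit only a driver that was actually created
            driver.quit()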

@@ -10,8 +10,9 @@ from utils.utils import (
     check_url_by_patterns,
     get_pbar_remaining,
     get_ip_address,
+    get_total_urls_from_info_list,
 )
-from utils.speed import sort_urls_by_speed_and_resolution, get_total_urls_from_info_list
+from utils.speed import sort_urls_by_speed_and_resolution
 from subscribe import get_channels_by_subscribe_urls
 from fofa import get_channels_by_fofa
 from online_search import get_channels_by_online_search

@@ -0,0 +1 @@
+from .request import get_channels_by_online_search

@@ -61,9 +61,10 @@ async def get_channels_by_online_search(names, callback):
     start_time = time()

     def process_channel_by_online_search(name, proxy=None):
-        driver = setup_driver(proxy)
         info_list = []
+        driver = None
         try:
+            driver = setup_driver(proxy)
             retry_func(lambda: driver.get(pageUrl), name=f"online search:{name}")
             search_box = locate_element_with_retry(
                 driver, (By.XPATH, '//input[@type="text"]')
@@ -139,6 +140,8 @@ async def get_channels_by_online_search(names, callback):
             print(f"{name}:Error on search: {e}")
             pass
         finally:
+            if driver:
+                driver.quit()
             channels[format_channel_name(name)] = info_list
             names_queue.task_done()
             pbar.update()
@@ -149,7 +152,6 @@ async def get_channels_by_online_search(names, callback):
                 f"正在线上查询更新, 剩余{names_len - pbar.n}个频道待查询, 预计剩余时间: {get_pbar_remaining(pbar, start_time)}",
                 int((pbar.n / names_len) * 100),
             )
-            driver.quit()

     names_queue = Queue()
     for name in names:
@@ -158,12 +160,14 @@ async def get_channels_by_online_search(names, callback):
     pbar = tqdm_asyncio(total=names_len)
     pbar.set_description(f"Processing online search, {names_len} channels remaining")
     callback(f"正在线上查询更新, 共{names_len}个频道", 0)
-    with ThreadPoolExecutor(max_workers=10) as pool:
+    with ThreadPoolExecutor(max_workers=5) as pool:
         while not names_queue.empty():
             loop = get_running_loop()
             name = await names_queue.get()
             proxy = (
-                proxy_list_test[0] if config.open_proxy and proxy_list_test else None
+                proxy_list_test[proxy_index]
+                if config.open_proxy and proxy_list_test
+                else None
             )
+            if config.open_proxy and proxy_list_test:
+                proxy_index = (proxy_index + 1) % len(proxy_list_test)
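
Proxy selection no longer pins proxy_list_test[0] for every worker; the index advances round robin, so load spreads across all verified proxies. The same idea as a self-contained sketch (the proxy addresses are made up; itertools.cycle is an equivalent, stateless alternative):

from itertools import cycle

proxy_list_test = ["http://10.0.0.1:8080", "http://10.0.0.2:8080"]  # hypothetical addresses
proxy_index = 0

def next_proxy():
    # Explicit-index round robin, mirroring the diff above.
    global proxy_index
    if not proxy_list_test:
        return None
    proxy = proxy_list_test[proxy_index]
    proxy_index = (proxy_index + 1) % len(proxy_list_test)
    return proxy

proxy_cycle = cycle(proxy_list_test)
# next(proxy_cycle) yields the same repeating order without index bookkeeping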

@@ -27,8 +27,9 @@ async def get_proxy_list(page_count=1):
     pbar = tqdm_asyncio(total=url_queue.qsize(), desc="Getting proxy list")

     def get_proxy(url):
-        driver = setup_driver()
+        driver = None
         try:
+            driver = setup_driver()
             url = pattern.format(page_index)
             retry_func(lambda: driver.get(url), name=url)
             sleep(1)
@@ -48,11 +49,12 @@ async def get_proxy_list(page_count=1):
                     proxy = f"http://{ip}:{port}"
                     proxy_list.append(proxy)
         finally:
-            driver.quit()
+            if driver:
+                driver.quit()
             url_queue.task_done()
             pbar.update()

-    with ThreadPoolExecutor(max_workers=10) as executor:
+    with ThreadPoolExecutor(max_workers=5) as executor:
         while not url_queue.empty():
             loop = get_running_loop()
             url = await url_queue.get()
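
All four scrapers share this dispatch shape: an asyncio coroutine drains a Queue and hands each blocking Selenium job to a ThreadPoolExecutor via run_in_executor. Dropping max_workers from 10 to 5 trades throughput for fewer concurrent Chrome processes, each of which can hold hundreds of MB. A runnable sketch of the shape with a stand-in for the blocking work (the repo tracks completion with task_done instead of gathering futures):

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor

def blocking_job(item):
    # Stand-in for the Selenium work done in each process_* function.
    time.sleep(0.1)
    return item * 2

async def drain(queue):
    loop = asyncio.get_running_loop()
    with ThreadPoolExecutor(max_workers=5) as pool:
        futures = []
        while not queue.empty():
            item = await queue.get()
            futures.append(loop.run_in_executor(pool, blocking_job, item))
        return await asyncio.gather(*futures)

async def main():
    queue = asyncio.Queue()
    for i in range(10):
        await queue.put(i)
    print(await drain(queue))

asyncio.run(main())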

@@ -0,0 +1 @@
+from .request import get_channels_by_subscribe_urls

@@ -2,7 +2,7 @@ from utils.config import get_config
 from tqdm.asyncio import tqdm_asyncio
 from time import time
 from asyncio import Queue
-from request import get, exceptions
+from requests import get, exceptions
 from utils.retry import retry_func
 import re
 from utils.channel import format_channel_name
@@ -75,7 +75,7 @@ async def get_channels_by_subscribe_urls(callback):
         if config.open_online_search and pbar.n / subscribe_urls_len == 1:
             callback("正在获取在线搜索结果, 请耐心等待", 0)

-    with ThreadPoolExecutor(max_workers=10) as pool:
+    with ThreadPoolExecutor(max_workers=5) as pool:
         loop = get_running_loop()
         subscribe_url = await subscribe_queue.get()
         loop.run_in_executor(pool, process_subscribe_channels, subscribe_url)
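
retry_func itself, imported from utils.retry, is not part of this diff; the call sites only show the shape retry_func(lambda: driver.get(url), name=url). A minimal sketch consistent with that shape, with assumed retry count and delay:

import time

def retry_func(func, retries=3, delay=1, name=""):
    # Sketch only; the real utils.retry.retry_func may differ.
    for attempt in range(retries):
        try:
            return func()
        except Exception as e:
            if attempt == retries - 1:
                raise
            print(f"retry {attempt + 1}/{retries} for {name}: {e}")
            time.sleep(delay)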

@@ -1,5 +1,5 @@
 from os import path
-from sys import _MEIPASS, executable
+import sys
 from importlib import util
@@ -13,7 +13,7 @@ def resource_path(relative_path, persistent=False):
         return total_path
     else:
         try:
-            base_path = _MEIPASS
+            base_path = sys._MEIPASS
             return path.join(base_path, relative_path)
         except Exception:
             return total_path
@@ -25,7 +25,7 @@ def load_external_config(name):
     """
     config = None
     config_path = name
-    config_filename = path.join(path.dirname(executable), config_path)
+    config_filename = path.join(path.dirname(sys.executable), config_path)
     if path.exists(config_filename):
         spec = util.spec_from_file_location(name, config_filename)
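
The sys import change fixes a real crash: from sys import _MEIPASS binds the name at import time, and _MEIPASS exists on the sys module only inside a PyInstaller bundle, so running from source raised ImportError before resource_path was ever called. import sys defers the attribute lookup into the try/except. A common variant of the same pattern, using getattr instead of try/except:

import sys
from os import path

def resource_path(relative_path):
    # sys._MEIPASS is injected only in a frozen PyInstaller app;
    # fall back to the source tree when running unfrozen.
    base_path = getattr(sys, "_MEIPASS", path.abspath("."))
    return path.join(base_path, relative_path)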