refactor:match()

This commit is contained in:
guorong.zheng 2024-10-21 17:24:29 +08:00
parent 9f937c7895
commit 4881cb2922
3 changed files with 24 additions and 24 deletions
updates
multicast
subscribe
utils

@ -6,7 +6,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.
from updates.subscribe import get_channels_by_subscribe_urls
from driver.utils import get_soup_driver
from utils.config import resource_path, config
from utils.channel import format_channel_name
from utils.channel import format_channel_name, get_name_url
from utils.tools import get_pbar_remaining
import json
@ -127,7 +127,6 @@ def get_multicast_region_result_by_rtp_txt(callback=None):
pbar = tqdm(total=total_files, desc="Loading local multicast rtp files")
multicast_result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
pattern = re.compile(r"^(.*?),(?!#genre#)(.*?)$")
start_time = time()
for filename in rtp_file_list:
@ -136,10 +135,10 @@ def get_multicast_region_result_by_rtp_txt(callback=None):
os.path.join(rtp_path, f"{filename}.txt"), "r", encoding="utf-8"
) as f:
for line in f:
matcher = pattern.match(line)
if matcher:
channel_name = format_channel_name(matcher.group(1).strip())
url = matcher.group(2).strip()
name_url = get_name_url(line, rtp=True)
if name_url and name_url[0]:
channel_name = format_channel_name(name_url[0]["name"])
url = name_url[0]["url"]
if url not in multicast_result[channel_name][region][type]:
multicast_result[channel_name][region][type].append(url)
pbar.update()

@ -67,7 +67,7 @@ async def get_channels_by_subscribe_urls(
if response:
response.encoding = "utf-8"
content = response.text
data = get_name_url(content)
data = get_name_url(content, m3u="#EXTM3U" in content)
for item in data:
name = item["name"]
url = item["url"]

@ -30,6 +30,11 @@ log_file = "result_new.log"
log_path = os.path.join(log_dir, log_file)
handler = None
# Loose URL matcher: optional http(s) scheme, dotted host name, optional port
# and optional path.  It contains six capturing groups of its own, so a
# pattern that embeds it must wrap it in an outer group when the complete URL
# has to be available as a single capture.
url_regex = r"\b((https?):\/\/)?(([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?\b"
# "name,rtp://..." line: group 1 is the name, group 2 the (optional) rtp URL.
rtp_regex = r"^(.*?),(rtp://.*)?$"
# "name[,url]" line: group 1 is the name, group 2 the (optional) whole URL.
# The negative lookahead keeps "#genre#" from being tried as a URL.
txt_regex = r"^(.*?)(?:,)?((?!#genre#)" + url_regex + r")?$"
# M3U entry ("#EXTINF:-1 ...,name" followed by the URL on the next line):
# group 1 is the name, group 2 the whole URL.  The outer parentheses around
# url_regex are required; without them group 2 would be url_regex's first
# inner group, i.e. only the "http://" prefix.
m3u_regex = r"^#EXTINF:-1.*?,(.*?)\n(" + url_regex + r")$"
def setup_logging():
"""
@ -59,19 +64,16 @@ def cleanup_logging():
os.remove(log_path)
# "name,value" line: group 1 is the text before a comma that is not directly
# followed by "#genre#"; group 2 is the rest of the line.
# NOTE(review): get_name_url's four-parameter form selects from
# m3u_regex / rtp_regex / txt_regex instead - confirm whether these two
# patterns are still referenced anywhere.
txt_pattern = r"^(.*?),(?!#genre#)(.*?)$"
# M3U entry: group 1 is the text after the first comma on an "#EXTINF:-1" line,
# group 2 is the whole following line.
m3u_pattern = r"^#EXTINF:-1.*?,(.*?)\n(.*?)$"
def get_name_url(content, m3u=False, rtp=False, check_url=True):
    """
    Get channel name and url from content

    :param content: Text to scan; may be a single line or a whole document,
        since the pattern is applied with re.MULTILINE.
    :param m3u: Parse with m3u_regex. Takes precedence over ``rtp``.
    :param rtp: Parse with rtp_regex. Ignored when ``m3u`` is true.
    :param check_url: When true, drop entries whose url is empty after
        stripping; when false, keep them with ``"url": ""``.
    :return: List of ``{"name": ..., "url": ...}`` dicts in match order, built
        from the first and second capture groups of the selected pattern.
    """
    if m3u:
        regex = m3u_regex
    elif rtp:
        regex = rtp_regex
    else:
        regex = txt_regex
    channels = []
    for match in re.findall(regex, content, re.MULTILINE):
        name = match[0].strip()
        url = match[1].strip()
        # Name-only matches are only wanted when the caller opts out of
        # URL checking.
        if check_url and not url:
            continue
        channels.append({"name": name, "url": url})
    return channels
@ -81,21 +83,21 @@ def get_channel_data_from_file(channels, file, use_old):
Get the channel data from the file
"""
current_category = ""
pattern = re.compile(r"^(.*?)(,(?!#genre#)(.*?))?$")
for line in file:
line = line.strip()
if "#genre#" in line:
current_category = line.split(",")[0]
else:
match = pattern.search(line)
if match is not None and match.group(1):
name = match.group(1).strip()
name_url = get_name_url(line, check_url=False)
if name_url and name_url[0]:
name = name_url[0]["name"]
url = name_url[0]["url"]
category_dict = channels[current_category]
if name not in category_dict:
category_dict[name] = []
if use_old and match.group(3):
info = (match.group(3).strip(), None, None, None)
if use_old and url:
info = (url, None, None, None)
if info[0] and info not in category_dict[name]:
category_dict[name].append(info)
return channels
@ -459,9 +461,8 @@ def get_channel_url(text):
Get the url from text
"""
url = None
urlRegex = r"\b((https?):\/\/)?(([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?\b"
url_search = re.search(
urlRegex,
url_regex,
text,
)
if url_search: