refactor:match(#422)

2024-10-21 17:24:29 +08:00 · 2024-10-21 17:24:29 +08:00 · 4881cb2922
commit 4881cb2922
parent 9f937c7895
3 changed files with 24 additions and 24 deletions
--- a/updates/multicast/update_tmp.py
+++ b/updates/multicast/update_tmp.py
@@ -6,7 +6,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.
 from updates.subscribe import get_channels_by_subscribe_urls
 from driver.utils import get_soup_driver
 from utils.config import resource_path, config
-from utils.channel import format_channel_name
+from utils.channel import format_channel_name, get_name_url
 from utils.tools import get_pbar_remaining
 import json

@@ -127,7 +127,6 @@ def get_multicast_region_result_by_rtp_txt(callback=None):

    pbar = tqdm(total=total_files, desc="Loading local multicast rtp files")
    multicast_result = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
-    pattern = re.compile(r"^(.*?),(?!#genre#)(.*?)$")
    start_time = time()

    for filename in rtp_file_list:
@@ -136,10 +135,10 @@ def get_multicast_region_result_by_rtp_txt(callback=None):
            os.path.join(rtp_path, f"{filename}.txt"), "r", encoding="utf-8"
        ) as f:
            for line in f:
-                matcher = pattern.match(line)
-                if matcher:
-                    channel_name = format_channel_name(matcher.group(1).strip())
-                    url = matcher.group(2).strip()
+                name_url = get_name_url(line, rtp=True)
+                if name_url and name_url[0]:
+                    channel_name = format_channel_name(name_url[0]["name"])
+                    url = name_url[0]["url"]
                    if url not in multicast_result[channel_name][region][type]:
                        multicast_result[channel_name][region][type].append(url)
        pbar.update()
--- a/updates/subscribe/request.py
+++ b/updates/subscribe/request.py
@@ -67,7 +67,7 @@ async def get_channels_by_subscribe_urls(
            if response:
                response.encoding = "utf-8"
                content = response.text
-                data = get_name_url(content)
+                data = get_name_url(content, m3u="#EXTM3U" in content)
                for item in data:
                    name = item["name"]
                    url = item["url"]
--- a/utils/channel.py
+++ b/utils/channel.py
@@ -30,6 +30,11 @@ log_file = "result_new.log"
 log_path = os.path.join(log_dir, log_file)
 handler = None

+url_regex = r"\b((https?):\/\/)?(([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?\b"
+rtp_regex = r"^(.*?),(rtp://.*)?$"
+txt_regex = r"^(.*?)(?:,)?((?!#genre#)" + url_regex + r")?$"
+m3u_regex = r"^#EXTINF:-1.*?,(.*?)\n(" + url_regex + r")$"  # group 2 = whole URL, as in txt_regex
+

 def setup_logging():
    """
@@ -59,19 +64,16 @@ def cleanup_logging():
        os.remove(log_path)


-txt_pattern = r"^(.*?),(?!#genre#)(.*?)$"
-m3u_pattern = r"^#EXTINF:-1.*?,(.*?)\n(.*?)$"
-
-
-def get_name_url(content):
+def get_name_url(content, m3u=False, rtp=False, check_url=True):
    """
    Get channel name and url from content
    """
-    matches = re.findall(
-        m3u_pattern if "#EXTM3U" in content else txt_pattern, content, re.MULTILINE
-    )
+    regex = m3u_regex if m3u else rtp_regex if rtp else txt_regex
+    matches = re.findall(regex, content, re.MULTILINE)
    channels = [
-        {"name": match[0].strip(), "url": match[1].strip()} for match in matches
+        {"name": match[0].strip(), "url": match[1].strip()}
+        for match in matches
+        if (check_url and match[1].strip()) or not check_url
    ]
    return channels

@@ -81,21 +83,21 @@ def get_channel_data_from_file(channels, file, use_old):
    Get the channel data from the file
    """
    current_category = ""
-    pattern = re.compile(r"^(.*?)(,(?!#genre#)(.*?))?$")

    for line in file:
        line = line.strip()
        if "#genre#" in line:
            current_category = line.split(",")[0]
        else:
-            match = pattern.search(line)
-            if match is not None and match.group(1):
-                name = match.group(1).strip()
+            name_url = get_name_url(line, check_url=False)
+            if name_url and name_url[0]:
+                name = name_url[0]["name"]
+                url = name_url[0]["url"]
                category_dict = channels[current_category]
                if name not in category_dict:
                    category_dict[name] = []
-                if use_old and match.group(3):
-                    info = (match.group(3).strip(), None, None, None)
+                if use_old and url:
+                    info = (url, None, None, None)
                    if info[0] and info not in category_dict[name]:
                        category_dict[name].append(info)
    return channels
@@ -459,9 +461,8 @@ def get_channel_url(text):
    Get the url from text
    """
    url = None
-    urlRegex = r"\b((https?):\/\/)?(([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?\b"
    url_search = re.search(
-        urlRegex,
+        url_regex,
        text,
    )
    if url_search: