Merge branch 'dev'

# Conflicts:
#	result.txt
This commit is contained in:
guorong.zheng 2024-03-18 16:01:52 +08:00
commit 42df00eb9c
8 changed files with 2154 additions and 736 deletions

@ -41,6 +41,11 @@ jobs:
chromedriver --url-base=/wd/hub &
sudo Xvfb -ac :99 -screen 0 1280x1024x24 > /dev/null 2>&1 & # optional
- name: Set final file name
id: set_final_file_name
run: |
echo "::set-output name=final_file::$(python -c 'import config; print(config.final_file)')"
- name: Install pipenv
run: pip3 install --user pipenv
- name: Install dependencies
@ -52,6 +57,13 @@ jobs:
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git diff
git add result.txt
git commit -m "Github Action Auto Updated"
git push
if [[ -f ${{ steps.set_final_file_name.outputs.final_file }} ]]; then
git add ${{ steps.set_final_file_name.outputs.final_file }}
fi
if [[ -f result.log ]]; then
git add result.log
fi
if [[ `git status --porcelain` ]]; then
git commit -m "Github Action Auto Updated"
git push
fi

@ -13,6 +13,7 @@ Customize channel menus, automatically fetch and update the latest live source i
- Set up key focus channels and configure the number of pages fetched separately
- Pagination results retrieval (configurable number of pages and interfaces)
- Ensure update timeliness, configure to retrieve interfaces updated within a recent time range
- Can filter ipv4, ipv6 interfaces
## How to Use
@ -27,15 +28,16 @@ Customize channel menus, automatically fetch and update the latest live source i
| Configuration Item | Default Value | Description |
| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
| source_file | demo.txt | Template file name |
| final_file | result.txt | Generated file name |
| source_file | "demo.txt" | Template file name |
| final_file | "result.txt" | Generated file name |
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
| favorite_page_num | 8 | Page retrieval quantity for favorite channels |
| default_page_num | 5 | Page retrieval quantity for regular channels |
| favorite_page_num | 6 | Page retrieval quantity for favorite channels |
| default_page_num | 4 | Page retrieval quantity for regular channels |
| urls_limit | 15 | Number of interfaces per channel |
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
| recent_days | 60 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
#### .github/workflows/main.yml:
@ -48,6 +50,14 @@ Customize channel menus, automatically fetch and update the latest live source i
## Update Log
### 2024/3/18
- Added configuration item: ipv_type, used to filter ipv4, ipv6 interface types
- Optimized file update logic to prevent file loss caused by update failure
- Adjusted the default value for pagination: fetch 6 pages for followed channels, 4 pages for regular channels, to improve update speed
- Added output of interface log file result.log
- Fixed weight sorting anomaly
### 2024/3/15
- Optimize code structure

@ -13,6 +13,7 @@
- 可设置重点关注频道,单独配置获取分页的数量
- 分页结果获取(可配置页数、接口数量)
- 保证更新时效性,配置获取最近时间范围内更新的接口
- 可过滤 ipv4、ipv6 接口
## 使用方法
@ -27,15 +28,16 @@
| 配置项 | 默认值 | 描述 |
| -------------------- | ------------------ | ------------------------------------------------------------------ |
| source_file | demo.txt | 模板文件名称 |
| final_file | result.txt | 生成文件名称 |
| source_file | "demo.txt" | 模板文件名称 |
| final_file | "result.txt" | 生成文件名称 |
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
| favorite_page_num | 8 | 关注频道获取分页数量 |
| default_page_num | 5 | 常规频道获取分页数量 |
| favorite_page_num | 6 | 关注频道获取分页数量 |
| default_page_num | 4 | 常规频道获取分页数量 |
| urls_limit | 15 | 单个频道接口数量 |
| response_time_weight | 0.5                | 响应时间权重值(所有权重值总和应为 1) |
| resolution_weight    | 0.5                | 分辨率权重值(所有权重值总和应为 1) |
| recent_days | 60 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
#### .github/workflows/main.yml
@ -48,6 +50,14 @@
## 更新日志
### 2024/3/18
- 新增配置项ipv_type用于过滤 ipv4、ipv6 接口类型
- 优化文件更新逻辑,避免更新失效引起文件丢失
- 调整分页获取默认值:关注频道获取 6 页,常规频道获取 4 页,以提升更新速度
- 增加接口日志文件 result.log 输出
- 修复权重排序异常
### 2024/3/15
- 优化代码结构

@ -15,9 +15,10 @@ favorite_list = [
"湖南卫视",
"翡翠台",
]
favorite_page_num = 8
default_page_num = 5
favorite_page_num = 6
default_page_num = 4
urls_limit = 15
response_time_weight = 0.5
resolution_weight = 0.5
recent_days = 60
ipv_type = "ipv4"

21
main.py

@ -7,19 +7,21 @@ from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup
from utils import (
removeLogFile,
getChannelItems,
removeFinalFile,
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
compareSpeedAndResolution,
getTotalUrls,
filterSortedDataByIPVType,
filterByIPVType,
)
import logging
logging.basicConfig(
filename="result.log",
filename="result_new.log",
filemode="a",
format="%(message)s",
level=logging.INFO,
)
@ -49,8 +51,6 @@ class UpdateSource:
self.driver = self.setup_driver()
async def visitPage(self, channelItems):
removeLogFile()
removeFinalFile()
for cate, channelObj in channelItems.items():
channelUrls = {}
for name in channelObj.keys():
@ -92,16 +92,17 @@ class UpdateSource:
continue
try:
sorted_data = await compareSpeedAndResolution(infoList)
if sorted_data:
ipvSortedData = filterSortedDataByIPVType(sorted_data)
if ipvSortedData:
channelUrls[name] = (
getTotalUrls(sorted_data) or channelObj[name]
getTotalUrls(ipvSortedData) or channelObj[name]
)
for (url, date, resolution), response_time in sorted_data:
for (url, date, resolution), response_time in ipvSortedData:
logging.info(
f"Name: {name}, URL: {url}, Date: {date}, Resolution: {resolution}, Response Time: {response_time}ms"
)
else:
channelUrls[name] = channelObj[name]
channelUrls[name] = filterByIPVType(channelObj[name])
except Exception as e:
print(f"Error on sorting: {e}")
continue
@ -110,6 +111,8 @@ class UpdateSource:
def main(self):
asyncio.run(self.visitPage(getChannelItems()))
updateFile(config.final_file, "result_new.txt")
updateFile("result.log", "result_new.log")
UpdateSource().main()

1323
result.log Normal file

File diff suppressed because it is too large Load Diff

1397
result.txt

File diff suppressed because it is too large Load Diff

@ -5,14 +5,8 @@ import time
import re
import datetime
import os
def removeLogFile():
    """
    Delete the previous run's log file ("result.log"), if one exists.

    A missing file is silently ignored so the function is safe to call
    unconditionally at startup.
    """
    log_path = "result.log"
    if os.path.exists(log_path):
        os.remove(log_path)
import urllib.parse
import ipaddress
def getChannelItems():
@ -45,19 +39,11 @@ def getChannelItems():
return channels
def removeFinalFile():
    """
    Delete the previously generated output file (config.final_file).

    Does nothing when the file is absent, so it can run unconditionally
    before a new result file is written.
    """
    final_path = config.final_file
    if os.path.exists(final_path):
        os.remove(final_path)
def updateChannelUrlsTxt(cate, channelUrls):
"""
Update the category and channel urls to the final file
"""
with open(config.final_file, "a") as f:
with open("result_new.txt", "a") as f:
f.write(cate + ",#genre#\n")
for name, urls in channelUrls.items():
for url in urls:
@ -66,6 +52,16 @@ def updateChannelUrlsTxt(cate, channelUrls):
f.write("\n")
def updateFile(final_file, old_file):
    """
    Promote old_file to final_file.

    Any existing final_file is removed first; old_file is then renamed
    into its place when present. NOTE(review): if old_file is missing the
    existing final_file is still deleted — callers appear to rely on this
    replace-or-clear behavior.
    """
    try:
        os.remove(final_file)
    except FileNotFoundError:
        pass
    if os.path.exists(old_file):
        os.rename(old_file, final_file)
def getUrlInfo(result):
"""
Get the url, date and resolution
@ -97,12 +93,12 @@ async def getSpeed(url):
async with session.get(url, timeout=5) as response:
resStatus = response.status
except:
return url, float("inf")
return float("inf")
end = time.time()
if resStatus == 200:
return url, int(round((end - start) * 1000))
return int(round((end - start) * 1000))
else:
return url, float("inf")
return float("inf")
async def compareSpeedAndResolution(infoList):
@ -111,9 +107,7 @@ async def compareSpeedAndResolution(infoList):
"""
response_times = await asyncio.gather(*(getSpeed(url) for url, _, _ in infoList))
valid_responses = [
(info, rt)
for info, rt in zip(infoList, response_times)
if rt[1] != float("inf")
(info, rt) for info, rt in zip(infoList, response_times) if rt != float("inf")
]
def extract_resolution(resolution_str):
@ -143,11 +137,11 @@ async def compareSpeedAndResolution(infoList):
(_, _, resolution), response_time = item
resolution_value = extract_resolution(resolution) if resolution else 0
return (
-(response_time_weight * response_time[1])
-(response_time_weight * response_time)
+ resolution_weight * resolution_value
)
sorted_res = sorted(valid_responses, key=combined_key)
sorted_res = sorted(valid_responses, key=combined_key, reverse=True)
return sorted_res
@ -183,3 +177,49 @@ def getTotalUrls(data):
else:
total_urls = [url for (url, _, _), _ in data]
return list(dict.fromkeys(total_urls))
def is_ipv6(url):
    """
    Return True when the host component of *url* is a literal IPv6 address.

    Hostnames, IPv4 literals, and malformed URLs all yield False: both the
    URL parse and the address parse run inside the try so any ValueError
    (including ipaddress.AddressValueError) maps to a negative result.
    """
    try:
        host = urllib.parse.urlparse(url).hostname
        ipaddress.IPv6Address(host)
    except ValueError:
        return False
    return True
def filterSortedDataByIPVType(sorted_data):
    """
    Filter speed-sorted results by the configured IP version.

    *sorted_data* items are ((url, date, resolution), response_time)
    tuples. config.ipv_type selects the filter: "ipv4" keeps non-IPv6
    urls, "ipv6" keeps IPv6 urls, any other value (e.g. "all") returns
    the data unchanged. Defaults to "ipv4" when the option is absent.
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv4":
        keep = lambda url: not is_ipv6(url)
    elif ipv_type == "ipv6":
        keep = is_ipv6
    else:
        return sorted_data
    return [entry for entry in sorted_data if keep(entry[0][0])]
def filterByIPVType(urls):
    """
    Filter a plain list of urls by the configured IP version.

    Mirrors filterSortedDataByIPVType for the fallback path where no
    speed data is available: "ipv4" drops IPv6 urls, "ipv6" keeps only
    IPv6 urls, anything else passes the list through untouched.
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv6":
        return [u for u in urls if is_ipv6(u)]
    if ipv_type == "ipv4":
        return [u for u in urls if not is_ipv6(u)]
    return urls