Merge branch 'dev'

# Conflicts:
#	result.txt
This commit is contained in:
guorong.zheng 2024-03-18 16:01:52 +08:00
commit 42df00eb9c
8 changed files with 2154 additions and 736 deletions

@ -41,6 +41,11 @@ jobs:
chromedriver --url-base=/wd/hub &
sudo Xvfb -ac :99 -screen 0 1280x1024x24 > /dev/null 2>&1 & # optional
- name: Set final file name
id: set_final_file_name
run: |
echo "::set-output name=final_file::$(python -c 'import config; print(config.final_file)')"
- name: Install pipenv
run: pip3 install --user pipenv
- name: Install dependencies
@ -52,6 +57,13 @@ jobs:
git config --local user.email "github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git diff
git add result.txt
git commit -m "Github Action Auto Updated"
git push
if [[ -f ${{ steps.set_final_file_name.outputs.final_file }} ]]; then
git add ${{ steps.set_final_file_name.outputs.final_file }}
fi
if [[ -f result.log ]]; then
git add result.log
fi
if [[ `git status --porcelain` ]]; then
git commit -m "Github Action Auto Updated"
git push
fi

@ -13,6 +13,7 @@ Customize channel menus, automatically fetch and update the latest live source i
- Set up key focus channels and configure the number of pages fetched separately
- Pagination results retrieval (configurable number of pages and interfaces)
- Ensure update timeliness, configure to retrieve interfaces updated within a recent time range
- Can filter ipv4, ipv6 interfaces
## How to Use
@ -27,15 +28,16 @@ Customize channel menus, automatically fetch and update the latest live source i
| Configuration Item | Default Value | Description |
| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
| source_file | demo.txt | Template file name |
| final_file | result.txt | Generated file name |
| source_file | "demo.txt" | Template file name |
| final_file | "result.txt" | Generated file name |
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
| favorite_page_num | 8 | Page retrieval quantity for favorite channels |
| default_page_num | 5 | Page retrieval quantity for regular channels |
| favorite_page_num | 6 | Page retrieval quantity for favorite channels |
| default_page_num | 4 | Page retrieval quantity for regular channels |
| urls_limit | 15 | Number of interfaces per channel |
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
| recent_days | 60 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
#### .github/workflows/main.yml:
@ -48,6 +50,14 @@ Customize channel menus, automatically fetch and update the latest live source i
## Update Log
### 2024/3/18
- Added configuration item: ipv_type, used to filter ipv4, ipv6 interface types
- Optimized file update logic to prevent file loss caused by update failure
- Adjusted the default value for pagination: fetch 6 pages for followed channels, 4 pages for regular channels, to improve update speed
- Added output of interface log file result.log
- Fixed weight sorting anomaly
### 2024/3/15
- Optimize code structure

@ -13,6 +13,7 @@
- 可设置重点关注频道,单独配置获取分页的数量
- 分页结果获取(可配置页数、接口数量)
- 保证更新时效性,配置获取最近时间范围内更新的接口
- 可过滤 ipv4、ipv6 接口
## 使用方法
@ -27,15 +28,16 @@
| 配置项 | 默认值 | 描述 |
| -------------------- | ------------------ | ------------------------------------------------------------------ |
| source_file | demo.txt | 模板文件名称 |
| final_file | result.txt | 生成文件名称 |
| source_file | "demo.txt" | 模板文件名称 |
| final_file | "result.txt" | 生成文件名称 |
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
| favorite_page_num | 8 | 关注频道获取分页数量 |
| default_page_num | 5 | 常规频道获取分页数量 |
| favorite_page_num | 6 | 关注频道获取分页数量 |
| default_page_num | 4 | 常规频道获取分页数量 |
| urls_limit | 15 | 单个频道接口数量 |
| response_time_weight | 0.5                | 响应时间权重值(所有权重值总和应为 1) |
| resolution_weight    | 0.5                | 分辨率权重值(所有权重值总和应为 1) |
| recent_days | 60 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
#### .github/workflows/main.yml
@ -48,6 +50,14 @@
## 更新日志
### 2024/3/18
- 新增配置项ipv_type用于过滤 ipv4、ipv6 接口类型
- 优化文件更新逻辑,避免更新失效引起文件丢失
- 调整分页获取默认值:关注频道获取 6 页,常规频道获取 4 页,以提升更新速度
- 增加接口日志文件 result.log 输出
- 修复权重排序异常
### 2024/3/15
- 优化代码结构

@ -15,9 +15,10 @@ favorite_list = [
"湖南卫视",
"翡翠台",
]
favorite_page_num = 8
default_page_num = 5
favorite_page_num = 6
default_page_num = 4
urls_limit = 15
response_time_weight = 0.5
resolution_weight = 0.5
recent_days = 60
ipv_type = "ipv4"

21
main.py

@ -7,19 +7,21 @@ from selenium_stealth import stealth
import asyncio
from bs4 import BeautifulSoup
from utils import (
removeLogFile,
getChannelItems,
removeFinalFile,
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
compareSpeedAndResolution,
getTotalUrls,
filterSortedDataByIPVType,
filterByIPVType,
)
import logging
logging.basicConfig(
filename="result.log",
filename="result_new.log",
filemode="a",
format="%(message)s",
level=logging.INFO,
)
@ -49,8 +51,6 @@ class UpdateSource:
self.driver = self.setup_driver()
async def visitPage(self, channelItems):
removeLogFile()
removeFinalFile()
for cate, channelObj in channelItems.items():
channelUrls = {}
for name in channelObj.keys():
@ -92,16 +92,17 @@ class UpdateSource:
continue
try:
sorted_data = await compareSpeedAndResolution(infoList)
if sorted_data:
ipvSortedData = filterSortedDataByIPVType(sorted_data)
if ipvSortedData:
channelUrls[name] = (
getTotalUrls(sorted_data) or channelObj[name]
getTotalUrls(ipvSortedData) or channelObj[name]
)
for (url, date, resolution), response_time in sorted_data:
for (url, date, resolution), response_time in ipvSortedData:
logging.info(
f"Name: {name}, URL: {url}, Date: {date}, Resolution: {resolution}, Response Time: {response_time}ms"
)
else:
channelUrls[name] = channelObj[name]
channelUrls[name] = filterByIPVType(channelObj[name])
except Exception as e:
print(f"Error on sorting: {e}")
continue
@ -110,6 +111,8 @@ class UpdateSource:
def main(self):
asyncio.run(self.visitPage(getChannelItems()))
updateFile(config.final_file, "result_new.txt")
updateFile("result.log", "result_new.log")
UpdateSource().main()

1323
result.log Normal file

File diff suppressed because it is too large Load Diff

1397
result.txt

File diff suppressed because it is too large Load Diff

@ -5,14 +5,8 @@ import time
import re
import datetime
import os
def removeLogFile():
    """
    Delete the previous run's log file ("result.log"), if one exists.

    A missing file is silently ignored so the function is safe to call
    unconditionally at startup.
    """
    log_path = "result.log"
    if os.path.exists(log_path):
        os.remove(log_path)
import urllib.parse
import ipaddress
def getChannelItems():
@ -45,19 +39,11 @@ def getChannelItems():
return channels
def removeFinalFile():
    """
    Delete the previously generated output file (config.final_file).

    Does nothing when the file is absent, so it can run unconditionally
    before a new result file is written.
    """
    final_path = config.final_file
    if os.path.exists(final_path):
        os.remove(final_path)
def updateChannelUrlsTxt(cate, channelUrls):
"""
Update the category and channel urls to the final file
"""
with open(config.final_file, "a") as f:
with open("result_new.txt", "a") as f:
f.write(cate + ",#genre#\n")
for name, urls in channelUrls.items():
for url in urls:
@ -66,6 +52,16 @@ def updateChannelUrlsTxt(cate, channelUrls):
f.write("\n")
def updateFile(final_file, old_file):
    """
    Promote old_file to final_file.

    Any existing final_file is removed first; old_file is then renamed
    into its place when present. NOTE(review): if old_file is missing the
    existing final_file is still deleted — callers appear to rely on this
    replace-or-clear behavior.
    """
    try:
        os.remove(final_file)
    except FileNotFoundError:
        pass
    if os.path.exists(old_file):
        os.rename(old_file, final_file)
def getUrlInfo(result):
"""
Get the url, date and resolution
@ -97,12 +93,12 @@ async def getSpeed(url):
async with session.get(url, timeout=5) as response:
resStatus = response.status
except:
return url, float("inf")
return float("inf")
end = time.time()
if resStatus == 200:
return url, int(round((end - start) * 1000))
return int(round((end - start) * 1000))
else:
return url, float("inf")
return float("inf")
async def compareSpeedAndResolution(infoList):
@ -111,9 +107,7 @@ async def compareSpeedAndResolution(infoList):
"""
response_times = await asyncio.gather(*(getSpeed(url) for url, _, _ in infoList))
valid_responses = [
(info, rt)
for info, rt in zip(infoList, response_times)
if rt[1] != float("inf")
(info, rt) for info, rt in zip(infoList, response_times) if rt != float("inf")
]
def extract_resolution(resolution_str):
@ -143,11 +137,11 @@ async def compareSpeedAndResolution(infoList):
(_, _, resolution), response_time = item
resolution_value = extract_resolution(resolution) if resolution else 0
return (
-(response_time_weight * response_time[1])
-(response_time_weight * response_time)
+ resolution_weight * resolution_value
)
sorted_res = sorted(valid_responses, key=combined_key)
sorted_res = sorted(valid_responses, key=combined_key, reverse=True)
return sorted_res
@ -183,3 +177,49 @@ def getTotalUrls(data):
else:
total_urls = [url for (url, _, _), _ in data]
return list(dict.fromkeys(total_urls))
def is_ipv6(url):
    """
    Return True when the host component of *url* is a literal IPv6 address.

    Hostnames, IPv4 literals, and malformed URLs all yield False: both the
    URL parse and the address parse run inside the try so any ValueError
    (including ipaddress.AddressValueError) maps to a negative result.
    """
    try:
        host = urllib.parse.urlparse(url).hostname
        ipaddress.IPv6Address(host)
    except ValueError:
        return False
    return True
def filterSortedDataByIPVType(sorted_data):
    """
    Filter speed-sorted results by the configured IP version.

    *sorted_data* items are ((url, date, resolution), response_time)
    tuples. config.ipv_type selects the filter: "ipv4" keeps non-IPv6
    urls, "ipv6" keeps IPv6 urls, any other value (e.g. "all") returns
    the data unchanged. Defaults to "ipv4" when the option is absent.
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv4":
        keep = lambda url: not is_ipv6(url)
    elif ipv_type == "ipv6":
        keep = is_ipv6
    else:
        return sorted_data
    return [entry for entry in sorted_data if keep(entry[0][0])]
def filterByIPVType(urls):
    """
    Filter a plain list of urls by the configured IP version.

    Mirrors filterSortedDataByIPVType for the fallback path where no
    speed data is available: "ipv4" drops IPv6 urls, "ipv6" keeps only
    IPv6 urls, anything else passes the list through untouched.
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv6":
        return [u for u in urls if is_ipv6(u)]
    if ipv_type == "ipv4":
        return [u for u in urls if not is_ipv6(u)]
    return urls