Merge branch 'dev'
# Conflicts: # result.txt
This commit is contained in:
commit
42df00eb9c
18
.github/workflows/main.yml
vendored
18
.github/workflows/main.yml
vendored
@ -41,6 +41,11 @@ jobs:
|
||||
chromedriver --url-base=/wd/hub &
|
||||
sudo Xvfb -ac :99 -screen 0 1280x1024x24 > /dev/null 2>&1 & # optional
|
||||
|
||||
- name: Set final file name
|
||||
id: set_final_file_name
|
||||
run: |
|
||||
echo "::set-output name=final_file::$(python -c 'import config; print(config.final_file)')"
|
||||
|
||||
- name: Install pipenv
|
||||
run: pip3 install --user pipenv
|
||||
- name: Install dependecies
|
||||
@ -52,6 +57,13 @@ jobs:
|
||||
git config --local user.email "github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "github-actions[bot]"
|
||||
git diff
|
||||
git add result.txt
|
||||
git commit -m "Github Action Auto Updated"
|
||||
git push
|
||||
if [[ -f ${{ steps.set_final_file_name.outputs.final_file }} ]]; then
|
||||
git add ${{ steps.set_final_file_name.outputs.final_file }}
|
||||
fi
|
||||
if [[ -f result.log ]]; then
|
||||
git add result.log
|
||||
fi
|
||||
if [[ `git status --porcelain` ]]; then
|
||||
git commit -m "Github Action Auto Updated"
|
||||
git push
|
||||
fi
|
||||
|
18
README-EN.md
18
README-EN.md
@ -13,6 +13,7 @@ Customize channel menus, automatically fetch and update the latest live source i
|
||||
- Set up key focus channels and configure the number of pages fetched separately
|
||||
- Pagination results retrieval (configurable number of pages and interfaces)
|
||||
- Ensure update timeliness, configure to retrieve interfaces updated within a recent time range
|
||||
- Filter interfaces by type (IPv4, IPv6)
|
||||
|
||||
## How to Use
|
||||
|
||||
@ -27,15 +28,16 @@ Customize channel menus, automatically fetch and update the latest live source i
|
||||
|
||||
| Configuration Item | Default Value | Description |
|
||||
| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
|
||||
| source_file | demo.txt | Template file name |
|
||||
| final_file | result.txt | Generated file name |
|
||||
| source_file | "demo.txt" | Template file name |
|
||||
| final_file | "result.txt" | Generated file name |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
|
||||
| favorite_page_num | 8 | Page retrieval quantity for favorite channels |
|
||||
| default_page_num | 5 | Page retrieval quantity for regular channels |
|
||||
| favorite_page_num | 6 | Page retrieval quantity for favorite channels |
|
||||
| default_page_num | 4 | Page retrieval quantity for regular channels |
|
||||
| urls_limit | 15 | Number of interfaces per channel |
|
||||
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
|
||||
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
|
||||
| recent_days | 60 | Retrieve interfaces updated within a recent time range (in days); reducing this value appropriately can avoid matching issues |
|
||||
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
|
||||
|
||||
#### .github/workflows/main.yml:
|
||||
|
||||
@ -48,6 +50,14 @@ Customize channel menus, automatically fetch and update the latest live source i
|
||||
|
||||
## Update Log
|
||||
|
||||
### 2024/3/18
|
||||
|
||||
- Added configuration item: ipv_type, used to filter ipv4, ipv6 interface types
|
||||
- Optimized file update logic to prevent file loss caused by update failure
|
||||
- Adjusted the default value for pagination: fetch 6 pages for followed channels, 4 pages for regular channels, to improve update speed
|
||||
- Added output of interface log file result.log
|
||||
- Fixed weight sorting anomaly
|
||||
|
||||
### 2024/3/15
|
||||
|
||||
- Optimize code structure
|
||||
|
18
README.md
18
README.md
@ -13,6 +13,7 @@
|
||||
- 可设置重点关注频道,单独配置获取分页的数量
|
||||
- 分页结果获取(可配置页数、接口数量)
|
||||
- 保证更新时效性,配置获取最近时间范围内更新的接口
|
||||
- 可过滤 ipv4、ipv6 接口
|
||||
|
||||
## 使用方法
|
||||
|
||||
@ -27,15 +28,16 @@
|
||||
|
||||
| 配置项 | 默认值 | 描述 |
|
||||
| -------------------- | ------------------ | ------------------------------------------------------------------ |
|
||||
| source_file | demo.txt | 模板文件名称 |
|
||||
| final_file | result.txt | 生成文件名称 |
|
||||
| source_file | "demo.txt" | 模板文件名称 |
|
||||
| final_file | "result.txt" | 生成文件名称 |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
|
||||
| favorite_page_num | 8 | 关注频道获取分页数量 |
|
||||
| default_page_num | 5 | 常规频道获取分页数量 |
|
||||
| favorite_page_num | 6 | 关注频道获取分页数量 |
|
||||
| default_page_num | 4 | 常规频道获取分页数量 |
|
||||
| urls_limit | 15 | 单个频道接口数量 |
|
||||
| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) |
|
||||
| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) |
|
||||
| recent_days | 60 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
|
||||
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
|
||||
|
||||
#### .github/workflows/main.yml:
|
||||
|
||||
@ -48,6 +50,14 @@
|
||||
|
||||
## 更新日志
|
||||
|
||||
### 2024/3/18
|
||||
|
||||
- 新增配置项:ipv_type,用于过滤 ipv4、ipv6 接口类型
|
||||
- 优化文件更新逻辑,避免更新失效引起文件丢失
|
||||
- 调整分页获取默认值:关注频道获取 6 页,常规频道获取 4 页,以提升更新速度
|
||||
- 增加接口日志文件 result.log 输出
|
||||
- 修复权重排序异常
|
||||
|
||||
### 2024/3/15
|
||||
|
||||
- 优化代码结构
|
||||
|
@ -15,9 +15,10 @@ favorite_list = [
|
||||
"湖南卫视",
|
||||
"翡翠台",
|
||||
]
|
||||
favorite_page_num = 8
|
||||
default_page_num = 5
|
||||
favorite_page_num = 6
|
||||
default_page_num = 4
|
||||
urls_limit = 15
|
||||
response_time_weight = 0.5
|
||||
resolution_weight = 0.5
|
||||
recent_days = 60
|
||||
ipv_type = "ipv4"
|
||||
|
21
main.py
21
main.py
@ -7,19 +7,21 @@ from selenium_stealth import stealth
|
||||
import asyncio
|
||||
from bs4 import BeautifulSoup
|
||||
from utils import (
|
||||
removeLogFile,
|
||||
getChannelItems,
|
||||
removeFinalFile,
|
||||
updateChannelUrlsTxt,
|
||||
updateFile,
|
||||
getUrlInfo,
|
||||
compareSpeedAndResolution,
|
||||
getTotalUrls,
|
||||
filterSortedDataByIPVType,
|
||||
filterByIPVType,
|
||||
)
|
||||
import logging
|
||||
|
||||
logging.basicConfig(
|
||||
filename="result.log",
|
||||
filename="result_new.log",
|
||||
filemode="a",
|
||||
format="%(message)s",
|
||||
level=logging.INFO,
|
||||
)
|
||||
|
||||
@ -49,8 +51,6 @@ class UpdateSource:
|
||||
self.driver = self.setup_driver()
|
||||
|
||||
async def visitPage(self, channelItems):
|
||||
removeLogFile()
|
||||
removeFinalFile()
|
||||
for cate, channelObj in channelItems.items():
|
||||
channelUrls = {}
|
||||
for name in channelObj.keys():
|
||||
@ -92,16 +92,17 @@ class UpdateSource:
|
||||
continue
|
||||
try:
|
||||
sorted_data = await compareSpeedAndResolution(infoList)
|
||||
if sorted_data:
|
||||
ipvSortedData = filterSortedDataByIPVType(sorted_data)
|
||||
if ipvSortedData:
|
||||
channelUrls[name] = (
|
||||
getTotalUrls(sorted_data) or channelObj[name]
|
||||
getTotalUrls(ipvSortedData) or channelObj[name]
|
||||
)
|
||||
for (url, date, resolution), response_time in sorted_data:
|
||||
for (url, date, resolution), response_time in ipvSortedData:
|
||||
logging.info(
|
||||
f"Name: {name}, URL: {url}, Date: {date}, Resolution: {resolution}, Response Time: {response_time}ms"
|
||||
)
|
||||
else:
|
||||
channelUrls[name] = channelObj[name]
|
||||
channelUrls[name] = filterByIPVType(channelObj[name])
|
||||
except Exception as e:
|
||||
print(f"Error on sorting: {e}")
|
||||
continue
|
||||
@ -110,6 +111,8 @@ class UpdateSource:
|
||||
|
||||
def main(self):
|
||||
asyncio.run(self.visitPage(getChannelItems()))
|
||||
updateFile(config.final_file, "result_new.txt")
|
||||
updateFile("result.log", "result_new.log")
|
||||
|
||||
|
||||
UpdateSource().main()
|
||||
|
1323
result.log
Normal file
1323
result.log
Normal file
File diff suppressed because it is too large
Load Diff
1397
result.txt
1397
result.txt
File diff suppressed because it is too large
Load Diff
90
utils.py
90
utils.py
@ -5,14 +5,8 @@ import time
|
||||
import re
|
||||
import datetime
|
||||
import os
|
||||
|
||||
|
||||
def removeLogFile():
|
||||
"""
|
||||
Remove the log file
|
||||
"""
|
||||
if os.path.exists("result.log"):
|
||||
os.remove("result.log")
|
||||
import urllib.parse
|
||||
import ipaddress
|
||||
|
||||
|
||||
def getChannelItems():
|
||||
@ -45,19 +39,11 @@ def getChannelItems():
|
||||
return channels
|
||||
|
||||
|
||||
def removeFinalFile():
|
||||
"""
|
||||
Remove the old final file
|
||||
"""
|
||||
if os.path.exists(config.final_file):
|
||||
os.remove(config.final_file)
|
||||
|
||||
|
||||
def updateChannelUrlsTxt(cate, channelUrls):
|
||||
"""
|
||||
Update the category and channel urls to the final file
|
||||
"""
|
||||
with open(config.final_file, "a") as f:
|
||||
with open("result_new.txt", "a") as f:
|
||||
f.write(cate + ",#genre#\n")
|
||||
for name, urls in channelUrls.items():
|
||||
for url in urls:
|
||||
@ -66,6 +52,16 @@ def updateChannelUrlsTxt(cate, channelUrls):
|
||||
f.write("\n")
|
||||
|
||||
|
||||
def updateFile(final_file, old_file):
    """
    Promote a freshly generated file to its final name

    final_file: path of the published file that should be replaced
    old_file: path of the newly generated file (e.g. "result_new.txt")
              that takes over the final name

    If old_file does not exist (the update produced no output), final_file
    is left untouched so a failed run never deletes the previous result.
    """
    if not os.path.exists(old_file):
        # Nothing new was generated; keep the previous final file as is.
        return
    # os.replace overwrites an existing destination in one step, so there is
    # no window in which final_file has been removed but not yet recreated.
    os.replace(old_file, final_file)
|
||||
|
||||
|
||||
def getUrlInfo(result):
|
||||
"""
|
||||
Get the url, date and resolution
|
||||
@ -97,12 +93,12 @@ async def getSpeed(url):
|
||||
async with session.get(url, timeout=5) as response:
|
||||
resStatus = response.status
|
||||
except:
|
||||
return url, float("inf")
|
||||
return float("inf")
|
||||
end = time.time()
|
||||
if resStatus == 200:
|
||||
return url, int(round((end - start) * 1000))
|
||||
return int(round((end - start) * 1000))
|
||||
else:
|
||||
return url, float("inf")
|
||||
return float("inf")
|
||||
|
||||
|
||||
async def compareSpeedAndResolution(infoList):
|
||||
@ -111,9 +107,7 @@ async def compareSpeedAndResolution(infoList):
|
||||
"""
|
||||
response_times = await asyncio.gather(*(getSpeed(url) for url, _, _ in infoList))
|
||||
valid_responses = [
|
||||
(info, rt)
|
||||
for info, rt in zip(infoList, response_times)
|
||||
if rt[1] != float("inf")
|
||||
(info, rt) for info, rt in zip(infoList, response_times) if rt != float("inf")
|
||||
]
|
||||
|
||||
def extract_resolution(resolution_str):
|
||||
@ -143,11 +137,11 @@ async def compareSpeedAndResolution(infoList):
|
||||
(_, _, resolution), response_time = item
|
||||
resolution_value = extract_resolution(resolution) if resolution else 0
|
||||
return (
|
||||
-(response_time_weight * response_time[1])
|
||||
-(response_time_weight * response_time)
|
||||
+ resolution_weight * resolution_value
|
||||
)
|
||||
|
||||
sorted_res = sorted(valid_responses, key=combined_key)
|
||||
sorted_res = sorted(valid_responses, key=combined_key, reverse=True)
|
||||
return sorted_res
|
||||
|
||||
|
||||
@ -183,3 +177,49 @@ def getTotalUrls(data):
|
||||
else:
|
||||
total_urls = [url for (url, _, _), _ in data]
|
||||
return list(dict.fromkeys(total_urls))
|
||||
|
||||
|
||||
def is_ipv6(url):
    """
    Tell whether the host part of the url is a literal IPv6 address

    Returns True only when the parsed hostname is accepted by
    ipaddress.IPv6Address; any ValueError raised while parsing the url
    or the address yields False.
    """
    try:
        parsed = urllib.parse.urlparse(url)
        ipaddress.IPv6Address(parsed.hostname)
    except ValueError:
        return False
    else:
        return True
|
||||
|
||||
|
||||
def filterSortedDataByIPVType(sorted_data):
    """
    Keep only the sorted entries whose url matches config.ipv_type

    sorted_data: iterable of ((url, date, resolution), response_time) items

    With ipv_type "ipv4" (also the fallback when config has no ipv_type)
    entries for which is_ipv6(url) is true are dropped; with "ipv6" only
    those entries are kept. Any other value returns sorted_data unchanged.
    """
    wanted = getattr(config, "ipv_type", "ipv4")
    if wanted not in ("ipv4", "ipv6"):
        return sorted_data
    keep_ipv6 = wanted == "ipv6"
    filtered = []
    for (url, date, resolution), response_time in sorted_data:
        if is_ipv6(url) == keep_ipv6:
            filtered.append(((url, date, resolution), response_time))
    return filtered
|
||||
|
||||
|
||||
def filterByIPVType(urls):
    """
    Filter plain urls according to config.ipv_type

    With ipv_type "ipv4" (also the fallback when config has no ipv_type)
    urls for which is_ipv6 is true are removed; with "ipv6" only those urls
    are kept. Any other value returns the urls argument unchanged.
    """
    mode = getattr(config, "ipv_type", "ipv4")
    if mode not in ("ipv4", "ipv6"):
        return urls
    want_v6 = mode == "ipv6"
    return [candidate for candidate in urls if is_ipv6(candidate) == want_v6]
|
||||
|
Loading…
x
Reference in New Issue
Block a user