Merge pull request from Guovin/dev

Release: v1.1.0
This commit is contained in:
Govin 2024-04-26 17:21:01 +08:00 committed by GitHub
commit e9311f96ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 177 additions and 75 deletions

@ -1,5 +1,11 @@
# 更新日志(Changelog)
## v1.1.0
### 2024/4/26
- 新增自定义接口获取源,配置项为 extend_base_urls (#56) / Added custom interface for source acquisition, the configuration item is extend_base_urls (#56)
## v1.0.9
### 2024/4/25

@ -16,23 +16,25 @@ Customize channel menus and automatically obtain and update the latest live sour
- Ensure update timeliness, configure to retrieve interfaces updated within a recent time range
- Can filter ipv4, ipv6 interfaces
- Blacklist feature: Interface domain and keywords
- Customize the source of interface acquisition
## Config
| Configuration Item | Default Value | Description |
| ---------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
| source_file | "demo.txt" | Template file name |
| final_file | "result.txt" | Generated file name |
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
| default_page_num | 3 | Page retrieval quantity for regular channels |
| urls_limit | 10 | Number of interfaces per channel |
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
| Configuration Item | Default Value | Description |
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
| source_file | "demo.txt" | Template file name |
| final_file | "result.txt" | Generated file name |
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
| default_page_num | 3 | Page retrieval quantity for regular channels |
| urls_limit | 10 | Number of interfaces per channel |
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | The source of interface acquisition, currently only compatible with specific content formats and fuzzy matching of some channel names |
## Quick Start

@ -16,23 +16,25 @@
- 保证更新时效性,配置获取最近时间范围内更新的接口
- 可过滤 ipv4、ipv6 接口
- 黑名单功能:接口域名与关键字
- 自定义接口获取源
## 配置
| 配置项 | 默认值 | 描述 |
| ---------------------- | ------------------ | ------------------------------------------------------------------ |
| source_file | "demo.txt" | 模板文件名称 |
| final_file | "result.txt" | 生成文件名称 |
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
| favorite_page_num | 5 | 关注频道获取分页数量 |
| default_page_num | 3 | 常规频道获取分页数量 |
| urls_limit | 10 | 单个频道接口数量 |
| response_time_weight   | 0.5                | 响应时间权重值(所有权重值总和应为 1)                               |
| resolution_weight      | 0.5                | 分辨率权重值(所有权重值总和应为 1)                                 |
| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
| 配置项 | 默认值 | 描述 |
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ |
| source_file | "demo.txt" | 模板文件名称 |
| final_file | "result.txt" | 生成文件名称 |
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
| favorite_page_num | 5 | 关注频道获取分页数量 |
| default_page_num | 3 | 常规频道获取分页数量 |
| urls_limit | 10 | 单个频道接口数量 |
| response_time_weight   | 0.5                                                                                                                         | 响应时间权重值(所有权重值总和应为 1)                               |
| resolution_weight      | 0.5                                                                                                                         | 分辨率权重值(所有权重值总和应为 1)                                 |
| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | 接口获取源,目前仅兼容特定内容格式与部分频道名称的模糊匹配 |
## 快速上手

@ -24,3 +24,8 @@ recent_days = 30
ipv_type = "ipv4"
domain_blacklist = ["epg.pw"]
url_keywords_blacklist = []
extend_base_urls = [
"https://m3u.ibert.me/txt/fmml_dv6.txt",
"https://m3u.ibert.me/txt/o_cn.txt",
"https://m3u.ibert.me/txt/j_iptv.txt",
]

@ -57,20 +57,21 @@ Similar to editing the template, modify the running configuration
Adjust the configuration as needed. Below is the default configuration explanation:
| Configuration Item | Default Value | Description |
| ---------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
| source_file | "demo.txt" | Template file name |
| final_file | "result.txt" | Generated file name |
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
| default_page_num | 3 | Page retrieval quantity for regular channels |
| urls_limit | 10 | Number of interfaces per channel |
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
| Configuration Item | Default Value | Description |
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
| source_file | "demo.txt" | Template file name |
| final_file | "result.txt" | Generated file name |
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
| default_page_num | 3 | Page retrieval quantity for regular channels |
| urls_limit | 10 | Number of interfaces per channel |
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | The source of interface acquisition, currently only compatible with specific content formats and fuzzy matching of some channel names |
## Step 4: Run Updates Locally (Recommended, Stable, Supports a large number of channel updates)

@ -57,7 +57,7 @@
按照您的需要适当调整配置,以下是默认配置说明
| 配置项 | 默认值 | 描述 |
| -------------------- | ------------------ | ------------------------------------------------------------------ |
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ |
| source_file | "demo.txt" | 模板文件名称 |
| final_file | "result.txt" | 生成文件名称 |
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
@ -70,6 +70,7 @@
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | 接口获取源,目前仅兼容特定内容格式与部分频道名称的模糊匹配 |
## 步骤四:本地运行更新(推荐,稳定,支持大量频道更新)

68
main.py

@ -14,13 +14,12 @@ from utils import (
updateChannelUrlsTxt,
updateFile,
getUrlInfo,
compareSpeedAndResolution,
sortUrlsBySpeedAndResolution,
getTotalUrls,
checkUrlIPVType,
checkByDomainBlacklist,
checkByURLKeywordsBlacklist,
filterUrlsByPatterns,
useAccessibleUrl,
getChannelsByExtendBaseUrls,
checkUrlByPatterns,
)
import logging
from logging.handlers import RotatingFileHandler
@ -61,9 +60,14 @@ class UpdateSource:
self.driver = self.setup_driver()
async def visitPage(self, channelItems):
total_channels = sum(len(channelObj) for _, channelObj in channelItems.items())
channelNames = [
name for _, channelObj in channelItems.items() for name in channelObj.keys()
]
extendResults = await getChannelsByExtendBaseUrls(channelNames)
total_channels = len(channelNames)
pbar = tqdm(total=total_channels)
pageUrl = await useAccessibleUrl()
wait = WebDriverWait(self.driver, 10)
for cate, channelObj in channelItems.items():
channelUrls = {}
channelObjKeys = channelObj.keys()
@ -71,26 +75,41 @@ class UpdateSource:
pbar.set_description(
f"Processing {name}, {total_channels - pbar.n} channels remaining"
)
self.driver.get(pageUrl)
search_box = self.driver.find_element(By.XPATH, '//input[@type="text"]')
search_box.clear()
search_box.send_keys(name)
submit_button = self.driver.find_element(
By.XPATH, '//input[@type="submit"]'
)
submit_button.click()
isFavorite = name in config.favorite_list
pageNum = (
config.favorite_page_num if isFavorite else config.default_page_num
)
infoList = []
for url, date, resolution in extendResults.get(name, []):
if url and checkUrlByPatterns(url):
infoList.append((url, None, resolution))
if pageUrl:
self.driver.get(pageUrl)
search_box = wait.until(
EC.presence_of_element_located(
(By.XPATH, '//input[@type="text"]')
)
)
search_box.clear()
search_box.send_keys(name)
submit_button = wait.until(
EC.element_to_be_clickable(
(By.XPATH, '//input[@type="submit"]')
)
)
submit_button.click()
isFavorite = name in config.favorite_list
pageNum = (
config.favorite_page_num
if isFavorite
else config.default_page_num
)
for page in range(1, pageNum + 1):
try:
if page > 1:
page_link = self.driver.find_element(
By.XPATH,
f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
page_link = wait.until(
EC.element_to_be_clickable(
(
By.XPATH,
f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
)
)
)
page_link.click()
soup = BeautifulSoup(self.driver.page_source, "html.parser")
@ -100,12 +119,7 @@ class UpdateSource:
for result in results:
try:
url, date, resolution = getUrlInfo(result)
if (
url
and checkUrlIPVType(url)
and checkByDomainBlacklist(url)
and checkByURLKeywordsBlacklist(url)
):
if url and checkUrlByPatterns(url):
infoList.append((url, date, resolution))
except Exception as e:
print(f"Error on result {result}: {e}")
@ -118,7 +132,7 @@ class UpdateSource:
if not github_actions or (
pbar.n <= 200 and github_actions == "true"
):
sorted_data = await compareSpeedAndResolution(infoList)
sorted_data = await sortUrlsBySpeedAndResolution(infoList)
if sorted_data:
channelUrls[name] = getTotalUrls(sorted_data)
for (url, date, resolution), response_time in sorted_data:

@ -11,6 +11,8 @@ import os
import urllib.parse
import ipaddress
from urllib.parse import urlparse
import requests
import re
def getChannelItems():
@ -41,10 +43,14 @@ def getChannelItems():
else:
# This is a url, add it to the list of urls for the current channel.
match = re.search(pattern, line)
if match:
if match is not None:
if match.group(1) not in channels[current_category]:
channels[current_category][match.group(1)] = [match.group(2)]
else:
elif (
match.group(2)
and match.group(2)
not in channels[current_category][match.group(1)]
):
channels[current_category][match.group(1)].append(
match.group(2)
)
@ -53,6 +59,60 @@ def getChannelItems():
f.close()
async def getChannelsByExtendBaseUrls(channel_names):
    """
    Fetch channel urls from the configured extend base urls.

    For each url in ``config.extend_base_urls``, download the text content,
    parse ``name,url`` lines (a negative lookahead skips ``xxx,#genre#``
    category rows), and fuzzy-match the parsed names against
    *channel_names* — case-insensitively, after stripping ``_(...)`` /
    ``_[...]`` suffixes and the literal suffix "频道".

    Args:
        channel_names: iterable of channel names to look up.

    Returns:
        dict mapping each matched channel name to a list of
        (url, None, resolution) tuples; unmatched names are absent.
    """
    channels = {}
    # Compiled once: the original re-ran re.match(pattern, line) three
    # times per matching line inside the inner loop.
    line_pattern = re.compile(r"^(.*?),(?!#genre#)(.*?)$")
    sub_pattern = r"_\((.*?)\)|_\[(.*?)\]|频道"
    for base_url in config.extend_base_urls:
        try:
            print(f"Processing extend base url: {base_url}")
            try:
                response = requests.get(base_url, timeout=30)
            except requests.exceptions.Timeout:
                print(f"Timeout on {base_url}")
                continue
            content = response.text
            if content:
                link_dict = {}
                for line in content.split("\n"):
                    match = line_pattern.match(line)
                    if match is None:
                        continue
                    key = match.group(1)
                    # Resolution is encoded as e.g. "CCTV1_(1080p)".
                    resolution_match = re.search(r"_(\((.*?)\))", key)
                    resolution = (
                        resolution_match.group(2)
                        if resolution_match is not None
                        else None
                    )
                    key = re.sub(sub_pattern, "", key).lower()
                    url = match.group(2)
                    link_dict.setdefault(key, []).append((url, None, resolution))
                found_channels = []
                for channel_name in channel_names:
                    sub_channel_name = re.sub(sub_pattern, "", channel_name).lower()
                    values = link_dict.get(sub_channel_name)
                    if values:
                        if channel_name in channels:
                            channels[channel_name] += values
                        else:
                            channels[channel_name] = values
                        found_channels.append(channel_name)
                if found_channels:
                    print(f"{base_url} found channels: {','.join(found_channels)}")
        except Exception as e:
            # Best-effort: one bad source must not abort the remaining urls.
            print(f"Error on {base_url}: {e}")
            continue
    print("Finished processing extend base urls")
    return channels
def updateChannelUrlsTxt(cate, channelUrls):
"""
Update the category and channel urls to the final file
@ -89,7 +149,7 @@ def getUrlInfo(result):
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
channel_text,
)
if url_match:
if url_match is not None:
url = url_match.group()
info_text = result_div[-1].get_text(strip=True)
if info_text:
@ -122,7 +182,7 @@ async def getSpeed(url, urlTimeout=5):
return float("inf")
async def compareSpeedAndResolution(infoList):
async def sortUrlsBySpeedAndResolution(infoList):
"""
Sort by speed and resolution
"""
@ -249,6 +309,17 @@ def checkByURLKeywordsBlacklist(url):
return not any(keyword in url for keyword in url_keywords_blacklist)
def checkUrlByPatterns(url):
    """
    Run every url filter in sequence.

    Returns True only when *url* passes the ipv-type check, the domain
    blacklist, and the url-keywords blacklist; evaluation stops at the
    first failing filter.
    """
    filters = (
        checkUrlIPVType,
        checkByDomainBlacklist,
        checkByURLKeywordsBlacklist,
    )
    return all(passes(url) for passes in filters)
def filterUrlsByPatterns(urls):
"""
Filter urls by patterns

@ -1,3 +1,3 @@
{
"version": "1.0.9"
"version": "1.1.0"
}