commit
e9311f96ab
@ -1,5 +1,11 @@
|
||||
# 更新日志(Changelog)
|
||||
|
||||
## v1.1.0
|
||||
|
||||
### 2024/4/26
|
||||
|
||||
- 新增自定义接口获取源,配置项为 extend_base_urls(#56)(Added support for custom interface sources, configured via extend_base_urls (#56))
|
||||
|
||||
## v1.0.9
|
||||
|
||||
### 2024/4/25
|
||||
|
30
README-EN.md
30
README-EN.md
@ -16,23 +16,25 @@ Customize channel menus and automatically obtain and update the latest live sour
|
||||
- Ensure update timeliness, configure to retrieve interfaces updated within a recent time range
|
||||
- Can filter ipv4, ipv6 interfaces
|
||||
- Blacklist feature: Interface domain and keywords
|
||||
- Customize the source of interface acquisition
|
||||
|
||||
## Config
|
||||
|
||||
| Configuration Item | Default Value | Description |
|
||||
| ---------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
|
||||
| source_file | "demo.txt" | Template file name |
|
||||
| final_file | "result.txt" | Generated file name |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
|
||||
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
|
||||
| default_page_num | 3 | Page retrieval quantity for regular channels |
|
||||
| urls_limit | 10 | Number of interfaces per channel |
|
||||
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
|
||||
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
|
||||
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
|
||||
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
|
||||
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
|
||||
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
|
||||
| Configuration Item | Default Value | Description |
|
||||
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| source_file | "demo.txt" | Template file name |
|
||||
| final_file | "result.txt" | Generated file name |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
|
||||
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
|
||||
| default_page_num | 3 | Page retrieval quantity for regular channels |
|
||||
| urls_limit | 10 | Number of interfaces per channel |
|
||||
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
|
||||
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
|
||||
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
|
||||
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
|
||||
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
|
||||
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
|
||||
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | Sources from which interfaces are acquired; currently only specific content formats are supported, and only some channel names are fuzzy-matched |
|
||||
|
||||
## Quick Start
|
||||
|
||||
|
30
README.md
30
README.md
@ -16,23 +16,25 @@
|
||||
- 保证更新时效性,配置获取最近时间范围内更新的接口
|
||||
- 可过滤 ipv4、ipv6 接口
|
||||
- 黑名单功能:接口域名与关键字
|
||||
- 自定义接口获取源
|
||||
|
||||
## 配置
|
||||
|
||||
| 配置项 | 默认值 | 描述 |
|
||||
| ---------------------- | ------------------ | ------------------------------------------------------------------ |
|
||||
| source_file | "demo.txt" | 模板文件名称 |
|
||||
| final_file | "result.txt" | 生成文件名称 |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
|
||||
| favorite_page_num | 5 | 关注频道获取分页数量 |
|
||||
| default_page_num | 3 | 常规频道获取分页数量 |
|
||||
| urls_limit | 10 | 单个频道接口数量 |
|
||||
| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) |
|
||||
| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) |
|
||||
| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
|
||||
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
|
||||
| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
|
||||
| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
|
||||
| 配置项 | 默认值 | 描述 |
|
||||
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ |
|
||||
| source_file | "demo.txt" | 模板文件名称 |
|
||||
| final_file | "result.txt" | 生成文件名称 |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
|
||||
| favorite_page_num | 5 | 关注频道获取分页数量 |
|
||||
| default_page_num | 3 | 常规频道获取分页数量 |
|
||||
| urls_limit | 10 | 单个频道接口数量 |
|
||||
| response_time_weight | 0.5 | 响应时间权重值(所有权重值总和应为 1) |
|
||||
| resolution_weight | 0.5 | 分辨率权重值 (所有权重值总和应为 1) |
|
||||
| recent_days | 30 | 获取最近时间范围内更新的接口(单位天),适当减小可避免出现匹配问题 |
|
||||
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
|
||||
| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
|
||||
| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
|
||||
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | 接口获取源,目前仅兼容特定内容格式与部分频道名称的模糊匹配 |
|
||||
|
||||
## 快速上手
|
||||
|
||||
|
@ -24,3 +24,8 @@ recent_days = 30
|
||||
ipv_type = "ipv4"
|
||||
domain_blacklist = ["epg.pw"]
|
||||
url_keywords_blacklist = []
|
||||
extend_base_urls = [
|
||||
"https://m3u.ibert.me/txt/fmml_dv6.txt",
|
||||
"https://m3u.ibert.me/txt/o_cn.txt",
|
||||
"https://m3u.ibert.me/txt/j_iptv.txt",
|
||||
]
|
||||
|
@ -57,20 +57,21 @@ Similar to editing the template, modify the running configuration
|
||||
|
||||
Adjust the configuration as needed. Below is the default configuration explanation:
|
||||
|
||||
| Configuration Item | Default Value | Description |
|
||||
| ---------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------------ |
|
||||
| source_file | "demo.txt" | Template file name |
|
||||
| final_file | "result.txt" | Generated file name |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
|
||||
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
|
||||
| default_page_num | 3 | Page retrieval quantity for regular channels |
|
||||
| urls_limit | 10 | Number of interfaces per channel |
|
||||
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
|
||||
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
|
||||
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
|
||||
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
|
||||
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
|
||||
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
|
||||
| Configuration Item | Default Value | Description |
|
||||
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| source_file | "demo.txt" | Template file name |
|
||||
| final_file | "result.txt" | Generated file name |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | List of favorite channel names (used only to distinguish from regular channels, custom page retrieval quantity) |
|
||||
| favorite_page_num | 5 | Page retrieval quantity for favorite channels |
|
||||
| default_page_num | 3 | Page retrieval quantity for regular channels |
|
||||
| urls_limit | 10 | Number of interfaces per channel |
|
||||
| response_time_weight | 0.5 | Response time weight value (the sum of all weight values should be 1) |
|
||||
| resolution_weight | 0.5 | Resolution weight value (the sum of all weight values should be 1) |
|
||||
| recent_days | 30 | Retrieve interfaces updated within a recent time range (in days), reducing appropriately can avoid matching issues |
|
||||
| ipv_type | "ipv4" | The type of interface in the generated result, optional values: "ipv4", "ipv6", "all" |
|
||||
| domain_blacklist | ["epg.pw"] | Interface domain blacklist, used to filter out interfaces with low-quality, ad-inclusive domains |
|
||||
| url_keywords_blacklist | [] | Interface keyword blacklist, used to filter out interfaces containing specific characters |
|
||||
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | Sources from which interfaces are acquired; currently only specific content formats are supported, and only some channel names are fuzzy-matched |
|
||||
|
||||
## Step 4: Run Updates Locally (Recommended, Stable, Supports a large number of channel updates)
|
||||
|
||||
|
@ -57,7 +57,7 @@
|
||||
|
||||
按照您的需要适当调整配置,以下是默认配置说明
|
||||
| 配置项 | 默认值 | 描述 |
|
||||
| -------------------- | ------------------ | ------------------------------------------------------------------ |
|
||||
| ---------------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ |
|
||||
| source_file | "demo.txt" | 模板文件名称 |
|
||||
| final_file | "result.txt" | 生成文件名称 |
|
||||
| favorite_list | ["CCTV1","CCTV13"] | 关注频道名称列表(仅用于与常规频道区分,自定义获取分页数量) |
|
||||
@ -70,6 +70,7 @@
|
||||
| ipv_type | "ipv4" | 生成结果中接口的类型,可选值:"ipv4"、"ipv6"、"all" |
|
||||
| domain_blacklist | ["epg.pw"] | 接口域名黑名单,用于过滤低质量含广告类域名的接口 |
|
||||
| url_keywords_blacklist | [] | 接口关键字黑名单,用于过滤含特定字符的接口 |
|
||||
| extend_base_urls | ["https://m3u.ibert.me/txt/fmml_dv6.txt",<br>"https://m3u.ibert.me/txt/o_cn.txt",<br>"https://m3u.ibert.me/txt/j_iptv.txt"] | 接口获取源,目前仅兼容特定内容格式与部分频道名称的模糊匹配 |
|
||||
|
||||
## 步骤四:本地运行更新(推荐,稳定,支持大量频道更新)
|
||||
|
||||
|
68
main.py
68
main.py
@ -14,13 +14,12 @@ from utils import (
|
||||
updateChannelUrlsTxt,
|
||||
updateFile,
|
||||
getUrlInfo,
|
||||
compareSpeedAndResolution,
|
||||
sortUrlsBySpeedAndResolution,
|
||||
getTotalUrls,
|
||||
checkUrlIPVType,
|
||||
checkByDomainBlacklist,
|
||||
checkByURLKeywordsBlacklist,
|
||||
filterUrlsByPatterns,
|
||||
useAccessibleUrl,
|
||||
getChannelsByExtendBaseUrls,
|
||||
checkUrlByPatterns,
|
||||
)
|
||||
import logging
|
||||
from logging.handlers import RotatingFileHandler
|
||||
@ -61,9 +60,14 @@ class UpdateSource:
|
||||
self.driver = self.setup_driver()
|
||||
|
||||
async def visitPage(self, channelItems):
|
||||
total_channels = sum(len(channelObj) for _, channelObj in channelItems.items())
|
||||
channelNames = [
|
||||
name for _, channelObj in channelItems.items() for name in channelObj.keys()
|
||||
]
|
||||
extendResults = await getChannelsByExtendBaseUrls(channelNames)
|
||||
total_channels = len(channelNames)
|
||||
pbar = tqdm(total=total_channels)
|
||||
pageUrl = await useAccessibleUrl()
|
||||
wait = WebDriverWait(self.driver, 10)
|
||||
for cate, channelObj in channelItems.items():
|
||||
channelUrls = {}
|
||||
channelObjKeys = channelObj.keys()
|
||||
@ -71,26 +75,41 @@ class UpdateSource:
|
||||
pbar.set_description(
|
||||
f"Processing {name}, {total_channels - pbar.n} channels remaining"
|
||||
)
|
||||
self.driver.get(pageUrl)
|
||||
search_box = self.driver.find_element(By.XPATH, '//input[@type="text"]')
|
||||
search_box.clear()
|
||||
search_box.send_keys(name)
|
||||
submit_button = self.driver.find_element(
|
||||
By.XPATH, '//input[@type="submit"]'
|
||||
)
|
||||
submit_button.click()
|
||||
isFavorite = name in config.favorite_list
|
||||
pageNum = (
|
||||
config.favorite_page_num if isFavorite else config.default_page_num
|
||||
)
|
||||
infoList = []
|
||||
for url, date, resolution in extendResults.get(name, []):
|
||||
if url and checkUrlByPatterns(url):
|
||||
infoList.append((url, None, resolution))
|
||||
if pageUrl:
|
||||
self.driver.get(pageUrl)
|
||||
search_box = wait.until(
|
||||
EC.presence_of_element_located(
|
||||
(By.XPATH, '//input[@type="text"]')
|
||||
)
|
||||
)
|
||||
search_box.clear()
|
||||
search_box.send_keys(name)
|
||||
submit_button = wait.until(
|
||||
EC.element_to_be_clickable(
|
||||
(By.XPATH, '//input[@type="submit"]')
|
||||
)
|
||||
)
|
||||
submit_button.click()
|
||||
isFavorite = name in config.favorite_list
|
||||
pageNum = (
|
||||
config.favorite_page_num
|
||||
if isFavorite
|
||||
else config.default_page_num
|
||||
)
|
||||
for page in range(1, pageNum + 1):
|
||||
try:
|
||||
if page > 1:
|
||||
page_link = self.driver.find_element(
|
||||
By.XPATH,
|
||||
f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
|
||||
page_link = wait.until(
|
||||
EC.element_to_be_clickable(
|
||||
(
|
||||
By.XPATH,
|
||||
f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
|
||||
)
|
||||
)
|
||||
)
|
||||
page_link.click()
|
||||
soup = BeautifulSoup(self.driver.page_source, "html.parser")
|
||||
@ -100,12 +119,7 @@ class UpdateSource:
|
||||
for result in results:
|
||||
try:
|
||||
url, date, resolution = getUrlInfo(result)
|
||||
if (
|
||||
url
|
||||
and checkUrlIPVType(url)
|
||||
and checkByDomainBlacklist(url)
|
||||
and checkByURLKeywordsBlacklist(url)
|
||||
):
|
||||
if url and checkUrlByPatterns(url):
|
||||
infoList.append((url, date, resolution))
|
||||
except Exception as e:
|
||||
print(f"Error on result {result}: {e}")
|
||||
@ -118,7 +132,7 @@ class UpdateSource:
|
||||
if not github_actions or (
|
||||
pbar.n <= 200 and github_actions == "true"
|
||||
):
|
||||
sorted_data = await compareSpeedAndResolution(infoList)
|
||||
sorted_data = await sortUrlsBySpeedAndResolution(infoList)
|
||||
if sorted_data:
|
||||
channelUrls[name] = getTotalUrls(sorted_data)
|
||||
for (url, date, resolution), response_time in sorted_data:
|
||||
|
79
utils.py
79
utils.py
@ -11,6 +11,8 @@ import os
|
||||
import urllib.parse
|
||||
import ipaddress
|
||||
from urllib.parse import urlparse
|
||||
import requests
|
||||
import re
|
||||
|
||||
|
||||
def getChannelItems():
|
||||
@ -41,10 +43,14 @@ def getChannelItems():
|
||||
else:
|
||||
# This is a url, add it to the list of urls for the current channel.
|
||||
match = re.search(pattern, line)
|
||||
if match:
|
||||
if match is not None:
|
||||
if match.group(1) not in channels[current_category]:
|
||||
channels[current_category][match.group(1)] = [match.group(2)]
|
||||
else:
|
||||
elif (
|
||||
match.group(2)
|
||||
and match.group(2)
|
||||
not in channels[current_category][match.group(1)]
|
||||
):
|
||||
channels[current_category][match.group(1)].append(
|
||||
match.group(2)
|
||||
)
|
||||
@ -53,6 +59,60 @@ def getChannelItems():
|
||||
f.close()
|
||||
|
||||
|
||||
async def getChannelsByExtendBaseUrls(channel_names):
    """
    Collect candidate urls for the requested channels from the extra sources.

    Each url listed in config.extend_base_urls is downloaded and read as text
    made of "name,url" lines; "name,#genre#" lines are ignored. Source names
    and requested names are compared after removing "_(...)" / "_[...]"
    suffixes and the word "频道", case-insensitively.

    Args:
        channel_names: channel names to look up (iterated once per source).

    Returns:
        dict mapping each matched channel name to a list of
        (url, None, resolution) tuples. The middle slot is always None and
        resolution is the text inside a "_(...)" suffix of the source name,
        or None when there is no such suffix.
    """
    channels = {}
    line_pattern = re.compile(r"^(.*?),(?!#genre#)(.*?)$")
    resolution_pattern = re.compile(r"_(\((.*?)\))")
    sub_pattern = r"_\((.*?)\)|_\[(.*?)\]|频道"
    for base_url in config.extend_base_urls:
        try:
            print(f"Processing extend base url: {base_url}")
            try:
                # NOTE(review): requests.get is a synchronous call made from
                # inside a coroutine.
                response = requests.get(base_url, timeout=30)
            except requests.exceptions.Timeout:
                print(f"Timeout on {base_url}")
                continue
            content = response.text
            if content:
                link_dict = {}
                # splitlines() plus strip() keeps a trailing "\r" (CRLF
                # sources) from ending up inside the captured url.
                for line in content.splitlines():
                    match = line_pattern.match(line.strip())
                    if match is None:
                        continue
                    key = match.group(1)
                    resolution_match = resolution_pattern.search(key)
                    resolution = (
                        resolution_match.group(2)
                        if resolution_match is not None
                        else None
                    )
                    key = re.sub(sub_pattern, "", key).lower()
                    url = match.group(2).strip()
                    if not url:
                        # "name," with nothing after the comma carries no url.
                        continue
                    link_dict.setdefault(key, []).append(
                        (url, None, resolution)
                    )
                found_channels = []
                for channel_name in channel_names:
                    sub_channel_name = re.sub(sub_pattern, "", channel_name).lower()
                    values = link_dict.get(sub_channel_name)
                    if values:
                        # extend() copies the entries, so two channel names
                        # that normalise to the same key never share one list.
                        channels.setdefault(channel_name, []).extend(values)
                        found_channels.append(channel_name)
                if found_channels:
                    print(f"{base_url} found channels: {','.join(found_channels)}")
        except Exception as e:
            # Best effort: a failing source is reported and skipped.
            print(f"Error on {base_url}: {e}")
            continue
    print("Finished processing extend base urls")
    return channels
|
||||
|
||||
|
||||
def updateChannelUrlsTxt(cate, channelUrls):
|
||||
"""
|
||||
Update the category and channel urls to the final file
|
||||
@ -89,7 +149,7 @@ def getUrlInfo(result):
|
||||
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
|
||||
channel_text,
|
||||
)
|
||||
if url_match:
|
||||
if url_match is not None:
|
||||
url = url_match.group()
|
||||
info_text = result_div[-1].get_text(strip=True)
|
||||
if info_text:
|
||||
@ -122,7 +182,7 @@ async def getSpeed(url, urlTimeout=5):
|
||||
return float("inf")
|
||||
|
||||
|
||||
async def compareSpeedAndResolution(infoList):
|
||||
async def sortUrlsBySpeedAndResolution(infoList):
|
||||
"""
|
||||
Sort by speed and resolution
|
||||
"""
|
||||
@ -249,6 +309,17 @@ def checkByURLKeywordsBlacklist(url):
|
||||
return not any(keyword in url for keyword in url_keywords_blacklist)
|
||||
|
||||
|
||||
def checkUrlByPatterns(url):
    """
    Run the url through the ip-type, domain-blacklist and keyword-blacklist
    checks, in that order, stopping at the first check that fails.

    Returns the falsy result of the first failing check, otherwise the
    result of the keyword-blacklist check.
    """
    ipv_result = checkUrlIPVType(url)
    if not ipv_result:
        return ipv_result
    domain_result = checkByDomainBlacklist(url)
    if not domain_result:
        return domain_result
    return checkByURLKeywordsBlacklist(url)
|
||||
|
||||
|
||||
def filterUrlsByPatterns(urls):
|
||||
"""
|
||||
Filter urls by patterns
|
||||
|
@ -1,3 +1,3 @@
|
||||
{
|
||||
"version": "1.0.9"
|
||||
"version": "1.1.0"
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user