from selenium import webdriver
from aiohttp_retry import RetryClient, ExponentialRetry
import asyncio
from time import time
import re
import datetime
import os
import urllib.parse
import ipaddress
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
from bs4 import NavigableString
import fofa_map
from collections import defaultdict
from tqdm.asyncio import tqdm_asyncio
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium_stealth import stealth
import concurrent.futures
import sys
import importlib.util
timeout = 10
max_retries = 3
def retry_func(func, retries=max_retries + 1, name=""):
    """
    Retry the function
    """
    for i in range(retries):
        try:
            return func()
        except Exception as e:
            count = retries - 1
            if name and i < count:
                print(f"Failed to connect to the {name}. Retrying {i+1}...")
            if i == count:
                break
            else:
                continue
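# Usage sketch (hypothetical call): wrap a flaky request so it is retried.
# By default the call is attempted max_retries + 1 times, and None is
# returned if every attempt raises.
#   response = retry_func(
#       lambda: requests.get("http://example.com", timeout=timeout),
#       name="example.com",
#   )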
def resource_path(relative_path, persistent=False):
    """
    Get the resource path
    """
    base_path = os.path.abspath(".")
    total_path = os.path.join(base_path, relative_path)
    if persistent or os.path.exists(total_path):
        return total_path
    else:
        try:
            base_path = sys._MEIPASS
            return os.path.join(base_path, relative_path)
        except Exception:
            return total_path
def load_external_config(name):
    """
    Load the external config file
    """
    config = None
    config_path = name
    config_filename = os.path.join(os.path.dirname(sys.executable), config_path)
    if os.path.exists(config_filename):
        spec = importlib.util.spec_from_file_location(name, config_filename)
        config = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(config)
    else:
        import config
    return config
config_path = resource_path("user_config.py")
default_config_path = resource_path("config.py")
config = (
    load_external_config("user_config.py")
    if os.path.exists(config_path)
    else load_external_config("config.py")
)
def setup_driver(proxy=None):
    """
    Setup the driver for selenium
    """
    options = webdriver.ChromeOptions()
    options.add_argument("start-maximized")
    options.add_argument("--headless")
    options.add_experimental_option("excludeSwitches", ["enable-logging"])
    options.add_experimental_option("useAutomationExtension", False)
    options.add_argument("blink-settings=imagesEnabled=false")
    options.add_argument("--log-level=3")
    options.add_argument("--ignore-certificate-errors")
    options.add_argument("--allow-running-insecure-content")
    if proxy:
        options.add_argument("--proxy-server=%s" % proxy)
    driver = webdriver.Chrome(options=options)
    stealth(
        driver,
        languages=["en-US", "en"],
        vendor="Google Inc.",
        platform="Win32",
        webgl_vendor="Intel Inc.",
        renderer="Intel Iris OpenGL Engine",
        fix_hairline=True,
    )
    return driver
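# Usage sketch (assumes a local Chrome/chromedriver install; the proxy
# address is a placeholder):
#   driver = setup_driver("127.0.0.1:8080")
#   driver.get("https://www.example.com")
#   driver.quit()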
def get_proxy_list(page_count=1):
    """
    Get the proxy list
    """
    url_pattern = [
        "https://www.kuaidaili.com/free/inha/{}/",
        "https://www.kuaidaili.com/free/intr/{}/",
    ]
    proxy_list = []
    driver = setup_driver()
    for page_index in range(1, page_count + 1):
        for pattern in url_pattern:
            url = pattern.format(page_index)
            retry_func(lambda: driver.get(url), name=url)
            source = re.sub(
                r"<!--.*?-->",
                "",
                driver.page_source,
                flags=re.DOTALL,
            )
            soup = BeautifulSoup(source, "html.parser")
            table = soup.find("table")
            trs = table.find_all("tr") if table else []
            for tr in trs[1:]:
                tds = tr.find_all("td")
                ip = tds[0].get_text().strip()
                port = tds[1].get_text().strip()
                proxy = f"{ip}:{port}"
                proxy_list.append(proxy)
    return proxy_list
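# Each returned entry is a plain "ip:port" string (e.g. "1.2.3.4:8080",
# illustrative), ready to be passed to setup_driver(proxy=...).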
def format_channel_name(name):
    """
    Format the channel name with sub and replace and lower
    """
    sub_pattern = (
        r"-|_|\((.*?)\)|\[(.*?)\]| |频道|标清|高清|HD|hd|超清|超高|超高清|中央|央视|台"
    )
    name = re.sub(sub_pattern, "", name)
    name = name.replace("plus", "+")
    name = name.replace("PLUS", "+")
    name = name.replace("＋", "+")  # normalize the fullwidth plus sign
    name = name.replace("CCTV1综合", "CCTV1")
    name = name.replace("CCTV2财经", "CCTV2")
    name = name.replace("CCTV3综艺", "CCTV3")
    name = name.replace("CCTV4国际", "CCTV4")
    name = name.replace("CCTV4中文国际", "CCTV4")
    name = name.replace("CCTV4欧洲", "CCTV4")
    name = name.replace("CCTV5体育", "CCTV5")
    name = name.replace("CCTV5+体育赛视", "CCTV5+")
    name = name.replace("CCTV5+体育赛事", "CCTV5+")
    name = name.replace("CCTV5+体育", "CCTV5+")
    name = name.replace("CCTV6电影", "CCTV6")
    name = name.replace("CCTV7军事", "CCTV7")
    name = name.replace("CCTV7军农", "CCTV7")
    name = name.replace("CCTV7农业", "CCTV7")
    name = name.replace("CCTV7国防军事", "CCTV7")
    name = name.replace("CCTV8电视剧", "CCTV8")
    name = name.replace("CCTV9记录", "CCTV9")
    name = name.replace("CCTV9纪录", "CCTV9")
    name = name.replace("CCTV10科教", "CCTV10")
    name = name.replace("CCTV11戏曲", "CCTV11")
    name = name.replace("CCTV12社会与法", "CCTV12")
    name = name.replace("CCTV13新闻", "CCTV13")
    name = name.replace("CCTV新闻", "CCTV13")
    name = name.replace("CCTV14少儿", "CCTV14")
    name = name.replace("CCTV15音乐", "CCTV15")
    name = name.replace("CCTV16奥林匹克", "CCTV16")
    name = name.replace("CCTV17农业农村", "CCTV17")
    name = name.replace("CCTV17农业", "CCTV17")
    return name.lower()
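# Examples derived from the rules above:
#   format_channel_name("CCTV-1综合高清") -> "cctv1"
#   format_channel_name("CCTV5+ 体育赛事") -> "cctv5+"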
def get_channel_items():
    """
    Get the channel items from the source file
    """
    # Open the source file and read all lines.
    user_source_file = (
        "user_" + config.source_file
        if os.path.exists("user_" + config.source_file)
        else getattr(config, "source_file", "demo.txt")
    )
    # Create a dictionary to store the channels.
    channels = defaultdict(lambda: defaultdict(list))
    current_category = ""
    pattern = r"^(.*?),(?!#genre#)(.*?)$"
    with open(resource_path(user_source_file), "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if "#genre#" in line:
                # This is a new category, create a new key in the dictionary.
                current_category = line.split(",")[0]
            else:
                # This is a url, add it to the list of urls for the current channel.
                match = re.search(pattern, line)
                if match is not None:
                    name = match.group(1).strip()
                    url = match.group(2).strip()
                    if url and url not in channels[current_category][name]:
                        channels[current_category][name].append(url)
    return channels
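# The source file uses the plain-text "genre" layout, e.g. (placeholder urls):
#   央视频道,#genre#
#   CCTV1,http://example.com/cctv1.m3u8
#   CCTV2,http://example.com/cctv2.m3u8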
def get_pbar_remaining(pbar, start_time):
    """
    Get the remaining time of the progress bar
    """
    try:
        elapsed = time() - start_time
        completed_tasks = pbar.n
        if completed_tasks > 0:
            avg_time_per_task = elapsed / completed_tasks
            remaining_tasks = pbar.total - completed_tasks
            remaining_time = pbar.format_interval(avg_time_per_task * remaining_tasks)
        else:
            remaining_time = "未知"
        return remaining_time
    except Exception as e:
        print(f"Error: {e}")
async def get_channels_by_subscribe_urls(callback):
    """
    Get the channels by subscribe urls
    """
    channels = {}
    pattern = r"^(.*?),(?!#genre#)(.*?)$"
    subscribe_urls_len = len(config.subscribe_urls)
    pbar = tqdm_asyncio(total=subscribe_urls_len)
    start_time = time()

    def process_subscribe_channels(subscribe_url):
        try:
            response = None
            try:
                response = retry_func(
                    lambda: requests.get(subscribe_url, timeout=timeout),
                    name=subscribe_url,
                )
            except requests.exceptions.Timeout:
                print(f"Timeout on {subscribe_url}")
            if response:
                content = response.text
                lines = content.split("\n")
                for line in lines:
                    matcher = re.match(pattern, line)
                    if matcher is not None:
                        key = matcher.group(1)
                        resolution_match = re.search(r"_(\((.*?)\))", key)
                        resolution = (
                            resolution_match.group(2)
                            if resolution_match is not None
                            else None
                        )
                        url = matcher.group(2)
                        value = (url, None, resolution)
                        name = format_channel_name(key)
                        if name in channels:
                            if value not in channels[name]:
                                channels[name].append(value)
                        else:
                            channels[name] = [value]
        except Exception as e:
            print(f"Error on {subscribe_url}: {e}")
        finally:
            pbar.update()
            remain = subscribe_urls_len - pbar.n
            pbar.set_description(f"Processing subscribe, {remain} urls remaining")
            callback(
                f"正在获取订阅源更新, 剩余{remain}个订阅源待获取, 预计剩余时间: {get_pbar_remaining(pbar, start_time)}",
                int((pbar.n / subscribe_urls_len) * 100),
            )
            if config.open_online_search and pbar.n / subscribe_urls_len == 1:
                callback("正在获取在线搜索结果, 请耐心等待", 0)

    pbar.set_description(f"Processing subscribe, {subscribe_urls_len} urls remaining")
    callback(f"正在获取订阅源更新, 共{subscribe_urls_len}个订阅源", 0)
    with concurrent.futures.ThreadPoolExecutor() as pool:
        loop = asyncio.get_running_loop()
        tasks = []
        for subscribe_url in config.subscribe_urls:
            task = loop.run_in_executor(pool, process_subscribe_channels, subscribe_url)
            tasks.append(task)
        await tqdm_asyncio.gather(*tasks, disable=True)
    print("Finished processing subscribe urls")
    pbar.close()
    return channels
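# Note: each entry collected here is a (url, date, resolution) tuple. The date
# is always None for subscribe sources, and the resolution is only filled when
# the source name carries a suffix such as "_(1920x1080)".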
async def get_channels_by_online_search(names, callback):
    """
    Get the channels by online search
    """
    channels = {}
    pageUrl = await use_accessible_url(callback)
    if not pageUrl:
        return channels
    github_actions = os.environ.get("GITHUB_ACTIONS")
    if github_actions:
        proxy_list = get_proxy_list()
        response_times = await asyncio.gather(*(get_speed(url) for url in proxy_list))
        proxy_list_with_speed = [
            (proxy, response_time)
            for proxy, response_time in zip(proxy_list, response_times)
            if response_time is not None
        ]
        proxy_list_with_speed.sort(key=lambda x: x[1])
        best_proxy = proxy_list_with_speed[0][0] if proxy_list_with_speed else None
        print(f"Using proxy: {best_proxy}")
    start_time = time()

    def process_channel_by_online_search(name):
        driver = setup_driver(best_proxy if github_actions else None)
        wait = WebDriverWait(driver, timeout)
        info_list = []
        try:
            retry_func(lambda: driver.get(pageUrl), name=f"online search:{name}")
            search_box = retry_func(
                lambda: wait.until(
                    EC.presence_of_element_located((By.XPATH, '//input[@type="text"]'))
                )
            )
            search_box.clear()
            search_box.send_keys(name)
            submit_button = retry_func(
                lambda: wait.until(
                    EC.element_to_be_clickable((By.XPATH, '//input[@type="submit"]'))
                )
            )
            driver.execute_script("arguments[0].click();", submit_button)
            isFavorite = name in config.favorite_list
            pageNum = (
                config.favorite_page_num if isFavorite else config.default_page_num
            )
            for page in range(1, pageNum + 1):
                try:
                    if page > 1:
                        page_link = retry_func(
                            lambda: wait.until(
                                EC.element_to_be_clickable(
                                    (
                                        By.XPATH,
                                        f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
                                    )
                                )
                            )
                        )
                        driver.execute_script("arguments[0].click();", page_link)
                    source = re.sub(
                        r"<!--.*?-->",
                        "",
                        driver.page_source,
                        flags=re.DOTALL,
                    )
                    soup = BeautifulSoup(source, "html.parser")
                    if soup:
                        results = get_results_from_soup(soup, name)
                        print(name, "page:", page, "results len:", len(results))
                        for result in results:
                            url, date, resolution = result
                            if url and check_url_by_patterns(url):
                                info_list.append((url, date, resolution))
                    else:
                        print(f"No results found for {name}")
                except Exception as e:
                    print(f"Error on page {page}: {e}")
                    continue
        except Exception as e:
            print(f"Error on search: {e}")
            pass
        finally:
            channels[format_channel_name(name)] = info_list
            names_queue.task_done()
            pbar.update()
            pbar.set_description(
                f"Processing online search, {names_len - pbar.n} channels remaining"
            )
            callback(
                f"正在线上查询更新, 剩余{names_len - pbar.n}个频道待查询, 预计剩余时间: {get_pbar_remaining(pbar, start_time)}",
                int((pbar.n / names_len) * 100),
            )
            driver.quit()

    names_queue = asyncio.Queue()
    for name in names:
        await names_queue.put(name)
    names_len = names_queue.qsize()
    pbar = tqdm_asyncio(total=names_len)
    pbar.set_description(f"Processing online search, {names_len} channels remaining")
    callback(f"正在线上查询更新, 共{names_len}个频道", 0)
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        while not names_queue.empty():
            loop = asyncio.get_running_loop()
            name = await names_queue.get()
            loop.run_in_executor(pool, process_channel_by_online_search, name)
    print("Finished processing online search")
    pbar.close()
    return channels
def update_channel_urls_txt(cate, name, urls):
    """
    Update the category and channel urls to the final file
    """
    genre_line = cate + ",#genre#\n"
    filename = "result_new.txt"
    if not os.path.exists(filename):
        open(filename, "w").close()
    with open(filename, "r", encoding="utf-8") as f:
        content = f.read()
    with open(filename, "a", encoding="utf-8") as f:
        if genre_line not in content:
            f.write(genre_line)
        for url in urls:
            if url is not None:
                f.write(name + "," + url + "\n")
def update_file(final_file, old_file):
    """
    Update the file
    """
    old_file_path = resource_path(old_file, persistent=True)
    final_file_path = resource_path(final_file, persistent=True)
    if os.path.exists(old_file_path):
        os.replace(old_file_path, final_file_path)
def get_channel_url(element):
    """
    Get the url from the element text
    """
    url = None
    urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    url_search = re.search(
        urlRegex,
        element.get_text(strip=True),
    )
    if url_search:
        url = url_search.group()
    return url
def get_channel_info(element):
    """
    Get the channel info (date and resolution)
    """
    date, resolution = None, None
    info_text = element.get_text(strip=True)
    if info_text:
        # The info text is expected to look like "<date> <extra>•<resolution>".
        date, resolution = (
            (info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
            (
                info_text.partition(" ")[2].partition("•")[2]
                if info_text.partition(" ")[2].partition("•")[2]
                else None
            ),
        )
    return date, resolution
def get_results_from_soup(soup, name):
    """
    Get the results from the soup
    """
    results = []
    for element in soup.descendants:
        if isinstance(element, NavigableString):
            url = get_channel_url(element)
            if url and not any(item[0] == url for item in results):
                url_element = soup.find(lambda tag: tag.get_text(strip=True) == url)
                if url_element:
                    name_element = url_element.find_previous_sibling()
                    if name_element:
                        channel_name = name_element.get_text(strip=True)
                        if format_channel_name(name) == format_channel_name(
                            channel_name
                        ):
                            info_element = url_element.find_next_sibling()
                            date, resolution = get_channel_info(info_element)
                            results.append((url, date, resolution))
    return results
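# Assumption about the search-result markup (not enforced by the code): each
# hit is rendered as sibling nodes, channel name first, then the stream url,
# then an info node with date and resolution, which is why the previous and
# next siblings of the url element are inspected.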
async def get_speed(url, urlTimeout=timeout):
    """
    Get the speed of the url
    """
    retry_options = ExponentialRetry(attempts=1, max_timeout=urlTimeout)
    retry_client = RetryClient(raise_for_status=False, retry_options=retry_options)
    start = time()
    total = float("inf")
    try:
        async with retry_client.get(url) as response:
            resStatus = response.status
            end = time()
            if resStatus == 200:
                total = int(round((end - start) * 1000))
            else:
                total = float("inf")
    except:
        total = float("inf")
    await retry_client.close()
    return total
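# get_speed returns the round-trip time in milliseconds for an HTTP 200
# response and float("inf") for any error or non-200 status, so callers can
# sort ascending or filter out unreachable urls.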
async def sort_urls_by_speed_and_resolution(infoList):
    """
    Sort by speed and resolution
    """
    response_times = await asyncio.gather(*(get_speed(url) for url, _, _ in infoList))
    valid_responses = [
        (info, rt) for info, rt in zip(infoList, response_times) if rt != float("inf")
    ]

    def extract_resolution(resolution_str):
        numbers = re.findall(r"\d+x\d+", resolution_str)
        if numbers:
            width, height = map(int, numbers[0].split("x"))
            return width * height
        else:
            return 0

    default_response_time_weight = 0.5
    default_resolution_weight = 0.5
    response_time_weight = getattr(
        config, "response_time_weight", default_response_time_weight
    )
    resolution_weight = getattr(config, "resolution_weight", default_resolution_weight)
    # Check if weights are valid
    if not (
        0 <= response_time_weight <= 1
        and 0 <= resolution_weight <= 1
        and response_time_weight + resolution_weight == 1
    ):
        response_time_weight = default_response_time_weight
        resolution_weight = default_resolution_weight

    def combined_key(item):
        (_, _, resolution), response_time = item
        resolution_value = extract_resolution(resolution) if resolution else 0
        return (
            -(response_time_weight * response_time)
            + resolution_weight * resolution_value
        )

    sorted_res = sorted(valid_responses, key=combined_key, reverse=True)
    return sorted_res
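# Scoring sketch (default 0.5/0.5 weights): a stream measured at 200 ms with a
# 1920x1080 resolution scores -0.5 * 200 + 0.5 * (1920 * 1080) = 1036700, so
# resolution dominates unless the weights are tuned in the config.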
def filter_by_date(data):
    """
    Filter by date and limit
    """
    default_recent_days = 30
    use_recent_days = getattr(config, "recent_days", default_recent_days)
    if not isinstance(use_recent_days, int) or use_recent_days <= 0:
        use_recent_days = default_recent_days
    start_date = datetime.datetime.now() - datetime.timedelta(days=use_recent_days)
    recent_data = []
    unrecent_data = []
    for (url, date, resolution), response_time in data:
        item = ((url, date, resolution), response_time)
        if date:
            date = datetime.datetime.strptime(date, "%m-%d-%Y")
            if date >= start_date:
                recent_data.append(item)
            else:
                unrecent_data.append(item)
        else:
            unrecent_data.append(item)
    recent_data_len = len(recent_data)
    if recent_data_len == 0:
        recent_data = unrecent_data
    elif recent_data_len < config.urls_limit:
        recent_data.extend(unrecent_data[: config.urls_limit - len(recent_data)])
    return recent_data
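# Dates are parsed with the "%m-%d-%Y" format, e.g. "06-25-2024". Entries with
# no date or an out-of-window date only pad the result when fewer than
# config.urls_limit recent entries were found, or replace it entirely when no
# entry is recent.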
def get_total_urls_from_info_list(infoList):
    """
    Get the total urls from info list
    """
    total_urls = [url for url, _, _ in infoList]
    return list(dict.fromkeys(total_urls))[: int(config.urls_limit)]
def get_total_urls_from_sorted_data(data):
    """
    Get the total urls, filtered by date and deduplicated, from sorted data
    """
    total_urls = []
    if len(data) > config.urls_limit:
        total_urls = [url for (url, _, _), _ in filter_by_date(data)]
    else:
        total_urls = [url for (url, _, _), _ in data]
    return list(dict.fromkeys(total_urls))[: config.urls_limit]
def is_ipv6(url):
    """
    Check if the url is ipv6
    """
    try:
        host = urllib.parse.urlparse(url).hostname
        ipaddress.IPv6Address(host)
        return True
    except ValueError:
        return False
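# Examples (illustrative urls):
#   is_ipv6("http://[2001:db8::1]:8080/live") -> True
#   is_ipv6("http://192.168.1.1:8080/live")   -> False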
def check_url_ipv_type(url):
    """
    Check if the url is compatible with the ipv type in the config
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv4":
        return not is_ipv6(url)
    elif ipv_type == "ipv6":
        return is_ipv6(url)
    else:
        return True
def check_by_domain_blacklist(url):
    """
    Check by domain blacklist
    """
    domain_blacklist = [
        urlparse(domain).netloc if urlparse(domain).scheme else domain
        for domain in getattr(config, "domain_blacklist", [])
    ]
    return urlparse(url).netloc not in domain_blacklist
def check_by_url_keywords_blacklist(url):
    """
    Check by URL keywords blacklist
    """
    url_keywords_blacklist = getattr(config, "url_keywords_blacklist", [])
    return not any(keyword in url for keyword in url_keywords_blacklist)
def check_url_by_patterns(url):
    """
    Check the url by patterns
    """
    return (
        check_url_ipv_type(url)
        and check_by_domain_blacklist(url)
        and check_by_url_keywords_blacklist(url)
    )
def filter_urls_by_patterns(urls):
    """
    Filter urls by patterns
    """
    urls = [url for url in urls if check_url_ipv_type(url)]
    urls = [url for url in urls if check_by_domain_blacklist(url)]
    urls = [url for url in urls if check_by_url_keywords_blacklist(url)]
    return urls
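# check_url_by_patterns and filter_urls_by_patterns apply the same three
# checks (ip version, domain blacklist, url keyword blacklist); the former
# tests a single candidate url, the latter filters a whole list.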
async def use_accessible_url(callback):
    """
    Check if the url is accessible
    """
    callback(f"正在获取最优的在线检索节点", 0)
    baseUrl1 = "https://www.foodieguide.com/iptvsearch/"
    baseUrl2 = "http://tonkiang.us/"
    task1 = asyncio.create_task(get_speed(baseUrl1, 30))
    task2 = asyncio.create_task(get_speed(baseUrl2, 30))
    task_results = await asyncio.gather(task1, task2)
    callback(f"获取在线检索节点完成", 100)
    if task_results[0] == float("inf") and task_results[1] == float("inf"):
        return None
    if task_results[0] < task_results[1]:
        return baseUrl1
    else:
        return baseUrl2
def get_fofa_urls_from_region_list():
    """
    Get the FOFA urls from the region list
    """
    region_list = getattr(config, "region_list", [])
    urls = []
    region_url = getattr(fofa_map, "region_url")
    if "all" in region_list:
        urls = [url for url in region_url.values() if url]
    else:
        for region in region_list:
            if region in region_url:
                urls.append(region_url[region])
    return urls
async def get_channels_by_fofa(callback):
    """
    Get the channels by FOFA
    """
    fofa_urls = get_fofa_urls_from_region_list()
    fofa_urls_len = len(fofa_urls)
    pbar = tqdm_asyncio(total=fofa_urls_len)
    start_time = time()
    fofa_results = {}

    def process_fofa_channels(fofa_url, pbar, fofa_urls_len, callback):
        # merge_objects returns a new dict, so rebind the enclosing result
        nonlocal fofa_results
        driver = setup_driver()
        try:
            retry_func(lambda: driver.get(fofa_url), name=fofa_url)
            fofa_source = re.sub(r"<!--.*?-->", "", driver.page_source, flags=re.DOTALL)
            urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source))
            channels = {}
            for url in urls:
                try:
                    final_url = url + "/iptv/live/1000.json?key=txiptv"
                    response = retry_func(
                        lambda: requests.get(final_url, timeout=timeout),
                        name=final_url,
                    )
                    try:
                        json_data = response.json()
                        if json_data["code"] == 0:
                            try:
                                for item in json_data["data"]:
                                    if isinstance(item, dict):
                                        item_name = format_channel_name(
                                            item.get("name")
                                        )
                                        item_url = item.get("url").strip()
                                        if item_name and item_url:
                                            total_url = url + item_url
                                            if item_name not in channels:
                                                channels[item_name] = [total_url]
                                            else:
                                                channels[item_name].append(total_url)
                            except Exception as e:
                                # print(f"Error on fofa: {e}")
                                continue
                    except Exception as e:
                        # print(f"{url}: {e}")
                        continue
                except Exception as e:
                    # print(f"{url}: {e}")
                    continue
            fofa_results = merge_objects(fofa_results, channels)
        except Exception as e:
            # print(e)
            pass
        finally:
            pbar.update()
            remain = fofa_urls_len - pbar.n
            pbar.set_description(f"Processing multicast, {remain} regions remaining")
            callback(
                f"正在获取组播源更新, 剩余{remain}个地区待获取, 预计剩余时间: {get_pbar_remaining(pbar, start_time)}",
                int((pbar.n / fofa_urls_len) * 100),
            )
            if config.open_online_search and pbar.n / fofa_urls_len == 1:
                callback("正在获取在线搜索结果, 请耐心等待", 0)
            driver.quit()

    pbar.set_description(f"Processing multicast, {fofa_urls_len} regions remaining")
    callback(f"正在获取组播源更新, 共{fofa_urls_len}个地区", 0)
    with concurrent.futures.ThreadPoolExecutor() as pool:
        loop = asyncio.get_running_loop()
        tasks = []
        for fofa_url in fofa_urls:
            task = loop.run_in_executor(
                pool, process_fofa_channels, fofa_url, pbar, fofa_urls_len, callback
            )
            tasks.append(task)
        await tqdm_asyncio.gather(*tasks, disable=True)
    pbar.close()
    return fofa_results
def merge_objects(*objects):
    """
    Merge objects
    """
    merged_dict = {}
    for obj in objects:
        if not isinstance(obj, dict):
            raise TypeError("All input objects must be dictionaries")
        for key, value in obj.items():
            if key not in merged_dict:
                merged_dict[key] = set()
            if isinstance(value, set):
                merged_dict[key].update(value)
            elif isinstance(value, list):
                for item in value:
                    merged_dict[key].add(item)
            else:
                merged_dict[key].add(value)
    for key, value in merged_dict.items():
        merged_dict[key] = list(value)
    return merged_dict
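# Example (illustrative values): merge_objects({"cctv1": ["a"]}, {"cctv1": ["a", "b"]})
# returns {"cctv1": ["a", "b"]}; values are accumulated in a set, so duplicates
# are dropped and the order of the resulting list is not guaranteed.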