feat: random user-agent

This commit is contained in:
guorong.zheng 2024-07-15 17:28:52 +08:00
parent 81fee2679d
commit 33dc21da67
3 changed files with 28 additions and 39 deletions

@ -20,8 +20,8 @@ async-timeout = "*"
pyinstaller = "*"
aiohttp = "*"
flask = "*"
gunicorn = "*"
opencc-python-reimplemented = "*"
fake-useragent = "*"
[requires]
python_version = "3.8"

39
Pipfile.lock generated

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "24e1f3abe777aadad329cd8764b401113d292117c39ce7fd275b5f4248910c5f"
"sha256": "566c817f00efca69cb47208751462f6c8150f4333f85eec72110038d24dabfce"
},
"pipfile-spec": 6,
"requires": {
@ -441,11 +441,19 @@
},
"exceptiongroup": {
"hashes": [
"sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad",
"sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b",
"sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"
],
"markers": "python_version < '3.11'",
"version": "==1.2.1"
"version": "==1.2.2"
},
"fake-useragent": {
"hashes": [
"sha256:57415096557c8a4e23b62a375c21c55af5fd4ba30549227f562d2c4f5b60e3b3",
"sha256:6387269f5a2196b5ba7ed8935852f75486845a1c95c50e72460e6a8e762f5c49"
],
"index": "aliyun",
"version": "==1.5.1"
},
"feedparser": {
"hashes": [
@ -548,15 +556,6 @@
"markers": "python_version >= '3.8'",
"version": "==1.4.1"
},
"gunicorn": {
"hashes": [
"sha256:350679f91b24062c86e386e198a15438d53a7a8207235a78ba1b53df4c4378d9",
"sha256:4a0b436239ff76fb33f11c07a16482c521a7e09c1ce3cc293c2330afe01bec63"
],
"index": "aliyun",
"markers": "python_version >= '3.7'",
"version": "==22.0.0"
},
"h11": {
"hashes": [
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d",
@ -581,6 +580,14 @@
"markers": "python_version < '3.10'",
"version": "==8.0.0"
},
"importlib-resources": {
"hashes": [
"sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c",
"sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"
],
"markers": "python_version < '3.10'",
"version": "==6.4.0"
},
"itsdangerous": {
"hashes": [
"sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef",
@ -870,11 +877,11 @@
},
"setuptools": {
"hashes": [
"sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05",
"sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1"
"sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5",
"sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"
],
"markers": "python_version >= '3.8'",
"version": "==70.2.0"
"version": "==70.3.0"
},
"sgmllib3k": {
"hashes": [

@ -1,25 +1,14 @@
import requests
import re
from bs4 import BeautifulSoup
import random
from fake_useragent import UserAgent
from time import sleep
user_agents = [
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71",
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
"Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
]
headers = {
"Accept": "*/*",
"Connection": "keep-alive",
"Accept-Language": "zh-CN,zh;q=0.8",
"User-Agent": random.choice(user_agents),
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
}
session = requests.Session()
@ -30,7 +19,8 @@ def get_source_requests(url, proxy=None, timeout=30):
Get the source by requests
"""
proxies = {"http": proxy}
headers["User-Agent"] = random.choice(user_agents)
ua = UserAgent()
headers["User-Agent"] = ua.random
response = session.get(url, headers=headers, proxies=proxies, timeout=timeout)
source = re.sub(
r"<!--.*?-->",
@ -55,11 +45,3 @@ def close_session():
Close the requests session
"""
session.close()
def reset_user_agent():
"""
Reset the user agent
"""
global headers
headers["User-Agent"] = random.choice(user_agents)