#!/usr/bin/env python3
"""Crawl PikPak share listings through an HTTP proxy into a resumable file.

Usage: ``getsearchtxt.py <search.json>``

``<search.json>`` holds a JSON list of objects whose ``type_id`` is either a
share URL containing ``/s/<shareid>/<fileid>`` or a bare
``<shareid>/<fileid>`` path.  Every listing line returned by the proxy is
appended to ``<search.json>.raw``.  On start-up an optional
``<search.json>.raw.gz`` is unpacked over ``<search.json>.raw``, and the
records already present in that file are skipped, so an interrupted crawl
can be resumed.

Reconstructed from the patch that added ``lib/getsearchtxt.py``.
"""

import gzip
import json
import re
import sys
import time  # only needed if the throttle in getlist() is enabled
import traceback
from urllib.parse import urlencode

# Extracts "<shareid>/<fileid>..." from a share URL of the form ".../s/<path>".
p = re.compile(r'.*/s/(.*)')
# Listing lines matching this pattern are ignored entirely.
skipp = re.compile(r'.*(cover|screen|频道).*', re.IGNORECASE)
# Number of listing requests issued so far (progress reporting only).
reqcount = 1
# "<shareid>/<fileid>" keys that are already stored in the .raw file.
sharedict = set()

# Proxy endpoint that serves the listings.
LIST_URL = 'http://192.168.101.188:9978/proxy'
# Seconds to wait for the proxy before giving up on a request.
REQUEST_TIMEOUT = 60


def _path_parts(ref):
    """Split a share URL or a plain ``share/file`` path into its segments."""
    m = p.match(ref)
    return (m.group(1) if m else ref).split("/")


def _fetch_listing(shareid, fileid, morepage):
    """Request one listing page from the proxy and return it as UTF-8 text."""
    # Third-party dependency, imported here so that the module itself can be
    # imported without the HTTP client being installed.
    import requests

    query = urlencode([
        ("do", "pikpak"),
        ("type", "list"),
        ("share_id", shareid),
        ("file_id", fileid),
        ("pass_code", ""),
        ("morepage", morepage),
    ])
    url = f"{LIST_URL}?{query}"
    print(f"url: {url}", file=sys.stderr)
    # Without a timeout a stalled proxy would block the crawl forever.
    resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    return resp.content.decode('utf-8')


def getlist(w, shareid, fileid, morepage):
    """Write the listing of one folder, and of all its sub-folders, to *w*.

    Args:
        w: Writable text stream; every accepted listing line is written to
            it followed by a newline and flushed immediately.
        shareid: Share identifier of the folder to list.
        fileid: File identifier of the folder (``""`` for the share root).
        morepage: Value of the proxy's ``morepage`` query parameter for the
            first request; follow-up requests for the same folder always
            send ``True``.

    Listing lines are tab-separated; the first field is the item's share
    path and a third field equal to ``"folder"`` marks a sub-folder, which
    is listed recursively.  Lines matching ``skipp`` and items whose key is
    already in ``sharedict`` are neither written nor descended into.
    Errors raised by the HTTP request propagate to the caller.
    """
    global reqcount

    previous = None
    while True:
        reqcount += 1
        if reqcount % 5 == 0:
            print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}",
                  file=sys.stderr)
            # NOTE: add time.sleep(1) here to throttle the crawl if needed.

        content = _fetch_listing(shareid, fileid, morepage)
        lines = content.split("\n")
        # A short answer that mentions no folder means there is nothing
        # (more) to list.
        if "folder" not in content and len(lines) <= 4:
            return
        # Guard against a proxy that keeps answering with the same page,
        # which would otherwise make the pagination loop endless.
        if content == previous:
            return
        previous = content

        print(f"first line:{lines[0]}", file=sys.stderr)
        for line in lines:
            if skipp.match(line):
                continue
            fields = line.split('\t')
            if len(fields) <= 2:
                continue
            # Separate names for the child keep this folder's own ids
            # intact for the next-page request below.
            parts = _path_parts(fields[0])
            child_share = parts[0]
            child_file = parts[1] if len(parts) > 1 else ""
            if child_share + "/" + child_file in sharedict:
                print(f"skip shareid{child_share} fileid:{child_file}",
                      file=sys.stderr)
                continue
            w.write(line + "\n")
            w.flush()
            if fields[2] == "folder":
                getlist(w, child_share, child_file, False)

        # Ask for the next page of this same folder.
        morepage = True


def _extract_gz(raw_path):
    """Unpack ``<raw_path>.gz`` over *raw_path* if the archive exists."""
    gz_path = raw_path + ".gz"
    try:
        with gzip.open(gz_path, mode="rt", encoding="utf-8") as src:
            print(f"found gz raw file:{gz_path}, extract it", file=sys.stderr)
            with open(raw_path, "w", encoding="utf-8") as dst:
                for line in src:
                    dst.write(line.strip() + "\n")
    except FileNotFoundError:
        # No archive is the normal case, not an error.
        pass
    except Exception:
        # Best effort: a damaged archive must not prevent the crawl.
        traceback.print_exc()


def _load_seen(raw_path):
    """Add the share keys of all records stored in *raw_path* to sharedict."""
    try:
        f = open(raw_path, "r", encoding="utf-8")
    except OSError:
        print("no old raw file")
        return
    print("found old raw file")
    with f:
        for line in f:
            parts = _path_parts(line.rstrip("\n").split("\t")[0])
            if len(parts) > 1:
                sharedict.add(parts[0] + "/" + parts[1])
    print(f"old raw file record:{len(sharedict)}")


def main(argv=None):
    """Crawl every share named in the JSON file given as first argument.

    Args:
        argv: Argument vector laid out like ``sys.argv`` (program name
            first); defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: getsearchtxt.py <search.json>")

    base = argv[1]
    raw_path = base + ".raw"
    _extract_gz(raw_path)
    _load_seen(raw_path)

    with open(base, "r", encoding="utf-8") as f:
        j = json.load(f)

    with open(raw_path, "a", encoding="utf-8") as w:
        for c in j:
            type_id = c.get("type_id")
            if not isinstance(type_id, str):
                # Nothing to crawl without a share reference.
                print(f"skip entry without type_id: {c}", file=sys.stderr)
                continue
            parts = _path_parts(type_id)
            shareid = parts[0]
            fileid = parts[1] if len(parts) > 1 else ""
            if shareid + "/" + fileid in sharedict:
                continue
            getlist(w, shareid, fileid, False)


if __name__ == "__main__":
    main()