上传文件至 'lib'

2024-02-21 07:56:38 +00:00 · 2024-02-21 07:56:38 +00:00 · fc90801208
commit fc90801208
parent 21e1ab14b4
1 changed files with 118 additions and 0 deletions
--- a/lib/getsearchtxt.py
+++ b/lib/getsearchtxt.py
@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import json
+import requests
+import time
+import traceback
+import gzip
+
+p=re.compile(r'.*/s/(.*)')
+skipp = re.compile(r'.*(cover|screen|频道).*',re.IGNORECASE)
+reqcount=1
+sharedict=set()
+
+def getlist(w,shareid, fileid,morepage):
+    global p
+    global skipp
+    global reqcount
+    global sharedict
+
+    reqcount += 1
+    if reqcount % 5 == 0:
+        print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}",file=sys.stderr)
+        #time.sleep(1)
+    url = f'http://192.168.101.188:9978/proxy?do=pikpak&type=list&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
+    print(f"url: {url}",file=sys.stderr)
+    resp = requests.get(url)
+    content = resp.content.decode('utf-8')
+    lines = content.split("\n")
+    if "folder" not in content and len(lines)<=4:
+        return
+    isfirst=True
+    for line in lines:
+        if isfirst:
+            isfirst=False
+            print(f"first line:{line}",file=sys.stderr)
+        if skipp.match(line):
+            continue
+        linearr = line.split('\t')
+        if len(linearr)>2:
+            m = p.match(linearr[0])
+            if m:
+                arr = m.group(1).split("/")
+            else:
+                arr = linearr[0].split("/")
+            shareid=arr[0]
+            fileid=arr[1] if len(arr)>1 else ""
+            if shareid+"/"+fileid in sharedict:
+                print(f"skip shareid{shareid} fileid:{fileid}", file=sys.stderr)
+                continue
+            w.write(line+"\n")
+            w.flush()
+            if linearr[2] == "folder":
+                getlist(w,shareid,fileid,False)
+
+    if len(lines)>0:
+        getlist(w,shareid,fileid,True)
+
+def main():
+    try:
+        f = gzip.open(sys.argv[1]+".raw.gz",mode="rt",encoding="utf-8")
+        if f is not None:
+            print(f"found gz raw file:{sys.argv[1]}.raw.gz, extract it",file=sys.stderr)
+            with(open(sys.argv[1]+".raw","w",encoding="utf-8")) as w:
+                while(True):
+                    lines = f.readlines()
+                    if len(lines)<=0:
+                        break
+                    for line in lines:
+                        line=line.strip()
+                        w.write(line+"\n")
+            f.seek(0)
+    except:
+        traceback.print_exc()
+        try:
+            f = open(sys.argv[1]+".raw","r",encoding="utf-8")
+        except:
+            f = None
+    if f is not None:
+        print("found old raw file")
+        while True:
+            lines = f.readlines()
+            if len(lines)<=0:
+                break
+            for line in lines:
+                linearr = line.split("\t")
+                m = p.match(linearr[0])
+                if m:
+                    arr = m.group(1).split("/")
+                else:
+                    arr = linearr[0].split("/")
+                if len(arr)>1:
+                    shareid = arr[0]
+                    fileid = arr[1]
+                    sharedict.add(shareid+"/"+fileid)
+        f.close()
+        print(f"old raw file record:{len(sharedict)}")
+    else:
+        print("no old raw file")
+    with(open(sys.argv[1]+".raw","a+",encoding="utf-8")) as w:
+        with(open(sys.argv[1],"r",encoding="utf-8")) as f:
+            j = json.load(f)
+            for c in j:
+                shareid=c.get("type_id")
+                fileid=""
+                m = p.match(shareid)
+                if m:
+                    arr = m.group(1).split("/")
+                else:
+                    arr = shareid.split("/")
+                shareid=arr[0]
+                fileid=arr[1] if len(arr)>1 else ""
+                if shareid+"/"+fileid in sharedict:
+                    continue
+                getlist(w,shareid,fileid,False)
+
+main()