From fc908012087883a890b47b6edbd3b448f93c103e Mon Sep 17 00:00:00 2001
From: lw0704 <lw1560912@163.com>
Date: Wed, 21 Feb 2024 07:56:38 +0000
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'lib'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/getsearchtxt.py | 118 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 lib/getsearchtxt.py

diff --git a/lib/getsearchtxt.py b/lib/getsearchtxt.py
new file mode 100644
index 0000000..444f02e
--- /dev/null
+++ b/lib/getsearchtxt.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import json
+import requests
+import time
+import traceback
+import gzip
+
+p=re.compile(r'.*/s/(.*)')  # group 1 is everything after the last "/s/" in the matched text
+skipp = re.compile(r'.*(cover|screen|频道).*',re.IGNORECASE)  # listing lines containing any of these words are skipped by getlist()
+reqcount=1  # running counter that getlist() increments and logs on every 5th value
+sharedict=set()  # "shareid/fileid" keys of entries already recorded (a set, despite the name)
+
+def getlist(w,shareid, fileid,morepage):
+    global p
+    global skipp
+    global reqcount
+    global sharedict
+
+    reqcount += 1
+    if reqcount % 5 == 0:
+        print(f"reqcount:{reqcount} shareid:{shareid} fileid:{fileid}",file=sys.stderr)
+        #time.sleep(1)
+    url = f'http://192.168.101.188:9978/proxy?do=pikpak&type=list&share_id={shareid}&file_id={fileid}&pass_code=&morepage={morepage}'
+    print(f"url: {url}",file=sys.stderr)
+    resp = requests.get(url)
+    content = resp.content.decode('utf-8')
+    lines = content.split("\n")
+    if "folder" not in content and len(lines)<=4:
+        return
+    isfirst=True
+    for line in lines:
+        if isfirst:
+            isfirst=False
+            print(f"first line:{line}",file=sys.stderr)
+        if skipp.match(line):
+            continue
+        linearr = line.split('\t')
+        if len(linearr)>2:
+            m = p.match(linearr[0])
+            if m:
+                arr = m.group(1).split("/")
+            else:
+                arr = linearr[0].split("/")
+            shareid=arr[0]
+            fileid=arr[1] if len(arr)>1 else ""
+            if shareid+"/"+fileid in sharedict:
+                print(f"skip shareid{shareid} fileid:{fileid}", file=sys.stderr)
+                continue
+            w.write(line+"\n")
+            w.flush()
+            if linearr[2] == "folder":
+                getlist(w,shareid,fileid,False)
+
+    if len(lines)>0:
+        getlist(w,shareid,fileid,True)
+
+def main():
+    try:
+        f = gzip.open(sys.argv[1]+".raw.gz",mode="rt",encoding="utf-8")
+        if f is not None:
+            print(f"found gz raw file:{sys.argv[1]}.raw.gz, extract it",file=sys.stderr)
+            with(open(sys.argv[1]+".raw","w",encoding="utf-8")) as w:
+                while(True):
+                    lines = f.readlines()
+                    if len(lines)<=0:
+                        break
+                    for line in lines:
+                        line=line.strip()
+                        w.write(line+"\n")
+            f.seek(0)
+    except:
+        traceback.print_exc()
+        try:
+            f = open(sys.argv[1]+".raw","r",encoding="utf-8")
+        except:
+            f = None
+    if f is not None:
+        print("found old raw file")
+        while True:
+            lines = f.readlines()
+            if len(lines)<=0:
+                break
+            for line in lines:
+                linearr = line.split("\t")
+                m = p.match(linearr[0])
+                if m:
+                    arr = m.group(1).split("/")
+                else:
+                    arr = linearr[0].split("/")
+                if len(arr)>1:
+                    shareid = arr[0]
+                    fileid = arr[1]
+                    sharedict.add(shareid+"/"+fileid)
+        f.close()
+        print(f"old raw file record:{len(sharedict)}")
+    else:
+        print("no old raw file")
+    with(open(sys.argv[1]+".raw","a+",encoding="utf-8")) as w:
+        with(open(sys.argv[1],"r",encoding="utf-8")) as f:
+            j = json.load(f)
+            for c in j:
+                shareid=c.get("type_id")
+                fileid=""
+                m = p.match(shareid)
+                if m:
+                    arr = m.group(1).split("/")
+                else:
+                    arr = shareid.split("/")
+                shareid=arr[0]
+                fileid=arr[1] if len(arr)>1 else ""
+                if shareid+"/"+fileid in sharedict:
+                    continue
+                getlist(w,shareid,fileid,False)
+
+main()  # runs unconditionally when the module is loaded (no __name__ guard), so importing this file starts a crawl