Merge branch 'master' of gitea.moe:lamp/pixiv-popular-downloader

eliminate the need to edit hosts file
2021-09-21 13:12:43 -07:00 · 2021-09-21 13:10:56 -07:00
3 changed files with 17 additions and 12 deletions
@@ -2,12 +2,10 @@ This is a python script for downloading original pixiv images from popular searc

 # Instructions

-1. Add `210.140.131.219 www.pixiv.net` to your hosts file to bypass Cloudflare, or the script will be blocked. (`nslookup pixiv.net` in case the address needs to be changed)
+1. Download this repo to your computer and open a terminal in its folder. Run `pip install -r requirements.txt` if necessary.

-2. Download this repo to your computer of course, and open the terminal in it.
+2. In your browser, log in to Pixiv with a premium account, open the Application tab in dev tools, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.

-3. In your browser, on Pixiv logged in to a premium account, in dev tools Application tab, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.
+3. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of the 初音ミク tag, including R18 results: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`

-4. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of 初音ミク tag, including r18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
-
-5. Check the download folder. If you're getting newest results instead of popular results, then your PHPSESSID failed to work.
+4. Check the download folder. If you're getting the newest results instead of popular results, then your `PHPSESSID` did not work.
@@ -1,5 +1,6 @@
 import argparse
 import requests
+from requests_toolbelt.adapters import host_header_ssl
 from urllib.parse import quote as encodeURI
 import os

@@ -14,19 +15,22 @@ PHPSESSID = None
 with open("PHPSESSID.txt", 'r') as f:
 	PHPSESSID = f.read()

+rqs = requests.Session()
+rqs.mount('https://', host_header_ssl.HostHeaderSSLAdapter())
+
 download_count = 1
 for i in range(args.startpagenum, args.numpages+1):
-	page_url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
+	page_url = f"https://210.140.131.219/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
 	print("get", page_url)
-	page_data = requests.get(page_url, cookies={"PHPSESSID": PHPSESSID}).json()
+	page_data = rqs.get(page_url, cookies={"PHPSESSID": PHPSESSID}, headers={"host":"www.pixiv.net"}).json()
 	if (page_data['error']):
 		print(page_data['message'])
 		exit(1)
 	for illust in page_data['body']['illustManga']['data']:
 		illust_r18 = bool(illust['xRestrict'])
-		illust_url = f"https://www.pixiv.net/ajax/illust/{illust['id']}/pages"
+		illust_url = f"https://210.140.131.219/ajax/illust/{illust['id']}/pages"
 		print("get", illust_url)
-		illust_data = requests.get(illust_url).json()
+		illust_data = rqs.get(illust_url, headers={"host":"www.pixiv.net"}).json()
 		if (illust_data['error']):
 			print(illust_data['message'])
 		else:
@@ -40,8 +44,8 @@ for i in range(args.startpagenum, args.numpages+1):
 					print(download_path, "already exists")
 					continue
 				print("get", image_url)
-				req = requests.get(image_url, headers={'referer':'https://www.pixiv.net'})
+				res = rqs.get(image_url, headers={'referer':'https://www.pixiv.net'})
 				with open(download_path, "wb") as f:
-					f.write(req.content)
+					f.write(res.content)
 				print("saved", download_filename)
 				download_count = download_count + 1
@@ -0,0 +1,3 @@
+requests==2.26.0
+requests-toolbelt==0.9.1
+urllib3==1.26.6
Author	SHA1	Message	Date
lamp	79c5a7abe5	Merge branch 'master' of gitea.moe:lamp/pixiv-popular-downloader	2021-09-21 13:12:43 -07:00
lamp	edb1af72eb	eliminate the need to edit hosts file	2021-09-21 13:10:56 -07:00