Compare commits


No commits in common. "f5eb46e8a8919c8f7b466589561912a7db387de4" and "69a1c20922d1cd3265e896fe41ebe3c6b96bb127" have entirely different histories.

9 changed files with 47 additions and 72 deletions

.gitignore

@@ -1,2 +1,3 @@
PHPSESSID.txt
node_modules
download
chromium_data

README.md

@@ -1,11 +1,12 @@
This is a Python script for downloading original pixiv images from popular search results via a premium account.
# Instructions
### example usage
```sh
node pixiv-downloader.js "初音ミク" -p 3
```
1. Add `210.140.131.219 www.pixiv.net` to your hosts file to bypass Cloudflare, or the script will be blocked. (Run `nslookup pixiv.net` in case the address needs to be changed.)
A Chromium window will appear where you'll need to sign in to your pixiv premium account if you haven't already.
2. Download this repo to your computer and open a terminal in it.
3. In your browser, while logged in to a premium pixiv account, open the Application tab in dev tools, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder. (A quick way to sanity-check the cookie is sketched below the args list.)
4. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of the 初音ミク tag, including R18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
### args
- `-p <num>` for number of pages (default 1)
- `-s <num>` for page number to start on (default 1)
- `-r` to include R18
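
Popularity-sorted results only come back when the request carries a valid premium session, so it's worth sanity-checking the cookie before a long run. A minimal sketch, hitting the same `/ajax/search/artworks` endpoint and reading the same response shape as `pixiv-popular-downloader.py` (the tag is arbitrary; `PHPSESSID.txt` is the file from step 3):
```py
# Quick check that the PHPSESSID cookie is accepted before a long download run.
import requests
from urllib.parse import quote

PHPSESSID = open("PHPSESSID.txt").read().strip()
tag = "初音ミク"  # any tag works for this check
url = (f"https://www.pixiv.net/ajax/search/artworks/{quote(tag, safe='')}"
       f"?order=popular_d&mode=safe&p=1")
data = requests.get(url, cookies={"PHPSESSID": PHPSESSID}).json()
if data["error"]:
    print("cookie rejected:", data["message"])
else:
    print("ok:", len(data["body"]["illustManga"]["data"]), "results on page 1")
```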

junk/.gitignore

@@ -1,3 +0,0 @@
node_modules
download
chromium_data

junk/README.md

@@ -1,12 +0,0 @@
### example usage
```sh
node pixiv-downloader.js "初音ミク" -p 3
```
A Chromium window will appear where you'll need to sign in to your pixiv premium account if you haven't already.
### args
- `-p <num>` for number of pages (default 1)
- `-s <num>` for page number to start on (default 1)
- `-r` to include R18

pixiv-popular-downloader.py

@@ -1,47 +0,0 @@
import argparse
import requests
from urllib.parse import quote as encodeURI
import os

ap = argparse.ArgumentParser()
ap.add_argument("tag", help="Pixiv tag(s) to search")
ap.add_argument("-p", dest="numpages", type=int, default=1, help="number of pages to download (default 1)")
ap.add_argument("-s", dest="startpagenum", type=int, default=1, help="page number to start at")
ap.add_argument("-r", action='store_true', help="include r18 posts")
args = ap.parse_args()

PHPSESSID = None
with open("PHPSESSID.txt", 'r') as f:
    PHPSESSID = f.read()

download_count = 1
for i in range(args.startpagenum, args.numpages+1):
    page_url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
    print("get", page_url)
    page_data = requests.get(page_url, cookies={"PHPSESSID": PHPSESSID}).json()
    if (page_data['error']):
        print(page_data['message'])
        exit(1)
    for illust in page_data['body']['illustManga']['data']:
        illust_r18 = bool(illust['xRestrict'])
        illust_url = f"https://www.pixiv.net/ajax/illust/{illust['id']}/pages"
        print("get", illust_url)
        illust_data = requests.get(illust_url).json()
        if (illust_data['error']):
            print(illust_data['message'])
        else:
            for image in illust_data['body']:
                image_url = image['urls']['original']
                download_dir = f"download/{args.tag}/"
                os.makedirs(download_dir, exist_ok=True)
                download_filename = str(download_count) + '_' + ('x_' if illust_r18 else '') + image_url.split('/').pop()
                download_path = download_dir + download_filename
                if os.path.exists(download_path):
                    print(download_path, "already exists")
                    continue
                print("get", image_url)
                req = requests.get(image_url, headers={'referer':'https://www.pixiv.net'})
                with open(download_path, "wb") as f:
                    f.write(req.content)
                print("saved", download_filename)
                download_count = download_count + 1

scrap/pixiv-downloader.py

@@ -0,0 +1,35 @@
# Simple downloader in Python; I couldn't get the cookie to work for popularity
# sorting, so this is only useful for results sorted by newest/oldest and non-R18.
import argparse
import os
import requests
from urllib.parse import quote as encodeURI

#cookie = open("cookie.txt", 'r').read()
cookie = None

ap = argparse.ArgumentParser()
ap.add_argument("tags")
ap.add_argument("-p", "--pages", type=int, default=1, help="number of pages")
args = ap.parse_args()

os.makedirs("download", exist_ok=True)
for i in range(1, args.pages+1):
    # fetch one page of search results
    url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tags, safe='')}?order=popular_d&mode=safe&p={i}"
    print("get", url)
    # note: requests drops None-valued headers, so with cookie = None no Cookie
    # header is sent at all (the original {cookie: cookie} keyed the dict by the
    # variable rather than the 'cookie' header name)
    data = requests.get(url, headers={'cookie': cookie}).json()
    if data['error']:
        print(data['message'])
        exit(1)
    for item in data['body']['illustManga']['data']:
        # list the original image URLs for each illustration
        url = f"https://www.pixiv.net/ajax/illust/{item['id']}/pages"
        print("get", url)
        data = requests.get(url, headers={'cookie': cookie}).json()
        if data['error']:
            print(data['message'])
        else:
            for image in data['body']:
                url = image['urls']['original']
                filename = url.split('/').pop()
                print("get", url)
                # pixiv's image CDN requires a pixiv.net referer
                req = requests.get(url, headers={'referer': 'https://www.pixiv.net'})
                with open("download/" + filename, "wb") as f:
                    f.write(req.content)
                print("saved", filename)