Compare commits
No commits in common. "f5eb46e8a8919c8f7b466589561912a7db387de4" and "69a1c20922d1cd3265e896fe41ebe3c6b96bb127" have entirely different histories.
f5eb46e8a8 ... 69a1c20922
5  .gitignore  (vendored)
@@ -1,2 +1,3 @@
-PHPSESSID.txt
-download
+node_modules
+download
+chromium_data
17  README.md
@@ -1,11 +1,12 @@
-This is a python script for downloading original pixiv images from popular search results via a premium account.
-
-# Instructions
-
-1. Add `210.140.131.219 www.pixiv.net` to your hosts file to bypass Cloudflare, or the script will be blocked. (`nslookup pixiv.net` in case the address needs to be changed)
+### example usage
+```sh
+node pixiv-downloader.js "初音ミク" -p 3
+```
 
-2. Download this repo to your computer of course, and open the terminal in it.
+Chromium window will appear where you'll need to sign in to your pixiv premium account if you haven't already.
 
-3. In your browser, on Pixiv logged in to a premium account, in dev tools Application tab, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.
+### args
 
-4. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of 初音ミク tag, including r18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
+- `-p <num>` for number of pages (default 1)
+- `-s <num>` for page number to start on (default 1)
+- `-r` to include R18
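The python-side README (the deleted lines) authenticates through the `PHPSESSID` value pasted into `PHPSESSID.txt`. A minimal sketch of the search request that cookie enables, reusing the endpoint and cookie handling from pixiv-popular-downloader.py further down (the tag is the README's own example; the ajax API is unofficial and may change):

```python
# Sketch of the authenticated popular-search request the deleted README
# describes. Endpoint, parameters, and cookie handling mirror
# pixiv-popular-downloader.py in this compare.
import requests
from urllib.parse import quote

# Cookie value copied from dev tools into PHPSESSID.txt (README step 3).
PHPSESSID = open("PHPSESSID.txt").read().strip()

tag = "初音ミク"
url = (
    f"https://www.pixiv.net/ajax/search/artworks/{quote(tag, safe='')}"
    "?order=popular_d&mode=safe&p=1"
)
data = requests.get(url, cookies={"PHPSESSID": PHPSESSID}).json()
if data["error"]:
    raise SystemExit(data["message"])
print(len(data["body"]["illustManga"]["data"]), "results on page 1")
```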
3  junk/.gitignore  (vendored)
@@ -1,3 +0,0 @@
-node_modules
-download
-chromium_data
12  junk/README.md
@@ -1,12 +0,0 @@
-### example usage
-```sh
-node pixiv-downloader.js "初音ミク" -p 3
-```
-
-Chromium window will appear where you'll need to sign in to your pixiv premium account if you haven't already.
-
-### args
-
-- `-p <num>` for number of pages (default 1)
-- `-s <num>` for page number to start on (default 1)
-- `-r` to include R18
0  junk/package-lock.json → package-lock.json  (generated)
47  pixiv-popular-downloader.py
@@ -1,47 +0,0 @@
-import argparse
-import requests
-from urllib.parse import quote as encodeURI
-import os
-
-ap = argparse.ArgumentParser()
-ap.add_argument("tag", help="Pixiv tag(s) to search")
-ap.add_argument("-p", dest="numpages", type=int, default=1, help="number of pages to download (default 1)")
-ap.add_argument("-s", dest="startpagenum", type=int, default=1, help="page number to start at")
-ap.add_argument("-r", action='store_true', help="include r18 posts")
-args = ap.parse_args()
-
-PHPSESSID = None
-with open("PHPSESSID.txt", 'r') as f:
-    PHPSESSID = f.read()
-
-download_count = 1
-for i in range(args.startpagenum, args.numpages+1):
-    page_url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
-    print("get", page_url)
-    page_data = requests.get(page_url, cookies={"PHPSESSID": PHPSESSID}).json()
-    if (page_data['error']):
-        print(page_data['message'])
-        exit(1)
-    for illust in page_data['body']['illustManga']['data']:
-        illust_r18 = bool(illust['xRestrict'])
-        illust_url = f"https://www.pixiv.net/ajax/illust/{illust['id']}/pages"
-        print("get", illust_url)
-        illust_data = requests.get(illust_url).json()
-        if (illust_data['error']):
-            print(illust_data['message'])
-        else:
-            for image in illust_data['body']:
-                image_url = image['urls']['original']
-                download_dir = f"download/{args.tag}/"
-                os.makedirs(download_dir, exist_ok=True)
-                download_filename = str(download_count) + '_' + ('x_' if illust_r18 else '') + image_url.split('/').pop()
-                download_path = download_dir + download_filename
-                if os.path.exists(download_path):
-                    print(download_path, "already exists")
-                    continue
-                print("get", image_url)
-                req = requests.get(image_url, headers={'referer':'https://www.pixiv.net'})
-                with open(download_path, "wb") as f:
-                    f.write(req.content)
-                print("saved", download_filename)
-                download_count = download_count + 1
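One step worth isolating from the deleted script: pixiv's image CDN rejects direct downloads without a pixiv `referer` header, which is presumably why the script sets it explicitly. A standalone sketch of that pages-to-file step (the illust id is a hypothetical placeholder):

```python
# Standalone sketch of the download step from the deleted script above:
# resolve an illust's original image URLs, then fetch each with the referer
# header the CDN expects. The illust id is a hypothetical example.
import os
import requests

illust_id = "12345678"  # hypothetical
pages = requests.get(f"https://www.pixiv.net/ajax/illust/{illust_id}/pages").json()
if not pages["error"]:
    os.makedirs("download", exist_ok=True)
    for image in pages["body"]:
        image_url = image["urls"]["original"]
        req = requests.get(image_url, headers={"referer": "https://www.pixiv.net"})
        with open(os.path.join("download", image_url.split("/")[-1]), "wb") as f:
            f.write(req.content)
```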
35  scrap/pixiv-downloader.py  (Normal file)
@@ -0,0 +1,35 @@
+# simple downloader in python; i couldn't get cookie to work in order to get sorted by popularity so this is only useful for getting results sorted by newest/oldest and non-r18
+
+import argparse
+import requests
+from urllib.parse import quote as encodeURI
+
+#cookie = open("cookie.txt", 'r').read()
+cookie = None
+
+ap = argparse.ArgumentParser()
+ap.add_argument("tags")
+ap.add_argument("-p", "--pages", type=int, default=1, help="number of pages")
+args = ap.parse_args()
+
+for i in range(1, args.pages+1):
+    url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tags, safe='')}?order=popular_d&mode=safe&p={i}"
+    print("get", url)
+    data = requests.get(url, headers={cookie: cookie}).json()
+    if (data['error']):
+        print(data['message'])
+        exit(1)
+    for item in data['body']['illustManga']['data']:
+        url = f"https://www.pixiv.net/ajax/illust/{item['id']}/pages"
+        print("get", url)
+        data = requests.get(url, headers={cookie: cookie}).json()
+        if (data['error']):
+            print(data['message'])
+        else:
+            for image in data['body']:
+                url = image['urls']['original']
+                filename = url.split('/').pop()
+                print("get", url)
+                req = requests.get(url, headers={'referer':'https://www.pixiv.net'})
+                open("download/"+ filename, "wb").write(req.content)
+                print("saved", filename)
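The comment at the top says this scrap script never got the cookie working, and the code shows why: `headers={cookie: cookie}` uses the cookie value as the header name, and with `cookie = None` requests drops `None`-valued headers entirely, so no session ever reaches pixiv. A sketch of the form that does work, as pixiv-popular-downloader.py shows: send the session id through `cookies=` instead of `headers=`:

```python
# Working form of the request this scrap script fumbles: the session id goes
# in the cookie jar, not in headers={cookie: cookie} (which, with cookie set
# to None, requests silently discards).
import requests
from urllib.parse import quote

PHPSESSID = open("PHPSESSID.txt").read().strip()  # session cookie value
url = (
    f"https://www.pixiv.net/ajax/search/artworks/{quote('初音ミク', safe='')}"
    "?order=popular_d&mode=all&p=1"  # mode=all also returns r18 results
)
data = requests.get(url, cookies={"PHPSESSID": PHPSESSID}).json()
print(data["message"] if data["error"] else "ok")
```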