Compare commits
6 Commits
nodejs-pup
...
master
Author | SHA1 | Date | |
---|---|---|---|
e9f2a5606a | |||
79c5a7abe5 | |||
edb1af72eb | |||
9440fcf740 | |||
f5eb46e8a8 | |||
f815b730f0 |
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,3 +1,2 @@
|
||||
node_modules
|
||||
PHPSESSID.txt
|
||||
download
|
||||
chromium_data
|
||||
|
17
README.md
17
README.md
@ -1,12 +1,11 @@
|
||||
This is a Python script for downloading original Pixiv images from popular search results via a premium account.
|
||||
|
||||
### example usage
|
||||
```sh
|
||||
node pixiv-downloader.js "初音ミク" -p 3
|
||||
```
|
||||
# Instructions
|
||||
|
||||
A Chromium window will appear where you'll need to sign in to your Pixiv premium account if you haven't already.
|
||||
1. Download this repo to your computer of course, and open the terminal in it. Run `pip install -r requirements.txt` if necessary.
|
||||
|
||||
### args
|
||||
- `-p <num>` for number of pages (default 1)
|
||||
- `-s <num>` for page number to start on (default 1)
|
||||
- `-r` to include R18
|
||||
2. In your browser, on Pixiv logged in to a premium account, in dev tools Application tab, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.
|
||||
|
||||
3. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of 初音ミク tag, including r18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
|
||||
|
||||
4. Check the download folder. If you're getting newest results instead of popular results, then your PHPSESSID failed to work.
|
||||
|
2631
package-lock.json
generated
2631
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
{"dependencies":{"download":"^8.0.0","minimist":"^1.2.5","puppeteer":"^10.2.0"}}
|
@ -1,65 +0,0 @@
|
||||
// node pixiv-downloader.js <tags> -p [numpages] -s [startpage] [-r]
//
// Downloads original-resolution images for the popular results of a Pixiv
// tag search. Uses a headed Chromium with a persistent profile so a
// premium login (required for order=popular_d) survives between runs.
var argv = require("minimist")(process.argv.slice(2), {
    default: {
        "p": 1, // number of result pages to fetch
        "s": 1  // page number to start on
    }
});
var query = argv._.join(" ");
var puppeteer = require("puppeteer");
var download = require("download");
var fs = require("fs");

(async function(){
    var browser = await puppeteer.launch({
        headless: false,
        // Persistent profile keeps the pixiv session cookies between runs.
        userDataDir: process.cwd() + "/chromium_data"
    });

    // Give the user a chance to log in; continue once they close the tab.
    var loginPage = await browser.newPage();
    await loginPage.setContent(`login to <a target="_blank" href="https://accounts.pixiv.net/login">pixiv</a> if necessary, then close this page to continue.`);
    await new Promise(r => loginPage.on("close", r));

    var page = await browser.newPage();
    for (let i = argv.s; i < argv.s + argv.p; i++) {
        // Search API page i; mode=safe excludes R18 unless -r was given.
        let url = `https://www.pixiv.net/ajax/search/artworks/${encodeURIComponent(query)}?order=popular_d&mode=${argv.r ? "all" : "safe"}&p=${i}`;
        console.log("get page", i, url);
        await page.goto(url);
        let data = JSON.parse(await page.evaluate(() => document.querySelector("body").innerText));
        if (data.error) throw data.message;
        for (let item of data.body.illustManga.data) {
            let url = `https://www.pixiv.net/ajax/illust/${item.id}/pages`;
            console.log("get", url);
            await page.goto(url);
            let data = JSON.parse(await page.evaluate(() => document.querySelector("body").innerText));
            if (data.error) {
                // BUGFIX: previously `console.error(error.message)` — `error`
                // was never defined; the message lives on the parsed response.
                console.error(data.message);
            } else {
                for (let image of data.body) {
                    let url = image.urls.original;
                    console.log("download", url);
                    // Create download/<query>/ in one call instead of two
                    // separate existsSync/mkdirSync steps.
                    let filedir = "download/" + query + "/";
                    fs.mkdirSync(filedir, { recursive: true });
                    let filename = url.split('/').pop();
                    let filepath = filedir + filename;
                    if (fs.existsSync(filepath)) {
                        console.log(filename, "already exists");
                        continue;
                    }
                    let write = fs.createWriteStream(filepath);
                    // Pixiv's image CDN rejects requests without a pixiv Referer.
                    download(url, {
                        headers: {
                            "Referer": "https://www.pixiv.net"
                        }
                    }).pipe(write);
                    // Wait until the file is fully flushed before moving on.
                    await new Promise(r => write.on("close", r));
                    console.log("saved", filename);
                }
            }
        }
    }
    console.log("complete");
    await browser.close();
})();
|
||||
|
51
pixiv-popular-downloader.py
Normal file
51
pixiv-popular-downloader.py
Normal file
@ -0,0 +1,51 @@
|
||||
# pixiv-popular-downloader.py
#
# Downloads original-resolution images for the popular results of a Pixiv
# tag search, authenticating with the PHPSESSID cookie of a premium account
# (without it, popular_d ordering silently falls back to newest results).
import argparse
import requests
from requests_toolbelt.adapters import host_header_ssl
from urllib.parse import quote as encodeURI
import os
import sys

ap = argparse.ArgumentParser()
ap.add_argument("tag", help="Pixiv tag(s) to search")
ap.add_argument("-p", dest="numpages", type=int, default=1, help="number of pages to download (default 1)")
ap.add_argument("-s", dest="startpagenum", type=int, default=1, help="page number to start at")
ap.add_argument("-r", action='store_true', help="include r18 posts")
args = ap.parse_args()

# Session cookie of a logged-in premium account, pasted into PHPSESSID.txt.
with open("PHPSESSID.txt", 'r') as f:
    # BUGFIX: strip whitespace — a trailing newline from the editor would
    # corrupt the cookie value and break the premium session.
    PHPSESSID = f.read().strip()

rqs = requests.Session()
# Requests go to pixiv's IP directly with a spoofed Host header;
# HostHeaderSSLAdapter validates the TLS certificate against that Host
# header instead of the literal IP in the URL.
rqs.mount('https://', host_header_ssl.HostHeaderSSLAdapter())

download_count = 1
# BUGFIX: -p means "number of pages", so iterate startpagenum ..
# startpagenum + numpages - 1. The old range(startpagenum, numpages + 1)
# treated -p as an end page number and downloaded nothing when -s > -p.
for i in range(args.startpagenum, args.startpagenum + args.numpages):
    page_url = f"https://210.140.131.219/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
    print("get", page_url)
    page_data = rqs.get(page_url, cookies={"PHPSESSID": PHPSESSID}, headers={"host": "www.pixiv.net"}).json()
    if page_data['error']:
        print(page_data['message'])
        sys.exit(1)  # search failure is fatal; per-illust failures below are not
    for illust in page_data['body']['illustManga']['data']:
        illust_r18 = bool(illust['xRestrict'])
        illust_url = f"https://210.140.131.219/ajax/illust/{illust['id']}/pages"
        print("get", illust_url)
        illust_data = rqs.get(illust_url, headers={"host": "www.pixiv.net"}).json()
        if illust_data['error']:
            # Skip this illustration but keep going with the rest.
            print(illust_data['message'])
        else:
            for image in illust_data['body']:
                image_url = image['urls']['original']
                download_dir = f"download/{args.tag}/"
                os.makedirs(download_dir, exist_ok=True)
                # Prefix a running counter (and x_ for R18) so files sort in
                # popularity order.
                download_filename = str(download_count) + '_' + ('x_' if illust_r18 else '') + image_url.split('/').pop()
                download_path = download_dir + download_filename
                if os.path.exists(download_path):
                    print(download_path, "already exists")
                    continue
                print("get", image_url)
                # Pixiv's image CDN rejects requests without a pixiv referer.
                res = rqs.get(image_url, headers={'referer': 'https://www.pixiv.net'})
                with open(download_path, "wb") as f:
                    f.write(res.content)
                print("saved", download_filename)
                download_count = download_count + 1
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
requests==2.26.0
|
||||
requests-toolbelt==0.9.1
|
||||
urllib3==1.26.6
|
Loading…
Reference in New Issue
Block a user