Compare commits


6 Commits

Author SHA1 Message Date
Lamp e9f2a5606a delete junk; i branched 2021-09-24 10:53:14 -07:00
Lamp 79c5a7abe5 Merge branch 'master' of gitea.moe:lamp/pixiv-popular-downloader 2021-09-21 13:12:43 -07:00
Lamp edb1af72eb eliminate the need to edit hosts file 2021-09-21 13:10:56 -07:00
Lamp 9440fcf740 Update 'README.md' 2021-09-21 06:09:21 -05:00
Lamp f5eb46e8a8 refactored on python 2021-09-21 04:04:51 -07:00
Lamp f815b730f0 resurrect python script 2021-09-21 03:38:29 -07:00
8 changed files with 64 additions and 2744 deletions

5
.gitignore vendored

@@ -1,3 +1,2 @@
-node_modules
-download
-chromium_data
+PHPSESSID.txt
+download

README.md

@@ -1,12 +1,11 @@
 This is a python script for downloading original pixiv images from popular search results via a premium account.
-### example usage
-```sh
-node pixiv-downloader.js "初音ミク" -p 3
-```
+# Instructions
-Chromium window will appear where you'll need to sign in to your pixiv premium account if you haven't already.
+1. Download this repo to your computer and open a terminal in it. Run `pip install -r requirements.txt` if necessary.
-### args
-- `-p <num>` for number of pages (default 1)
-- `-s <num>` for page number to start on (default 1)
-- `-r` to include R18
+2. In your browser, logged in to a premium Pixiv account, open the dev tools Application tab, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.
+3. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of the 初音ミク tag, including r18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
+4. Check the download folder. If you're getting newest results instead of popular results, your PHPSESSID failed to work.
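
Since a rejected PHPSESSID fails silently (you just get newest-first results), one way to test the cookie in isolation is to fetch a single page and compare the returned IDs against the popular search in a logged-in browser. A minimal sketch, assuming www.pixiv.net is reachable directly and reusing the endpoint shape from the script in this repo:

```python
# Sanity check for PHPSESSID.txt: fetch one popular-order page and print the
# first few illust IDs; compare them against the popular search results shown
# in a logged-in browser. (Sketch only; endpoint shape taken from the script.)
import requests

phpsessid = open("PHPSESSID.txt").read().strip()
res = requests.get(
    "https://www.pixiv.net/ajax/search/artworks/%E5%88%9D%E9%9F%B3%E3%83%9F%E3%82%AF",  # 初音ミク
    params={"order": "popular_d", "mode": "safe", "p": 1},
    cookies={"PHPSESSID": phpsessid},
    headers={"referer": "https://www.pixiv.net"},
)
data = res.json()
print([work["id"] for work in data["body"]["illustManga"]["data"][:5]])
```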

2631
package-lock.json generated

File diff suppressed because it is too large

package.json

@@ -1 +0,0 @@
-{"dependencies":{"download":"^8.0.0","minimist":"^1.2.5","puppeteer":"^10.2.0"}}

pixiv-downloader.js

@@ -1,65 +0,0 @@
// node pixiv-downloader.js <tags> -p [numpages] -s [startpage] [-r]
var argv = require("minimist")(process.argv.slice(2), {
    default: {
        "p": 1,
        "s": 1
    }
});
var query = argv._.join(" ");
var puppeteer = require("puppeteer");
var download = require("download");
var fs = require("fs");
(async function () {
    var browser = await puppeteer.launch({
        headless: false,
        userDataDir: process.cwd() + "/chromium_data"
    });
    // show a placeholder page prompting a manual login, then wait for the user to close it
    var page = await browser.newPage();
    await page.setContent(`login to <a target="_blank" href="https://accounts.pixiv.net/login">pixiv</a> if necessary, then close this page to continue.`);
    await new Promise(r => page.on("close", r));
    var page = await browser.newPage();
    // walk the requested range of popular-search result pages via pixiv's ajax API
    for (let i = argv.s; i < argv.s + argv.p; i++) {
        let url = `https://www.pixiv.net/ajax/search/artworks/${encodeURIComponent(query)}?order=popular_d&mode=${argv.r ? "all" : "safe"}&p=${i}`;
        console.log("get page", i, url);
        await page.goto(url);
        let data = JSON.parse(await page.evaluate(() => document.querySelector("body").innerText));
        if (data.error) throw data.message;
        for (let item of data.body.illustManga.data) {
            let url = `https://www.pixiv.net/ajax/illust/${item.id}/pages`;
            console.log("get", url);
            await page.goto(url);
            let data = JSON.parse(await page.evaluate(() => document.querySelector("body").innerText));
            if (data.error) {
                console.error(data.message);
            } else {
                for (let image of data.body) {
                    let url = image.urls.original;
                    console.log("download", url);
                    if (!fs.existsSync("download")) fs.mkdirSync("download");
                    let filename = url.split('/').pop();
                    let filedir = "download/" + query + "/";
                    if (!fs.existsSync(filedir)) fs.mkdirSync(filedir);
                    let filepath = filedir + filename;
                    if (fs.existsSync(filepath)) {
                        console.log(filename, "already exists");
                        continue;
                    }
                    let write = fs.createWriteStream(filepath);
                    // pixiv's image CDN requires a pixiv Referer, otherwise it responds 403
                    download(url, {
                        headers: {
                            "Referer": "https://www.pixiv.net"
                        }
                    }).pipe(write);
                    await new Promise(r => write.on("close", r));
                    console.log("saved", filename);
                }
            }
        }
    }
    console.log("complete");
    await browser.close();
})();

pixiv-popular-downloader.py

@@ -0,0 +1,51 @@
import argparse
import os
from urllib.parse import quote as encodeURI

import requests
from requests_toolbelt.adapters import host_header_ssl

ap = argparse.ArgumentParser()
ap.add_argument("tag", help="Pixiv tag(s) to search")
ap.add_argument("-p", dest="numpages", type=int, default=1, help="number of pages to download (default 1)")
ap.add_argument("-s", dest="startpagenum", type=int, default=1, help="page number to start at")
ap.add_argument("-r", action='store_true', help="include r18 posts")
args = ap.parse_args()

with open("PHPSESSID.txt", 'r') as f:
    PHPSESSID = f.read().strip()  # strip the trailing newline editors tend to add

rqs = requests.Session()
# verify TLS certificates against the Host header instead of the URL's IP,
# so requests can hit pixiv's IP directly without editing the hosts file
rqs.mount('https://', host_header_ssl.HostHeaderSSLAdapter())

download_count = 1
for i in range(args.startpagenum, args.startpagenum + args.numpages):
    page_url = f"https://210.140.131.219/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
    print("get", page_url)
    page_data = rqs.get(page_url, cookies={"PHPSESSID": PHPSESSID}, headers={"host": "www.pixiv.net"}).json()
    if page_data['error']:
        print(page_data['message'])
        exit(1)
    for illust in page_data['body']['illustManga']['data']:
        illust_r18 = bool(illust['xRestrict'])  # xRestrict is nonzero for R-18 works
        illust_url = f"https://210.140.131.219/ajax/illust/{illust['id']}/pages"
        print("get", illust_url)
        illust_data = rqs.get(illust_url, headers={"host": "www.pixiv.net"}).json()
        if illust_data['error']:
            print(illust_data['message'])
        else:
            for image in illust_data['body']:
                image_url = image['urls']['original']
                download_dir = f"download/{args.tag}/"
                os.makedirs(download_dir, exist_ok=True)
                download_filename = str(download_count) + '_' + ('x_' if illust_r18 else '') + image_url.split('/')[-1]
                download_path = download_dir + download_filename
                if os.path.exists(download_path):
                    print(download_path, "already exists")
                    continue
                print("get", image_url)
                # pixiv's image CDN rejects requests without a pixiv referer
                res = rqs.get(image_url, headers={'referer': 'https://www.pixiv.net'})
                with open(download_path, "wb") as f:
                    f.write(res.content)
                print("saved", download_filename)
                download_count = download_count + 1
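
The hard-coded `210.140.131.219` plus `host` header is what the "eliminate the need to edit hosts file" commit refers to: `requests_toolbelt`'s `HostHeaderSSLAdapter` verifies the TLS certificate against the `Host` header rather than the IP in the URL, so the connection can skip DNS entirely. A minimal standalone sketch of the pattern (IP and hostname taken from the script above; the tag is just a placeholder):

```python
# Connect to a fixed IP but verify the certificate for the hostname given in
# the Host header; this replaces the old workaround of pointing
# www.pixiv.net at this IP in /etc/hosts.
import requests
from requests_toolbelt.adapters import host_header_ssl

session = requests.Session()
session.mount("https://", host_header_ssl.HostHeaderSSLAdapter())
res = session.get(
    "https://210.140.131.219/ajax/search/artworks/vocaloid?order=popular_d&mode=safe&p=1",
    headers={"Host": "www.pixiv.net"},
)
print(res.status_code, res.json().get("error"))
```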

3
requirements.txt Normal file

@@ -0,0 +1,3 @@
requests==2.26.0
requests-toolbelt==0.9.1
urllib3==1.26.6


@ -1,35 +0,0 @@
# simple downloader in python; i couldn't get cookie to work in order to get sorted by popularity so this is only useful for getting results sorted by newest/oldest and non-r18
import argparse
import os
import requests
from urllib.parse import quote as encodeURI

# cookie = open("cookie.txt", 'r').read()
cookie = None
ap = argparse.ArgumentParser()
ap.add_argument("tags")
ap.add_argument("-p", "--pages", type=int, default=1, help="number of pages")
args = ap.parse_args()
for i in range(1, args.pages+1):
    url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tags, safe='')}?order=popular_d&mode=safe&p={i}"
    print("get", url)
    # the session cookie must be sent under the literal header name 'cookie'
    data = requests.get(url, headers={'cookie': cookie}).json()
    if data['error']:
        print(data['message'])
        exit(1)
    for item in data['body']['illustManga']['data']:
        url = f"https://www.pixiv.net/ajax/illust/{item['id']}/pages"
        print("get", url)
        data = requests.get(url, headers={'cookie': cookie}).json()
        if data['error']:
            print(data['message'])
        else:
            for image in data['body']:
                url = image['urls']['original']
                filename = url.split('/')[-1]
                print("get", url)
                req = requests.get(url, headers={'referer': 'https://www.pixiv.net'})
                os.makedirs("download", exist_ok=True)  # ensure the target folder exists
                with open("download/" + filename, "wb") as f:
                    f.write(req.content)
                print("saved", filename)