4 Commits

Author SHA1 Message Date
lamp e9f2a5606a delete junk
i branched
2021-09-24 10:53:14 -07:00
lamp 79c5a7abe5 Merge branch 'master' of gitea.moe:lamp/pixiv-popular-downloader 2021-09-21 13:12:43 -07:00
lamp edb1af72eb eliminate the need to edit hosts file 2021-09-21 13:10:56 -07:00
lamp 9440fcf740 Update 'README.md' 2021-09-21 06:09:21 -05:00
8 changed files with 17 additions and 2722 deletions
+4 -4
View File
@@ -2,10 +2,10 @@ This is a python script for downloading original pixiv images from popular searc
# Instructions
1. Add `210.140.131.219 www.pixiv.net` to your hosts file to bypass Cloudflare, or the script will be blocked. (`nslookup pixiv.net` in case the address needs to be changed)
1. Download this repo to your computer of course, and open the terminal in it. Run `pip install -r requirements.txt` if necessary.
2. Download this repo to your computer of course, and open the terminal in it.
2. In your browser, on Pixiv logged in to a premium account, in dev tools Application tab, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.
3. In your browser, on Pixiv logged in to a premium account, in dev tools Application tab, copy the **value** of the `PHPSESSID` cookie, and paste it into a new file named `PHPSESSID.txt` in this folder.
3. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of 初音ミク tag, including r18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
4. Run `python pixiv-popular-downloader.py -h` for usage information. Example usage to download 10 pages of 初音ミク tag, including r18: `python pixiv-popular-downloader.py -r -p 10 "初音ミク"`
4. Check the download folder. If you're getting newest results instead of popular results, then your PHPSESSID failed to work.
-3
View File
@@ -1,3 +0,0 @@
node_modules
download
chromium_data
-12
View File
@@ -1,12 +0,0 @@
### example usage
```sh
node pixiv-downloader.js "初音ミク" -p 3
```
A Chromium window will appear, where you'll need to sign in to your pixiv premium account if you haven't already.
### args
- `-p <num>` for number of pages (default 1)
- `-s <num>` for page number to start on (default 1)
- `-r` to include R18
-2631
View File
File diff suppressed because it is too large Load Diff
-1
View File
@@ -1 +0,0 @@
{"dependencies":{"download":"^8.0.0","minimist":"^1.2.5","puppeteer":"^10.2.0"}}
-65
View File
@@ -1,65 +0,0 @@
// node pixiv-downloader.js <tags> -p [numpages] -s [startpage] [-r]
//
// Downloads the original-resolution images of the "popular" search results
// for the given tag query into download/<query>/.
//   -p <num>  number of result pages to fetch (default 1)
//   -s <num>  result page number to start on (default 1)
//   -r        search with mode=all instead of mode=safe
const argv = require("minimist")(process.argv.slice(2), {
	default: {
		"p": 1,
		"s": 1
	}
});
// All positional arguments together form the search query.
const query = argv._.join(" ");
const puppeteer = require("puppeteer");
const download = require("download");
const fs = require("fs");

(async function () {
	// Non-headless so the user can sign in; the profile directory is kept
	// under the current working directory so the session persists across runs.
	const browser = await puppeteer.launch({
		headless: false,
		userDataDir: process.cwd() + "/chromium_data"
	});
	try {
		// Show a landing page with a login link and block until the user closes it.
		const loginPage = await browser.newPage();
		await loginPage.setContent(`login to <a target="_blank" href="https://accounts.pixiv.net/login">pixiv</a> if necessary, then close this page to continue.`);
		await new Promise(r => loginPage.on("close", r));

		// Fresh tab used for every ajax request below.
		const page = await browser.newPage();
		// Reads the JSON document the ajax endpoint rendered into the tab.
		const readJson = async () => JSON.parse(await page.evaluate(() => document.querySelector("body").innerText));

		const filedir = "download/" + query + "/";

		for (let i = argv.s; i < argv.s + argv.p; i++) {
			const searchUrl = `https://www.pixiv.net/ajax/search/artworks/${encodeURIComponent(query)}?order=popular_d&mode=${argv.r ? "all" : "safe"}&p=${i}`;
			console.log("get page", i, searchUrl);
			await page.goto(searchUrl);
			const searchData = await readJson();
			// A failed search page is fatal: abort the whole run.
			if (searchData.error) throw new Error(searchData.message);

			for (const item of searchData.body.illustManga.data) {
				const pagesUrl = `https://www.pixiv.net/ajax/illust/${item.id}/pages`;
				console.log("get", pagesUrl);
				await page.goto(pagesUrl);
				const pagesData = await readJson();
				if (pagesData.error) {
					// A single failing illustration is only reported; the run continues.
					// (The original referenced an undefined `error` variable here,
					// which raised a ReferenceError instead of logging the message.)
					console.error(pagesData.message);
					continue;
				}

				for (const image of pagesData.body) {
					const imageUrl = image.urls.original;
					console.log("download", imageUrl);
					// Creates download/ and download/<query>/ as needed; no-op when present.
					fs.mkdirSync(filedir, { recursive: true });
					const filename = imageUrl.split('/').pop();
					const filepath = filedir + filename;
					if (fs.existsSync(filepath)) {
						console.log(filename, "already exists");
						continue;
					}
					const write = fs.createWriteStream(filepath);
					// The request is sent with a pixiv Referer header.
					download(imageUrl, {
						headers: {
							"Referer": "https://www.pixiv.net"
						}
					}).pipe(write);
					// Download sequentially: wait for the file to be closed before moving on.
					await new Promise(r => write.on("close", r));
					console.log("saved", filename);
				}
			}
		}
		console.log("complete");
	} finally {
		// Always shut Chromium down, even when a request above throws.
		await browser.close();
	}
})();
+10 -6
View File
@@ -1,5 +1,6 @@
import argparse
import requests
from requests_toolbelt.adapters import host_header_ssl
from urllib.parse import quote as encodeURI
import os
@@ -14,19 +15,22 @@ PHPSESSID = None
with open("PHPSESSID.txt", 'r') as f:
PHPSESSID = f.read()
rqs = requests.Session()
rqs.mount('https://', host_header_ssl.HostHeaderSSLAdapter())
download_count = 1
for i in range(args.startpagenum, args.numpages+1):
page_url = f"https://www.pixiv.net/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
page_url = f"https://210.140.131.219/ajax/search/artworks/{encodeURI(args.tag, safe='')}?order=popular_d&mode={'all' if args.r else 'safe'}&p={i}"
print("get", page_url)
page_data = requests.get(page_url, cookies={"PHPSESSID": PHPSESSID}).json()
page_data = rqs.get(page_url, cookies={"PHPSESSID": PHPSESSID}, headers={"host":"www.pixiv.net"}).json()
if (page_data['error']):
print(page_data['message'])
exit(1)
for illust in page_data['body']['illustManga']['data']:
illust_r18 = bool(illust['xRestrict'])
illust_url = f"https://www.pixiv.net/ajax/illust/{illust['id']}/pages"
illust_url = f"https://210.140.131.219/ajax/illust/{illust['id']}/pages"
print("get", illust_url)
illust_data = requests.get(illust_url).json()
illust_data = rqs.get(illust_url, headers={"host":"www.pixiv.net"}).json()
if (illust_data['error']):
print(illust_data['message'])
else:
@@ -40,8 +44,8 @@ for i in range(args.startpagenum, args.numpages+1):
print(download_path, "already exists")
continue
print("get", image_url)
req = requests.get(image_url, headers={'referer':'https://www.pixiv.net'})
res = rqs.get(image_url, headers={'referer':'https://www.pixiv.net'})
with open(download_path, "wb") as f:
f.write(req.content)
f.write(res.content)
print("saved", download_filename)
download_count = download_count + 1
+3
View File
@@ -0,0 +1,3 @@
requests==2.26.0
requests-toolbelt==0.9.1
urllib3==1.26.6