pixiv-popular-id-lists/pixiv-popular-id-scrape.js

49 lines
1.6 KiB
JavaScript
Executable File

var https = require("https");
var util = require("util");
var fs = require("fs");
// Search tag to scrape: the first command-line argument after the script path.
const [, , TAG] = process.argv;
/**
 * Fetch `path` over HTTPS and hand the complete response body to `callback`
 * as a UTF-8 string.
 *
 * The connection is made to a hard-coded IP address while the `Host` header
 * is set to www.pixiv.net, and the `Cookie` header is taken from the `COOKIE`
 * environment variable. The HTTP status code is not inspected.
 *
 * A failed attempt (request error, or an error on the response stream after
 * it has started) is logged and retried after 3 seconds, with no retry limit.
 *
 * @param {string} path - Request path, including any query string.
 * @param {(body: string) => void} callback - Called once with the full body.
 */
function get(path, callback) {
  console.log("get", path);

  // Each attempt must produce exactly one outcome: either the callback is
  // invoked with the body, or a single retry is scheduled.
  let finished = false;
  const retry = error => {
    if (finished) return;
    finished = true;
    console.error(error.message);
    setTimeout(get, 3000, path, callback);
  };

  const req = https.get({
    hostname: "210.140.131.219",
    path,
    headers: {
      "Host": "www.pixiv.net",
      "Cookie": process.env.COOKIE
    },
    // Skip the certificate identity check when the name being verified is
    // www.pixiv.net; any other name goes through the default tls check.
    checkServerIdentity: (hostname, cert) => hostname === "www.pixiv.net" ? undefined : require("tls").checkServerIdentity(hostname, cert)
  }, res => {
    res.setEncoding('utf8');
    let body = "";
    res.on("data", chunk => {
      body += chunk;
    });
    res.on("end", () => {
      if (finished) return;
      finished = true;
      callback(body);
    });
    // A connection dropped mid-response surfaces here rather than on `req`;
    // without this listener "end" never fires and the scrape would stall.
    res.on("error", retry);
  });
  req.on("error", retry);
}
// Create the output directories that do not exist yet. Parents are listed
// before the per-tag child so each mkdir finds its parent in place.
for (const dir of ["output_lists", "output_raw", `output_raw/${TAG}`]) {
  if (!fs.existsSync(dir)) fs.mkdirSync(dir);
}
// Space-joined IDs of the most recently processed page, used to detect the
// server returning the same page again.
let lastIds;

/**
 * Scrape one page of search results for TAG ordered by popularity, then
 * continue with the next page from inside the response callback.
 *
 * For each page the raw response is saved to `output_raw/<TAG>/<page>.json`
 * and the result IDs are appended (space-separated) to
 * `output_lists/<TAG>.txt`. Scraping stops when the response reports an
 * error, contains no results, or repeats the previous page's IDs.
 *
 * Starts immediately at the page given by the second command-line argument,
 * or page 1 when that argument is missing, zero, or not a number.
 *
 * @param {number} pagenum - Page number to request.
 */
(function scrapePage(pagenum) {
  get(`/ajax/search/artworks/${encodeURIComponent(TAG)}?order=popular_d&mode=all&p=${pagenum}`, data => {
    // Persist the raw body before parsing so it is kept even if parsing fails.
    fs.writeFileSync(`output_raw/${TAG}/${pagenum}.json`, data);
    const json = JSON.parse(data);
    // The error flag lives on the parsed object; `data` is the raw string.
    if (json.error) {
      console.error(json.message);
      return;
    }
    const results = json.body.illustManga.data;
    if (!results.length) {
      console.log("No more results");
      return;
    }
    const ids = results.map(x => x.id).join(' ') + ' ';
    console.log(ids);
    if (ids === lastIds) {
      console.log("Repeated results");
      return;
    }
    lastIds = ids;
    fs.appendFileSync(`output_lists/${TAG}.txt`, ids);
    scrapePage(pagenum + 1);
  });
})(Number(process.argv[3]) || 1);