49 lines
1.6 KiB
JavaScript
Executable File
49 lines
1.6 KiB
JavaScript
Executable File
var https = require("https");
|
|
var util = require("util");
|
|
var fs = require("fs");
|
|
|
|
// Search tag to scrape: the first command-line argument after the script path.
const [, , TAG] = process.argv;
|
|
|
|
/**
 * Fetch `path` over HTTPS and hand the complete response body to `callback`
 * as a UTF-8 string.
 *
 * The request goes to a fixed IP address with a "Host: www.pixiv.net" header,
 * and the Cookie header is taken from the COOKIE environment variable.
 * Any failure, whether reported on the request or on the response stream, is
 * logged and the same request is re-issued after 3 seconds, with no retry limit.
 *
 * @param {string} path - Request path, including any query string.
 * @param {function(string): void} callback - Receives the full response body.
 */
function get(path, callback) {
  console.log("get", path);

  // A single attempt must finish exactly once: either the body is delivered
  // or one retry is scheduled. A broken connection may be reported on both
  // the request and the response, so every exit path checks this flag.
  let settled = false;

  const retry = error => {
    if (settled) return;
    settled = true;
    console.error(error.message);
    setTimeout(get, 3000, path, callback);
  };

  const req = https.get({
    hostname: "210.140.131.219",
    path,
    headers: {
      "Host": "www.pixiv.net",
      "Cookie": process.env.COOKIE
    },
    // NOTE(review): when the name being verified is "www.pixiv.net" this
    // returns undefined, i.e. the certificate identity check is skipped for
    // that name; every other name goes through the default TLS check.
    // Presumably deliberate because the connection targets a raw IP — confirm.
    checkServerIdentity: (hostname, cert) =>
      hostname === "www.pixiv.net" ? undefined : require("tls").checkServerIdentity(hostname, cert)
  }, res => {
    res.setEncoding('utf8');
    let data = "";
    res.on("data", chunk => {
      data += chunk;
    });
    // Without this listener an error emitted by the response stream (for
    // example a connection closed mid-body) would be unhandled.
    res.on("error", retry);
    res.on("end", () => {
      if (settled) return;
      settled = true;
      callback(data);
    });
  });

  req.on("error", retry);
}
|
|
|
|
// Create the output folders on first run: one for the id lists, one for the
// raw responses, and a per-tag folder inside the raw-response folder.
// Order matters: "output_raw" has to exist before its per-tag child.
for (const dir of ["output_lists", "output_raw", `output_raw/${TAG}`]) {
  if (fs.existsSync(dir)) continue;
  fs.mkdirSync(dir);
}
|
|
|
|
// Space-joined id string of the most recently accepted page, kept so that a
// page identical to the previous one can be detected.
let lastIds;

/**
 * Scrape one page of search results for TAG, then continue with the next page.
 *
 * For each page the raw response body is written to
 * output_raw/<TAG>/<pagenum>.json and the artwork ids are appended, space
 * separated, to output_lists/<TAG>.txt. Scraping stops when the response is
 * not valid JSON, when the parsed response has a truthy `error` field, when a
 * page has no results, or when a page yields the same ids as the page before.
 *
 * The starting page is taken from the second command-line argument and falls
 * back to 1 when that argument is missing, zero or not numeric.
 *
 * @param {number} pagenum - Page number to request.
 */
(function scrapePage(pagenum) {
  get(`/ajax/search/artworks/${encodeURIComponent(TAG)}?order=popular_d&mode=all&p=${pagenum}`, data => {
    // Saved before parsing so the raw body is on disk even if parsing fails.
    fs.writeFileSync(`output_raw/${TAG}/${pagenum}.json`, data);

    let json;
    try {
      json = JSON.parse(data);
    } catch (error) {
      // A non-JSON body would otherwise throw out of this callback uncaught.
      process.exitCode = 1;
      return console.error(`Page ${pagenum}: response is not valid JSON (${error.message})`);
    }

    // The error flag and message live on the parsed object; the raw string
    // `data` has no such properties, so checking it could never match.
    if (json.error) return console.error(json.message);

    const results = json.body.illustManga.data;
    if (!results.length) return console.log("No more results");

    const ids = results.map(x => x.id).join(' ') + ' ';
    console.log(ids);

    if (ids === lastIds) return console.log("Repeated results");
    lastIds = ids;

    fs.appendFileSync(`output_lists/${TAG}.txt`, ids);
    scrapePage(pagenum + 1);
  });
})(Number(process.argv[3]) || 1);
|