Compare commits
2 Commits
666628dfc7
...
a1c603edcf
Author | SHA1 | Date | |
---|---|---|---|
a1c603edcf | |||
704b95823b |
@@ -38,7 +38,7 @@ def download_popular(tag, startpagenum = 1, numpages = 1):
|
|||||||
# record this search session in the database
|
# record this search session in the database
|
||||||
search_document_id = search_collection.insert_one({
|
search_document_id = search_collection.insert_one({
|
||||||
"date": datetime.now(), # date started
|
"date": datetime.now(), # date started
|
||||||
"query": None, # the tag being searched
|
"query": tag, # the tag being searched
|
||||||
"current_page": startpagenum, # keep track of the page we're on
|
"current_page": startpagenum, # keep track of the page we're on
|
||||||
"current_illust": None, # keep track of which item is being downloaded
|
"current_illust": None, # keep track of which item is being downloaded
|
||||||
"search_data": [], # save each payload
|
"search_data": [], # save each payload
|
||||||
@@ -100,6 +100,28 @@ def download_popular(tag, startpagenum = 1, numpages = 1):
|
|||||||
if (illust_ajax_data['error']):
|
if (illust_ajax_data['error']):
|
||||||
print("error from ajax api:", illust_ajax_data['message'])
|
print("error from ajax api:", illust_ajax_data['message'])
|
||||||
|
|
||||||
|
|
||||||
|
# save animated works
|
||||||
|
ugoira_data = None
|
||||||
|
if (illust_ajax_data['body']['illustType'] == 2):
|
||||||
|
illust_ugoira_url = illust_ajax_url + "/ugoira_meta"
|
||||||
|
print("get", illust_ugoira_url)
|
||||||
|
illust_ugoira_data = rqs.get(illust_ugoira_url, cookies={"PHPSESSID": os.environ["PHPSESSID"]}, headers={"host":"www.pixiv.net"}).json()
|
||||||
|
if (illust_ugoira_data['error']):
|
||||||
|
print("error from ajax ugoira api:", illust_ugoira_data['message'])
|
||||||
|
else:
|
||||||
|
original_ugoira_url = illust_ugoira_data['body']['originalSrc']
|
||||||
|
print("get", original_ugoira_url)
|
||||||
|
res = rqs.get(original_ugoira_url, headers={'referer':'https://www.pixiv.net'})
|
||||||
|
print("gridfs put", original_ugoira_url)
|
||||||
|
ugoira_data = {
|
||||||
|
"gridfs_id": gridfs.put(res.content, filename=original_ugoira_url.split('/').pop(), original_url=original_ugoira_url, ugoira=True),
|
||||||
|
"ugoira_meta": illust_ugoira_data
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# illust_ajax_data does not have "page" data (additional image urls)
|
# illust_ajax_data does not have "page" data (additional image urls)
|
||||||
# download that
|
# download that
|
||||||
illust_pages_url = illust_ajax_url + "/pages"
|
illust_pages_url = illust_ajax_url + "/pages"
|
||||||
@@ -114,8 +136,9 @@ def download_popular(tag, startpagenum = 1, numpages = 1):
|
|||||||
document = {
|
document = {
|
||||||
"_id": illust_id, # use the unique artwork id for document id so we can't have duplicates
|
"_id": illust_id, # use the unique artwork id for document id so we can't have duplicates
|
||||||
"illust_ajax_data": illust_ajax_data, # save all the metadata for the artwork
|
"illust_ajax_data": illust_ajax_data, # save all the metadata for the artwork
|
||||||
"illist_pages_data": illust_pages_data, # save all the image urls of the data
|
"illust_pages_data": illust_pages_data, # save all the image urls of the data
|
||||||
"downloaded_images": {}, # map of image filenames to gridfs ids
|
"downloaded_images": {}, # map of image filenames to gridfs ids
|
||||||
|
"ugoira_data": ugoira_data, # animation data
|
||||||
"date_saved": datetime.now()
|
"date_saved": datetime.now()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -129,15 +152,13 @@ def download_popular(tag, startpagenum = 1, numpages = 1):
|
|||||||
res = rqs.get(original_image_url, headers={'referer':'https://www.pixiv.net'})
|
res = rqs.get(original_image_url, headers={'referer':'https://www.pixiv.net'})
|
||||||
|
|
||||||
print("gridfs put", res.url)
|
print("gridfs put", res.url)
|
||||||
gridfs_id = gridfs.put(res.content)
|
gridfs_id = gridfs.put(res.content, filename=original_image_filename, original_url=original_image_url)
|
||||||
document['downloaded_images'][original_image_filename] = gridfs_id
|
document['downloaded_images'][original_image_filename] = gridfs_id
|
||||||
|
|
||||||
|
|
||||||
# add to db
|
# add to db
|
||||||
illustration_collection.insert_one(document)
|
illustration_collection.insert_one(document)
|
||||||
search_collection.update_one({"_id": search_document_id}, {"$push": {"results":{
|
search_collection.update_one({"_id": search_document_id}, {"$push": {"results": illust_id}})
|
||||||
"id": illust_id
|
|
||||||
}}})
|
|
||||||
|
|
||||||
search_collection.update_one({"_id": search_document_id}, {"$set":{"completed": True}})
|
search_collection.update_one({"_id": search_document_id}, {"$set":{"completed": True}})
|
||||||
print("end of loop")
|
print("end of loop")
|
||||||
|
Loading…
Reference in New Issue
Block a user