This commit is contained in:
Lamp 2024-02-17 16:36:01 -08:00
parent fda3e02823
commit f9c8ce8f2c
8 changed files with 88 additions and 97 deletions

@ -17,6 +17,7 @@ Test in browser: https://api.u2b.cx/test.html
- `thumbnails`: set to `1`, `true`, `yes`, `on` or whatever to load thumbnails
- `icons`: set to `1`, `true`, `yes`, `on` or whatever to load channel icons
- `captions`: set to `1`, `true`, `yes`, `on` or whatever if you need access to closed captioning data
### Example URL
@ -52,6 +53,7 @@ JSON object:
- `y`: (integer) px from top
- `width`: (integer)
- `height`: (integer)
- `captions_vrcurl`?: (integer) index of the vrcurl that serves the caption data JSON (only present when the `captions` option is enabled)
- `imagesheet_vrcurl`?: (integer) index of the vrcurl for the collage of thumbnails and/or icons
- `nextpage_vrcurl`: (integer) index of the vrcurl that will serve the JSON for the next page of results
@ -61,7 +63,17 @@ JSON object:
- `{pool}`: must be same as pool param in search endpoint.
- `{index}`: vrcurl index number
Response may be 302 redirect to youtube url, `image/png` for imagesheet or `application/json` for next page
Response may be a 302 redirect to a YouTube URL, `image/png` for the imagesheet, `application/json` for the next page (see response format above), or caption data:
### Caption JSON format
- Array of Object
- `name`: (string) caption track name like "English" or "English (auto-generated)"
- `id`: (string) id like `.en` or `a.en`
- `lines`: Array of Object
- `start`: (float) video seconds when the caption is displayed
- `dur`: (float) seconds to display the caption
- `text`: (string) caption text
# VRCUrls

@ -67,13 +67,14 @@ async function VRCYoutubeSearch(pool, query, options = {}) {
height: iconHeight
};
}
if (options.captions) {
video.captions_vrcurl = await putVrcUrl(pool, {type: "captions", videoId: video.id});
}
delete video.thumbnailUrl;
delete video.channel.iconUrl;
data.results.push(video);
}
if (continuationData) data.nextpage_vrcurl = await putVrcUrl(pool, {
type: "ytContinuation",
continuationData,

8
app.js

@ -4,6 +4,7 @@ import send from "koa-send";
import { cachedVRCYoutubeSearch } from "./VRCYoutubeSearch.js"
import { getImageSheet } from "./imagesheet.js";
import { resolveVrcUrl } from "./vrcurl.js";
import { getVideoCaptionsCached } from "./captions.js";
import { stringToBoolean } from "./util.js";
export var app = new Koa();
@ -26,7 +27,8 @@ router.get("/search", async ctx => {
var options = {
thumbnails: stringToBoolean(ctx.query.thumbnails),
icons: stringToBoolean(ctx.query.icons)
icons: stringToBoolean(ctx.query.icons),
captions: stringToBoolean(ctx.query.captions)
};
ctx.body = await cachedVRCYoutubeSearch(ctx.query.pool, query, options);
@ -55,6 +57,9 @@ router.get("/vrcurl/:pool/:num", async ctx => {
case "ytContinuation":
ctx.body = await cachedVRCYoutubeSearch(ctx.params.pool, dest.continuationData, dest.options);
break;
case "captions":
ctx.body = await getVideoCaptionsCached(dest.videoId);
break;
default:
ctx.status = 500;
}
@ -90,7 +95,6 @@ app.use(async (ctx, next) => {
}
})(ctx.body);
ctx.body = JSON.stringify(ctx.body).replaceAll("\\\\u", "\\u");
ctx.type = "json";
});
app.use(router.routes());

43
captions.js Normal file

@ -0,0 +1,43 @@
import { XMLParser } from "fast-xml-parser";
// Shared parser for YouTube caption-track XML.
var xmlParser = new XMLParser({
	// keep attributes: each <text> element carries its timing in `start` / `dur`
	ignoreAttributes: false,
	// keep element text as strings; by default numeric-looking captions
	// ("2024", "007") would be converted to numbers and lose their spelling
	parseTagValue: false
});
/**
 * Download the YouTube watch page for a video and return the player response
 * object that the page embeds as `var ytInitialPlayerResponse = {...};`.
 *
 * @param {string} videoId - YouTube video id
 * @returns {Promise<object>} the parsed `ytInitialPlayerResponse` object
 * @throws {Error} if the page does not contain a `ytInitialPlayerResponse` assignment
 * @throws {SyntaxError} if the embedded data is not valid JSON
 */
async function getVideoData(videoId) {
	// escape the id so it cannot inject extra query parameters into the URL
	var html = await fetch(`https://www.youtube.com/watch?v=${encodeURIComponent(videoId)}`).then(res => res.text());
	var match = html.match(/var ytInitialPlayerResponse = ({.*});/);
	// pages without the embedded data would otherwise surface as an opaque
	// "cannot read properties of null" TypeError
	if (!match) throw new Error(`ytInitialPlayerResponse not found in watch page for video ${videoId}`);
	return JSON.parse(match[1]);
}
/**
 * Fetch and parse every caption track of a video.
 *
 * Tracks are downloaded in parallel. A track that fails to download or parse
 * is logged and left out of the result instead of failing the whole request.
 *
 * @param {string} videoId - YouTube video id
 * @returns {Promise<Array<{name: string, id: string, lines: Array<{start: number, dur: number, text: string}>}>>}
 *   one entry per successfully parsed track; empty if the video has no captions
 */
async function getVideoCaptions(videoId) {
	var ytInitialPlayerResponse = await getVideoData(videoId);
	var captionTracks = ytInitialPlayerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
	if (!Array.isArray(captionTracks)) return [];

	var parsedTracks = await Promise.all(captionTracks.map(async captionTrack => {
		try {
			var xml = await fetch(captionTrack.baseUrl).then(res => res.text());
			var parsed = xmlParser.parse(xml);
			if (parsed.transcript == null) throw new Error(`caption track ${captionTrack.vssId} has no <transcript> element`);
			// the parser yields a bare object for a single <text> element and
			// no `text` property at all for an empty transcript
			var entries = parsed.transcript.text ?? [];
			if (!Array.isArray(entries)) entries = [entries];
			var lines = entries.map(({ "#text": text, "@_start": start, "@_dur": dur }) => ({
				start: Number(start),
				dur: Number(dur),
				// always a string: elements may be empty or parsed as numbers
				text: text == null ? "" : String(text)
			}));
			return {
				name: captionTrack.name.simpleText,
				id: captionTrack.vssId,
				lines
			};
		} catch (error) {
			console.error(error.stack);
			return null;
		}
	}));

	// drop failed tracks so the response never contains null entries
	return parsedTracks.filter(track => track !== null);
}
// In-flight and completed caption lookups keyed by video id. A Map is used so
// ids such as "constructor" or "__proto__" cannot hit inherited properties.
var cache = new Map();
var CAPTION_CACHE_TTL_MS = 1000*60*60*6; // 6 hours

/**
 * Get the captions of a video, reusing a cached result for up to 6 hours.
 *
 * The promise itself is cached, so concurrent requests for the same video
 * share one upstream lookup. A failed lookup is evicted immediately so the
 * next request retries instead of replaying the failure until expiry.
 *
 * @param {string} videoId - YouTube video id
 * @returns {Promise<Array<object>>} caption tracks as returned by getVideoCaptions
 * @throws whatever getVideoCaptions rejects with
 */
export async function getVideoCaptionsCached(videoId) {
	var pending = cache.get(videoId);
	if (!pending) {
		pending = getVideoCaptions(videoId);
		cache.set(videoId, pending);
		var evict = () => {
			// only remove our own entry, not a newer one stored after a retry
			if (cache.get(videoId) === pending) cache.delete(videoId);
		};
		var timer = setTimeout(evict, CAPTION_CACHE_TTL_MS);
		// a pending eviction must not keep the process alive on shutdown
		timer.unref?.();
		pending.catch(() => {
			clearTimeout(timer);
			evict();
		});
	}
	return await pending;
}

90
package-lock.json generated

@ -8,10 +8,10 @@
"@keyv/sqlite": "^3.6.6",
"@koa/router": "^12.0.1",
"canvas": "^2.11.2",
"fast-xml-parser": "^4.3.4",
"keyv": "^4.5.4",
"koa": "^2.14.2",
"koa-send": "^5.0.1",
"node-fetch": "^3.3.2"
"koa-send": "^5.0.1"
}
},
"node_modules/@gar/promisify": {
@ -362,14 +362,6 @@
"node": ">= 0.8"
}
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"engines": {
"node": ">= 12"
}
},
"node_modules/debug": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
@ -491,37 +483,25 @@
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
"integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"node_modules/fast-xml-parser": {
"version": "4.3.4",
"resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.3.4.tgz",
"integrity": "sha512-utnwm92SyozgA3hhH2I8qldf2lBqm6qHOICawRNRFu1qMe3+oqr+GcXjGqTmXTMGE5T4eC03kr/rlh5C1IRdZA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
"url": "https://github.com/sponsors/NaturalIntelligence"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
"url": "https://paypal.me/naturalintelligence"
}
],
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
"strnum": "^1.0.5"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
"bin": {
"fxparser": "src/cli/cli.js"
}
},
"node_modules/fresh": {
@ -1102,41 +1082,6 @@
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-4.3.0.tgz",
"integrity": "sha512-73sE9+3UaLYYFmDsFZnqCInzPyh3MqIwZO9cw58yIqAZhONrrabrYyYe3TuIqtIiOuTXVhsGau8hcrhhwSsDIQ=="
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
"dependencies": {
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/node-gyp": {
"version": "8.4.1",
"resolved": "https://registry.npmjs.org/node-gyp/-/node-gyp-8.4.1.tgz",
@ -1595,6 +1540,11 @@
"node": ">=8"
}
},
"node_modules/strnum": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/strnum/-/strnum-1.0.5.tgz",
"integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA=="
},
"node_modules/tar": {
"version": "6.2.0",
"resolved": "https://registry.npmjs.org/tar/-/tar-6.2.0.tgz",
@ -1683,14 +1633,6 @@
"node": ">= 0.8"
}
},
"node_modules/web-streams-polyfill": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz",
"integrity": "sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==",
"engines": {
"node": ">= 8"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",

@ -3,10 +3,13 @@
"@keyv/sqlite": "^3.6.6",
"@koa/router": "^12.0.1",
"canvas": "^2.11.2",
"fast-xml-parser": "^4.3.4",
"keyv": "^4.5.4",
"koa": "^2.14.2",
"koa-send": "^5.0.1",
"node-fetch": "^3.3.2"
"koa-send": "^5.0.1"
},
"engines": {
"node": ">=18.0.0"
},
"type": "module"
}

@ -1,5 +1,3 @@
var fetch = global.fetch || (await import("node-fetch")).default;
export async function searchYouTubeVideos(query) {
var url = `https://www.youtube.com/results?search_query=${encodeURIComponent(query.replaceAll(' ', '+'))}&sp=EgIQAQ%253D%253D`;
@ -69,16 +67,3 @@ function parseVideoRendererData(data) {
}
};
}
/*
var page1 = await searchYouTubeVideos("test video");
console.log("page1", page1);
var page2 = await continueYouTubeVideoSearch(page1.continuationData);
console.log("page2", page2);
var page3 = await continueYouTubeVideoSearch(page2.continuationData);
console.log("page3", page3);
console.log("videos", [...page1.videos, ...page2.videos, ...page3.videos]);
debugger;
*/

@ -12,6 +12,7 @@
<label>search: <input id="input" type="text" value="nyan cat" /></label>
<label><input id="thumbnails" type="checkbox" checked>thumbnails</label>
<label><input id="icons" type="checkbox" checked>icons</label>
<label><input id="captions" type="checkbox" checked>captions</label>
<button id="start">start</button>
</div>
@ -26,7 +27,7 @@ var lastData;
start.onclick = () => {
output.innerHTML = "";
loadData(`/search?pool=test1000&thumbnails=${thumbnails.checked}&icons=${icons.checked}&input=${encodeURIComponent(input.value)}`);
loadData(`/search?pool=test1000&thumbnails=${thumbnails.checked}&icons=${icons.checked}&captions=${captions.checked}&input=${encodeURIComponent(input.value)}`);
};
nextpage.onclick = () => loadData(`/vrcurl/test1000/${lastData.nextpage_vrcurl}`);