Merge branch 'master' into nginx

fix for missing header
cache prune thread is daemon
2023-07-31 19:35:58 -07:00 · 2023-07-31 18:39:23 -07:00 · 2023-07-28 13:34:20 -07:00 · 2023-07-28 00:37:33 -07:00 · 2023-07-28 02:29:21 -05:00 · 2023-07-23 02:36:00 -05:00
4 changed files with 64 additions and 23 deletions
--- a/README.md
+++ b/README.md
@ -1,8 +1,12 @@
 # u2b.cx

-A YouTube video file resolver thingy via search query, made to let you watch YouTube in Quest VRChat where there aren't really any other options to do so.
+A YouTube search resolver + raw file resolver w/ proxy for Quest VRChat.

-## Features
+- Get the video you want just by typing its name in the URL
+- Video works for both PC and Quest VRChat
+- Proxying avoids random blocks from Google's servers
+
+## Technical Features

 - Written in Python to integrate with YoutubeDL (yt-dlp) for fastest performance
 - Multi-threaded for concurrent usage
@ -10,7 +14,8 @@ A YouTube video file resolver thingy via search query, made to let you watch You
 - Limited to one YoutubeDL invocation per IP address
 - Results cached for 5 hours or until expiry found in extracted URL
 - Extracted URLs proxied in Caddy so that they work in all countries
- Errors displayed as a video
+- Errors displayed as a 10 second single-frame video
+- PC VRChat bypassed to save bandwidth (todo: sacrifices consistency)

 ### Planned

@ -21,8 +26,19 @@ A YouTube video file resolver thingy via search query, made to let you watch You

 ### GET `https://u2b.cx/<query>`

-The server will search YouTube for `<query>`, pick the first result, pick the best quality all-in-one MP4 format available, and respond with a 302 redirect to the proxied raw MP4 file.
+The server will search YouTube for `<query>`, pick the first result, pick the best quality all-in-one MP4 format available, and respond with a 302 redirect to the proxied raw MP4 file. If the client is PC VRChat, the server may instead redirect to the YouTube video URL to save bandwidth on the server.
+
+NOTE: query must not start with a dot (.)

 ### GET `https://u2b.cx/id/<video id>`
+### GET `https://u2b.cx/https://www.youtube.com/watch?v=<video id>`
+### GET `https://u2b.cx/https://youtu.be/<video id>`
+### GET `https://u2b.cx/https://www.youtube.com/shorts/<video id>`
+### GET `https://u2b.cx/https://music.youtube.com/watch?v=<video id>`
+### etcetera...

-Bypasses search to serve the video directly.
+Bypasses search to look up the video directly by its id. If the client is PC VRChat, it may be immediately redirected to the YouTube URL to save resources on the server.
+
+Regex only matches the start of the string; anything after the 11-char video id is ignored.
+
+Malformed YouTube URLs will be treated as a YouTube search query, and YouTube search will probably still return what you want.
--- a/caddy/Caddyfile
+++ b/caddy/Caddyfile
@ -1,6 +1,21 @@
-#edit for deployment
-#u2b.cx {
+{
+	log http.log.access {
+		include http.log.access
+		output stdout
+		format formatted "[35m[{ts}][0m [96m[1m{request>remote_ip}[0m [31m{request>headers>X-Forwarded-For}[0m [33m{request>method}[0m [92m{request>host}[32m{request>uri}[0m [97m{status}[0m	[90m{request>headers>User-Agent}[0m [34m{request>headers>Referer}[0m" {
+			time_format "02/Jan/2006:15:04:05-0700"
+		}
+	}
+	log default {
+		exclude http.log.access
+		output stderr
+		format console
+	}
+}
+
 :80 {
+#nl.u2b.cx u2b.cx {
+	log
 	handle_path /proxy/* {
 		@gv path_regexp gvurl ^\/([a-z0-9-]+\.googlevideo\.com)
 		handle @gv {
--- a/caddy/Dockerfile
+++ b/caddy/Dockerfile
@ -0,0 +1,4 @@
+FROM caddy:2.6-builder AS builder
+RUN xcaddy build --with github.com/caddyserver/transform-encoder
+FROM caddy:2.6
+COPY --from=builder /usr/bin/caddy /usr/bin/caddy
--- a/server.py
+++ b/server.py
@ -11,15 +11,6 @@ import logging
 import re
 from textvid import generate_video_from_text

-def get_expire(url):
-	alt_expire = datetime.now() + timedelta(hours=5)
-	if not url: return alt_expire
-	q = parse_qs(urlparse(url).query)
-	expire = q.get('expire')
-	if not expire: return alt_expire
-	expire = datetime.fromtimestamp(int(expire[0])) #this seems to always be +6 hours
-	return min([expire, alt_expire])
-
 ctx_cache = {}
 ips_running_ytdl = []

@ -29,15 +20,15 @@ def cache_prune_loop():
 		for key in ctx_cache:
 			if datetime.now() >= ctx_cache[key]['expire']:
 				del ctx_cache[key]
-Thread(target=cache_prune_loop).start()
+Thread(target=cache_prune_loop, daemon=True).start()

 class Handler(BaseHTTPRequestHandler):
 	def address_string(self):
 		return getattr(self, 'headers', {}).get('X-Forwarded-For', '').split(',')[0] or self.client_address[0]
 	
 	def is_pc_vrchat(self):
-		ua = self.headers.get('User-Agent')
-		ae = self.headers.get('Accept-Encoding')
+		ua = self.headers.get('User-Agent', '')
+		ae = self.headers.get('Accept-Encoding', '')
 		return ua.startswith("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/") and ua.endswith(" Safari/537.36") and ae == "identity"
 	
 	def send_error(self, code, message=""):
@ -55,8 +46,16 @@ class Handler(BaseHTTPRequestHandler):
 			return
 		
 		path = unquote(self.path)
-		match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.)?youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
-		query = match[1] if match else "ytsearch:" + path[1:]
+		match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
+		if match:
+			if self.is_pc_vrchat():
+				self.send_response(302)
+				self.send_header("Location", "https://www.youtube.com/watch?v=" + match[1])
+				self.end_headers()
+				return
+			query = match[1]
+		else:
+			query = "ytsearch:" + path[1:]

 		ctx = ctx_cache.get(query)

@ -67,7 +66,10 @@ class Handler(BaseHTTPRequestHandler):
 				return
 			try:
 				ips_running_ytdl.append(client_ip)
-				ctx_cache[query] = ctx = {'event': Event()}
+				ctx_cache[query] = ctx = {
+					'event': Event(),
+					'expire': datetime.now() + timedelta(hours=5)
+				}
 				with YoutubeDL() as ydl:
 					info = ydl.extract_info(query, download=False)

@ -86,7 +88,11 @@ class Handler(BaseHTTPRequestHandler):
 					best_format = max(suitable_formats, key=lambda x: x['height'])

 					ctx['url'] = best_format['url']
-					ctx['expire'] = get_expire(best_format['url'])
+
+					expire = parse_qs(urlparse(best_format['url']).query).get('expire', [])[0]
+					if expire:
+						expire = datetime.fromtimestamp(int(expire))
+						if expire < ctx['expire']: ctx['expire'] = expire
 			except Exception as e:
 				logging.exception(e)
 				ctx['exception'] = e
Author	SHA1	Message	Date
Lamp	669ea6a41b	Merge branch 'master' into nginx	2023-07-31 19:35:58 -07:00
Lamp	b2a3c5a048	fix for missing header	2023-07-31 18:39:23 -07:00
Lamp	5a91e654f8	cache prune thread is daemon	2023-07-28 13:34:20 -07:00
Lamp	3a19f8fe72	support music.youtube.com	2023-07-28 00:37:33 -07:00
Lamp	b49918f8a6	Update 'README.md'	2023-07-28 02:29:21 -05:00
Lamp	a79ee967d2	Update 'README.md'	2023-07-23 02:36:00 -05:00
Lamp	cddbbcc866	fix expire	2023-07-22 23:25:56 -07:00
Lamp	a007796147	total bypass from pc vrchat with id	2023-07-22 23:09:56 -07:00
Lamp	83690c4535	caddy formatted log	2023-07-22 21:35:03 -07:00