Merge branch 'master' into nginx

nginx
maybe uses less cpu idk
2023-07-31 19:35:58 -07:00 · 2023-07-22 21:07:26 -07:00
10 changed files with 265 additions and 206 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,3 @@
 .vscode
 __pycache__
 test.mp4
 env
 errors
--- a/README.md
+++ b/README.md
@ -0,0 +1,44 @@
 # u2b.cx
 A YouTube search resolver + raw file resolver w/ proxy for Quest VRChat.
 - Get the video you want just by typing its name in the URL
 - Video works for both PC and Quest VRChat
 - Proxying avoids random blocks from Google's servers
 ## Technical Features
 - Written in Python to integrate with YoutubeDL (yt-dlp) for fastest performance
 - Multi-threaded for concurrent usage
 - Requests coalesced to one YoutubeDL invocation per input
 - Limited to one YoutubeDL invocation per IP address
 - Results cached for 5 hours or until expiry found in extracted URL
 - Extracted URLs proxied in Caddy so that they work in all countries
 - Errors displayed as a 10 second single-frame video
 - PC VRChat bypassed to save bandwidth (todo: sacrifices consistency)
 ### Planned
 - Option to get Nth search result (requires deeper integration into YoutubeDL)
 ## Usage
 ### GET `https://u2b.cx/<query>`
 The server will search YouTube for `<query>`, pick the first result, pick the best quality all-in-one MP4 format available, and respond with a 302 redirect to the proxied raw MP4 file. If the client is PC VRChat, the server may instead redirect to the YouTube video URL to save bandwidth on the server.
 NOTE: query must not start with a dot (.)
 ### GET `https://u2b.cx/id/<video id>`
 ### GET `https://u2b.cx/https://www.youtube.com/watch?v=<video id>`
 ### GET `https://u2b.cx/https://youtu.be/<video id>`
 ### GET `https://u2b.cx/https://www.youtube.com/shorts/<video id>`
 ### GET `https://u2b.cx/https://music.youtube.com/watch?v=<video id>`
 ### etcetera...
 Bypasses search to look up the video directly by its id. If the client is PC VRChat, it may be immediately redirected to the YouTube url to save resources on the server.
 Regex only matches the start of the string; anything after the 11-char video id is ignored.
 Malformed YouTube URLs will be treated as a YouTube search query and YouTube search will probably give what you want.
--- a/caddy/Caddyfile
+++ b/caddy/Caddyfile
@ -13,14 +13,9 @@
 	}
 }
-{$CADDY_SITE:":80"} {
+:80 {
 #nl.u2b.cx u2b.cx {
 	log
 	tls {
 		dns porkbun {
 			api_key {env.PORKBUN_API_KEY}
 			api_secret_key {env.PORKBUN_API_SECRET}
 		}
 	}
 	handle_path /proxy/* {
 		@gv path_regexp gvurl ^\/([a-z0-9-]+\.googlevideo\.com)
 		handle @gv {
@ -50,7 +45,7 @@ Disallow: /
 			@notget not method GET
 			respond @notget 403
-			reverse_proxy http://127.0.0.1:8080
+			reverse_proxy http://app:8080
 		}
 	}
 }
--- a/caddy/Dockerfile
+++ b/caddy/Dockerfile
@ -1,6 +1,4 @@
 FROM caddy:2.6-builder AS builder
-RUN xcaddy build \
+RUN xcaddy build --with github.com/caddyserver/transform-encoder
 	 --with github.com/caddyserver/transform-encoder \
 	 --with github.com/caddy-dns/porkbun
 FROM caddy:2.6
 COPY --from=builder /usr/bin/caddy /usr/bin/caddy
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -8,20 +8,29 @@ services:
      - ./:/app/
    working_dir: /app/
    environment:
      - ADDRESS=127.0.0.1
      - PORT=8080
      - PROXY=/proxy/
    network_mode: host
    command: python server.py
-  caddy:
+#  caddy:
-    build: caddy
+#    image: caddy:2.6
 #    restart: always
 #    ports:
 #      - "80:80"
 #      - "443:443"
 #      - "443:443/udp"
 #    volumes:
 #      - ./Caddyfile:/etc/caddy/Caddyfile
 #      - caddy_data:/data
 #      - caddy_config:/config
  nginx:
    image: nginx:1.25
    restart: always
    network_mode: host
    volumes:
-      - ./caddy/Caddyfile:/etc/caddy/Caddyfile
+      - ./nginx.conf:/etc/nginx/nginx.conf:ro
-      - caddy_data:/data
+    ports:
-      - caddy_config:/config
+      - "80:80"
-    env_file: env
+      - "443:443"
      - "443:443/udp"
 volumes:
  caddy_data:
  caddy_config:
--- a/nginx.conf
+++ b/nginx.conf
@ -0,0 +1,56 @@
 user  nginx;
 worker_processes  auto;
 error_log  /var/log/nginx/error.log notice;
 pid        /var/run/nginx.pid;
 events {
    worker_connections  1024;
 }
 http {
 	log_format main '$time_local $remote_addr "$request" $status "$http_user_agent"';
 	access_log /var/log/nginx/access.log main;
 	resolver 8.8.8.8 ipv6=off;#until I can get ipv6 on the new host
 	server {
 		listen 80;
 		location /proxy/ {
 			limit_except GET { deny all; }
 			location ~^/proxy/([a-z0-9-]+)\.googlevideo\.com/videoplayback {
 				#return 200 "$uri\n\n$request_uri\n\n$query_string\n\nhttps://$1/$2";
 				proxy_pass https://$1.googlevideo.com/videoplayback?$query_string;
 				#proxy_redirect ~https://([a-z0-9-]+).googlevideo.com/ /proxy/$1.googlevideo.com/;
 				proxy_redirect https:// /proxy/;
 			}
 			return 403;
 		}
 		location = / {
 			return 301 https://www.u2b.cx/;
 		}
 		location = /favicon.ico {
 			return 404;
 		}
 		location /. {
 			return 403;
 		}
 		location = /robots.txt {
 			# A body sent by `return` already gets a Content-Type header derived
 			# from default_type; add_header would append a second, duplicate
 			# Content-Type, so the type is set through default_type instead.
 			default_type text/plain;
 			return 200 "User-agent: *\nDisallow: /\n";
 		}
 		if ($request_method != GET) {
 			return 403;
 		}
 		location / {
 			proxy_http_version 1.1;
 			proxy_set_header Connection "";
 			proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
 			proxy_set_header X-Forwarded-Host $host;
 			proxy_set_header X-Forwarded-Proto $scheme;
 			proxy_pass http://app:8080;
 		}
 	}
 }
--- a/requirements.txt
+++ b/requirements.txt
--- a/server.py
+++ b/server.py
@ -6,25 +6,31 @@ from urllib.parse import unquote, urlparse, parse_qs
 from threading import Event, Thread
 from datetime import datetime, timedelta
 from time import sleep
-from os import environ, makedirs, stat
+from os import environ
 import logging
 import re
-from ffmpeg import FFmpeg
+from textvid import generate_video_from_text
 import textwrap
 from pathlib import Path
 from hashlib import sha256
 from shutil import copyfileobj
 from math import ceil
-if 'DEBUG' in environ:
+ctx_cache = {}
-	logging.basicConfig(level=logging.DEBUG)
+ips_running_ytdl = []
 def cache_prune_loop():
 	"""Once an hour, drop every ctx_cache entry whose 'expire' time has passed.
 	Loops forever and never returns.
 	"""
 	while True:
 		sleep(3600)
 		# Iterate over a snapshot of the keys: deleting from a dict while
 		# iterating the dict itself raises RuntimeError, which would end this
 		# loop the first time an expired entry is found.
 		for key in list(ctx_cache):
 			if datetime.now() >= ctx_cache[key]['expire']:
 				del ctx_cache[key]
 Thread(target=cache_prune_loop, daemon=True).start()
 class Ratelimit(Exception): pass
 class CachedException(Exception): pass
 class Handler(BaseHTTPRequestHandler):
 	def address_string(self):
 		return getattr(self, 'headers', {}).get('X-Forwarded-For', '').split(',')[0] or self.client_address[0]
 	def is_pc_vrchat(self):
 		ua = self.headers.get('User-Agent', '')
 		ae = self.headers.get('Accept-Encoding', '')
 		return ua.startswith("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/") and ua.endswith(" Safari/537.36") and ae == "identity"
 	def send_error(self, code, message=""):
 		body = bytes(message, "utf-8")
 		self.send_response(code)
@ -33,185 +39,96 @@ class Handler(BaseHTTPRequestHandler):
 		self.end_headers()
 		self.wfile.write(body)
 	def send_error_video(self, text: str):
 		"""Respond with HTTP 200 and an MP4 that displays `text`.
 		The video is rendered with ffmpeg the first time a given text is seen and
 		stored as errors/<sha256 of text>.mp4; later calls stream the stored file.
 		"""
 		makedirs("errors", exist_ok=True)
 		# The file name is derived from the raw text, before any stripping or escaping below.
 		hash = sha256(bytes(text, "utf8")).hexdigest()
 		file = Path(f"errors/{hash}.mp4")
 		if not file.exists():
 			# Remove sequences starting with \x9B or ESC[ (looks like ANSI terminal escape codes).
 			text = re.sub("(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", '', text)
 			# Escape backslash, quotes, % and : before the text is embedded in the drawtext filter string.
 			text = text.replace("\\", "\\\\").replace('"', '""').replace("'", "''").replace("%", "\\%").replace(":", "\\:")
 			text = textwrap.fill(text, 90)
 			# bg.png is read at 0.1 fps and the output is limited to 10 seconds.
 			peg = FFmpeg().option("y").input("bg.png", {
 				"framerate": "0.1"
 			}).output(str(file), {
 				"f": "mp4",
 				"t": "10",
 				"c:v": "libx264",
 				"pix_fmt": "yuv420p",
 				"vf": "drawtext=font=monospace:fontsize=24:x=10:y=10:text='"+text+"':"
 			})
 			# Forward the ffmpeg command line and its stderr to the debug log.
 			@peg.on("start")
 			def on_start(arguments):
 				logging.debug("cmd:" + ' '.join(arguments))
 			@peg.on("stderr")
 			def on_stderr(line):
 				logging.debug("stderr:" + line)
 			peg.execute()
 		with file.open('rb') as f:
 			# stat on the open descriptor supplies the Content-Length value
 			fs = stat(f.fileno())
 			self.send_response(200)
 			self.send_header("Content-Type", "video/mp4")
 			self.send_header("Content-Length", str(fs.st_size))
 			self.end_headers()
 			copyfileobj(f, self.wfile)
 	def do_GET(self):
-		try:
+		# block other bot junk in reverse proxy
-			if self.path in ["/", "/favicon.ico"] or self.path.startswith("/."):
+		if self.path in ["/", "/favicon.ico"] or self.path.startswith("/."):
-				self.send_error(404)
+			self.send_error(404)
 				return
 			path = unquote(self.path)
 			id_match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/|live\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
 			if id_match:
 				video_id = id_match[1]
 			else:
 				search_match = re.match("^\/(.+?)(?:\/(\d*))?$", path)
 				if not search_match:
 					self.send_error(404)
 					return
 				search_query = search_match[1]
 				search_index = int(search_match[2]) if search_match[2] and search_match[2].isdigit() else 1
 				search_index = max(min(search_index, 100), 1)
 				video_id = ytdl_search_to_id(self, search_query, search_index)
 			self.send_response(302)
 			self.send_header("Location", f"https://www.youtube.com/watch?v={video_id}")
 			self.end_headers()
 			#half this code now defunct
 			return
-			video_url = ytdl_resolve_mp4_url(self, video_id)
+		path = unquote(self.path)
 		match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
 		if match:
 			if self.is_pc_vrchat():
 				self.send_response(302)
 				self.send_header("Location", "https://www.youtube.com/watch?v=" + match[1])
 				self.end_headers()
 				return
 			query = match[1]
 		else:
 			query = "ytsearch:" + path[1:]
-			if 'PROXY' in environ:
+		ctx = ctx_cache.get(query)
 				video_url = environ['PROXY'] + video_url.replace("https://",'')
 			self.send_response(302)
 			self.send_header("Location", video_url)
 			self.end_headers()
 		except Ratelimit:
 			self.send_error(429)
 		except CachedException as e:
 			self.send_error_video(str(e))
 		except Exception as e:
 			logging.exception(e)
 			self.send_error_video(str(e))
 		if not ctx or 'expire' in ctx and datetime.now() >= ctx['expire']:
 			client_ip = self.address_string()
 			if client_ip in ips_running_ytdl:
 				self.send_error(429)
 				return
 			try:
 				ips_running_ytdl.append(client_ip)
 				ctx_cache[query] = ctx = {
 					'event': Event(),
 					'expire': datetime.now() + timedelta(hours=5)
 				}
 				with YoutubeDL() as ydl:
 					info = ydl.extract_info(query, download=False)
-ips_running_ytdl = []
+					selection = info
-def invoke_youtubedl(self: Handler, input: str) -> dict:
+					if "entries" in info:
-	ip = self.address_string()
+						if not info["entries"]:
-	if ip in ips_running_ytdl:
+							raise Exception("ERROR: No videos found!")
-		raise Ratelimit()
+						else:
-	ips_running_ytdl.append(ip)
+							selection = info["entries"][0]
 	try:
 		with YoutubeDL({'extractor_args': {'youtube': {'skip': ['dash', 'hls']}}}) as ydl:
 			return ydl.extract_info(input, download=False, process=False)
 	finally:
 		ips_running_ytdl.remove(ip)
 					ctx['id'] = selection['id']
-search_cache = {}
+					suitable_formats = list(filter(lambda x: x['ext'] == "mp4" and x['vcodec'] != 'none' and x['acodec'] != 'none', selection["formats"]))
-def ytdl_search_to_id(self: Handler, query: str, index: int) -> str:
+					if not suitable_formats:
-	ctx = search_cache.get(query)
+						raise Exception(f"ERROR: {selection['id']}: No suitable formats of this video available!")
-	if ctx:
+					best_format = max(suitable_formats, key=lambda x: x['height'])
 		ctx['event'].wait(60)
 		if 'error' in ctx:
 			raise CachedException(ctx['error'])
 		results = ctx.get('results')
 	else:
 		results = None
-	if results == None or results['count'] < index or datetime.now() >= ctx['expires_at']:
+					ctx['url'] = best_format['url']
 		search_cache[query] = ctx = {
 			'event': Event(),
 			'expires_at': datetime.now() + timedelta(hours=5)
 		}
 		try:
 			count = ceil(index/10)*10
 			info = invoke_youtubedl(self, f"ytsearch{count}:{query}")
 			entries = list(info['entries'])
 			if not entries:
 				raise Exception("ERROR: No results!")
 			ids = [video['id'] for video in entries]
 			ctx['results'] = results = {'ids': ids, 'count': count}
 		except Exception as e:
 			ctx['error'] = str(e)
 			raise
 		finally:
 			ctx['event'].set()
-	return results['ids'][min(index, len(results['ids'])) - 1]
+					expire = parse_qs(urlparse(best_format['url']).query).get('expire', [])[0]
 					if expire:
 						expire = datetime.fromtimestamp(int(expire))
 						if expire < ctx['expire']: ctx['expire'] = expire
 			except Exception as e:
 				logging.exception(e)
 				ctx['exception'] = e
 				ctx['error_vid'] = generate_video_from_text(re.sub("(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", '', str(e)))
 			finally:
 				ips_running_ytdl.remove(client_ip)
 				ctx['event'].set()
 		elif 'url' not in ctx:
 			ctx['event'].wait(60)
 		if self.is_pc_vrchat():
 			if ctx.get('id'):
 				self.send_response(302)
 				self.send_header("Location", "https://www.youtube.com/watch?v=" + ctx['id'])
 				self.end_headers()
 				return
-resolve_cache = {}
+		if not ctx.get('url'):
-def ytdl_resolve_mp4_url(self: Handler, input: str) -> str:
+			if 'exception' in ctx:
-	ctx = resolve_cache.get(input)
+				if 'error_vid' in ctx:
-	if ctx and datetime.now() <= ctx['expires_at']:
+					self.send_response(200)
-		ctx['event'].wait(60)
+					self.send_header("Content-Type", "video/mp4")
-		if 'error' in ctx:
+					self.send_header("Content-Length", str(len(ctx['error_vid'])))
-			raise CachedException(ctx['error'])
+					self.end_headers()
-		return ctx['url']
+					self.wfile.write(ctx['error_vid'])
-	
+				else:
-	resolve_cache[input] = ctx = {
+					self.send_error(500, message=str(ctx['exception']))
 		'event': Event(),
 		'expires_at': datetime.now() + timedelta(hours=5)
 	}
 	try:
 		info = invoke_youtubedl(self, input)
 		selection = info
 		if "entries" in info:
 			if not info["entries"]:
 				raise Exception("ERROR: No video found!")
 			else:
-				selection = info["entries"][0]
+				self.send_error(404)
-
+		else:
-		suitable_formats = list(filter(lambda x: x['ext'] == "mp4" and x['vcodec'] != 'none' and x['acodec'] != 'none', selection["formats"]))
+			url = ctx['url']
-		if not suitable_formats:
+			if 'PROXY' in environ:
-			raise Exception(f"ERROR: {selection['id']}: No suitable formats of this video available!")
+				url = environ['PROXY'] + url.replace("https://",'')
-		best_format = max(suitable_formats, key=lambda x: x['height'])
+			self.send_response(302)
-
+			self.send_header("Location", url)
-		ctx['url'] = url = best_format['url']
+			self.end_headers()
 		try:
 			expire = parse_qs(urlparse(url).query).get('expire', [])[0]
 			if expire:
 				expire = datetime.fromtimestamp(int(expire))
 				if expire < ctx['expires_at']:
 					ctx['expires_at'] = expire
 		except Exception as e:
 			logging.exception("failed parsing expire", e)
 	except Exception as e:
 		ctx['error'] = str(e)
 		raise
 	finally:
 		ctx['event'].set()
 	return url
 def cache_prune_loop():
 	"""Hourly sweep that evicts entries past their 'expires_at' time from search_cache, then resolve_cache. Never returns."""
 	while True:
 		sleep(3600)
 		# same eviction rule for both caches, search_cache first
 		for cache in (search_cache, resolve_cache):
 			# snapshot the keys so entries can be deleted during the walk
 			for cache_key in list(cache.keys()):
 				if datetime.now() >= cache[cache_key]['expires_at']:
 					del cache[cache_key]
 Thread(target=cache_prune_loop, daemon=True).start()
 with ThreadingHTTPServer((environ.get('ADDRESS', ''), int(environ.get('PORT', 80))), Handler) as server:
 	server.serve_forever()
--- a/textvid.py
+++ b/textvid.py
@ -0,0 +1,44 @@
 from ffmpeg import FFmpeg
 import textwrap
 from tempfile import mktemp
 from os import remove
 def generate_video_from_text(text, test=False) -> bytes:
 	""" generate a single-frame ten-second mp4 displaying the text
 	text: message to draw; it is escaped for the drawtext filter and wrapped at 90 columns
 	test: when true, print the ffmpeg command line and its stderr output
 	returns the contents of the encoded mp4 file
 	the temporary output file is removed even when ffmpeg or the read fails
 	"""
 	# function-scope imports: mkstemp replaces the deprecated, race-prone mktemp
 	from os import close
 	from tempfile import mkstemp
 	text = text.replace("\\", "\\\\").replace('"', '""').replace("'", "''").replace("%", "\\%").replace(":", "\\:")
 	text = textwrap.fill(text, 90)
 	# mkstemp creates the file itself, so the name cannot be claimed by another
 	# process first; only the path is needed because ffmpeg writes the output
 	# (the "y" option lets it overwrite the empty placeholder)
 	fd, file = mkstemp()
 	close(fd)
 	try:
 		peg = FFmpeg().option("y").input("bg.png", {
 			"framerate": "0.1"
 		}).output(file, {
 			"f": "mp4",
 			"t": "10",
 			"c:v": "libx264",
 			"pix_fmt": "yuv420p",
 			"vf": "drawtext=font=monospace:fontsize=24:x=10:y=10:text='"+text+"':"
 		})
 		if test:
 			@peg.on("start")
 			def on_start(arguments): print("cmd:", ' '.join(arguments))
 			@peg.on("stderr")
 			def on_stderr(line): print("stderr:", line)
 		peg.execute()
 		with open(file, "rb") as fp:
 			return fp.read()
 	finally:
 		# always clean up, otherwise every failed render leaves a temp file behind
 		remove(file)
 if __name__ == "__main__":
 	print("test")
 	v = generate_video_from_text("""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut sem viverra aliquet eget sit amet. Senectus et netus et malesuada fames ac. Gravida quis blandit turpis cursus in hac habitasse platea. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Tristique risus nec feugiat in. Malesuada bibendum arcu vitae elementum curabitur vitae nunc sed velit. Porta lorem mollis aliquam ut porttitor leo a. Tellus rutrum tellus pellentesque eu tincidunt. Enim diam vulputate ut pharetra sit amet. Platea dictumst vestibulum rhoncus est. Sed sed risus pretium quam vulputate dignissim suspendisse. Viverra maecenas accumsan lacus vel facilisis volutpat est velit egestas. Lorem ipsum dolor sit amet consectetur. Netus et malesuada fames ac turpis egestas integer eget. Tellus elementum sagittis vitae et leo duis ut. Ipsum a arcu cursus vitae. Amet aliquam id diam maecenas ultricies mi. Mattis vulputate enim nulla aliquet porttitor lacus luctus accumsan. Magna ac placerat vestibulum lectus mauris ultrices eros in.
 Dui sapien eget mi proin sed libero enim sed faucibus. Hac habitasse platea dictumst quisque sagittis purus sit. Mi eget mauris pharetra et ultrices neque ornare. Sagittis aliquam malesuada bibendum arcu vitae elementum curabitur vitae. Eget arcu dictum varius duis. Purus in massa tempor nec feugiat nisl pretium. Ipsum nunc aliquet bibendum enim facilisis gravida neque convallis. Adipiscing diam donec adipiscing tristique risus. Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus. Non quam lacus suspendisse faucibus.
 Sed libero enim sed faucibus. Ut etiam sit amet nisl purus in mollis nunc sed. Cursus eget nunc scelerisque viverra mauris in aliquam sem fringilla. Eget aliquet nibh praesent tristique magna sit amet purus. Dui accumsan sit amet nulla facilisi morbi tempus. Lacus laoreet non curabitur gravida. Mi eget mauris pharetra et ultrices neque. Volutpat est velit egestas dui id ornare arcu odio. Porttitor lacus luctus accumsan tortor posuere ac. Morbi quis commodo odio aenean. Accumsan in nisl nisi scelerisque eu. Tincidunt dui ut ornare lectus sit amet est placerat in. Libero enim sed faucibus turpis in eu mi bibendum neque. At lectus urna duis convallis convallis.
 Vel risus commodo viverra maecenas accumsan lacus. Mauris pharetra et ultrices neque ornare aenean euismod elementum. Non enim praesent elementum facilisis leo. Amet massa vitae tortor condimentum lacinia. Ornare aenean euismod elementum nisi quis eleifend. Diam donec adipiscing tristique risus nec. Volutpat diam ut venenatis tellus. Mauris nunc congue nisi vitae. Sit amet nisl suscipit adipiscing bibendum est ultricies integer quis. Turpis massa tincidunt dui ut ornare lectus sit amet. Libero enim sed faucibus turpis in. Sit amet porttitor eget dolor morbi non arcu risus quis. Sem integer vitae justo eget magna fermentum iaculis eu. Mattis molestie a iaculis at. Amet volutpat consequat mauris nunc congue. Et tortor at risus viverra adipiscing at in tellus integer. Amet mattis vulputate enim nulla.
 Dignissim convallis aenean et tortor. Vitae congue eu consequat ac felis donec et odio. Risus at ultrices mi tempus imperdiet. Amet massa vitae tortor condimentum lacinia quis. Consectetur adipiscing elit ut aliquam purus. Integer quis auctor elit sed vulputate mi sit amet. Tellus id interdum velit laoreet. Sed risus ultricies tristique nulla aliquet. Fermentum dui faucibus in ornare quam. Lobortis elementum nibh tellus molestie nunc non blandit. Amet dictum sit amet justo donec. Iaculis urna id volutpat lacus.""", True)
 	with open("test.mp4", "wb") as f:
 		f.write(v)
--- a/u2b.cx.service
+++ b/u2b.cx.service
@ -7,11 +7,9 @@ After=network.target
 User=u2b
 Group=u2b
 WorkingDirectory=/srv/u2b.cx/
-Environment=ADDRESS=127.0.0.1 PORT=52482
+Environment=ADDRESS=127.29.151.200 PORT=52482 PROXY=https://proxy.u2b.cx/
-ExecStart=/usr/bin/python3.11 server.py
+ExecStart=/usr/bin/python3.9 server.py
 MemoryMax=1G
 LimitNOFILE=262144
 Restart=always
 [Install]
 WantedBy=multi-user.target
Author	SHA1	Message	Date
Lamp	669ea6a41b	Merge branch 'master' into nginx	2023-07-31 19:35:58 -07:00
Lamp	8057cdb44d	nginx maybe uses less cpu idk	2023-07-22 21:07:26 -07:00