Compare commits

..

2 Commits

Author SHA1 Message Date
669ea6a41b Merge branch 'master' into nginx 2023-07-31 19:35:58 -07:00
8057cdb44d nginx
maybe uses less cpu idk
2023-07-22 21:07:26 -07:00
10 changed files with 265 additions and 206 deletions

2
.gitignore vendored
View File

@ -1,5 +1,3 @@
.vscode .vscode
__pycache__ __pycache__
test.mp4 test.mp4
env
errors

44
README.md Normal file
View File

@ -0,0 +1,44 @@
# u2b.cx
A YouTube search resolver + raw file resolver w/ proxy for Quest VRChat.
- Get the video you want just by typing its name in the URL
- Video works for both PC and Quest VRChat
- Proxying avoids random blocks from Google's servers
## Technical Features
- Written in Python to integrate with YoutubeDL (yt-dlp) for fastest performance
- Multi-threaded for concurrent usage
- Requests coalesced to one YoutubeDL invocation per input
- Limited to one YoutubeDL invocation per IP address
- Results cached for 5 hours or until expiry found in extracted URL
- Extracted URLs proxied in Caddy so that they work in all countries
- Errors displayed as a 10 second single-frame video
- PC VRChat bypassed to save bandwidth (TODO: sacrifices consistency)
### Planned
- Option to get Nth search result (requires deeper integration into YoutubeDL)
## Usage
### GET `https://u2b.cx/<query>`
The server will search YouTube for `<query>`, pick the first result, pick the best quality all-in-one MP4 format available, and respond with a 302 redirect to the proxied raw MP4 file. If the client is PC VRChat, the server may instead redirect to the YouTube video URL to save bandwidth on the server.
NOTE: query must not start with a dot (.)
### GET `https://u2b.cx/id/<video id>`
### GET `https://u2b.cx/https://www.youtube.com/watch?v=<video id>`
### GET `https://u2b.cx/https://youtu.be/<video id>`
### GET `https://u2b.cx/https://www.youtube.com/shorts/<video id>`
### GET `https://u2b.cx/https://music.youtube.com/watch?v=<video id>`
### etcetera...
Bypasses search to look up the video directly by its id. If the client is PC VRChat, it may be immediately redirected to the YouTube URL to save resources on the server.
Regex only matches the start of the string; anything after the 11-char video id is ignored.
Malformed YouTube URLs will be treated as a YouTube search query and YouTube search will probably give what you want.

View File

@ -13,14 +13,9 @@
} }
} }
{$CADDY_SITE:":80"} { :80 {
#nl.u2b.cx u2b.cx {
log log
tls {
dns porkbun {
api_key {env.PORKBUN_API_KEY}
api_secret_key {env.PORKBUN_API_SECRET}
}
}
handle_path /proxy/* { handle_path /proxy/* {
@gv path_regexp gvurl ^\/([a-z0-9-]+\.googlevideo\.com) @gv path_regexp gvurl ^\/([a-z0-9-]+\.googlevideo\.com)
handle @gv { handle @gv {
@ -50,7 +45,7 @@ Disallow: /
@notget not method GET @notget not method GET
respond @notget 403 respond @notget 403
reverse_proxy http://127.0.0.1:8080 reverse_proxy http://app:8080
} }
} }
} }

View File

@ -1,6 +1,4 @@
FROM caddy:2.6-builder AS builder FROM caddy:2.6-builder AS builder
RUN xcaddy build \ RUN xcaddy build --with github.com/caddyserver/transform-encoder
--with github.com/caddyserver/transform-encoder \
--with github.com/caddy-dns/porkbun
FROM caddy:2.6 FROM caddy:2.6
COPY --from=builder /usr/bin/caddy /usr/bin/caddy COPY --from=builder /usr/bin/caddy /usr/bin/caddy

View File

@ -8,20 +8,29 @@ services:
- ./:/app/ - ./:/app/
working_dir: /app/ working_dir: /app/
environment: environment:
- ADDRESS=127.0.0.1
- PORT=8080 - PORT=8080
- PROXY=/proxy/ - PROXY=/proxy/
network_mode: host
command: python server.py command: python server.py
caddy: # caddy:
build: caddy # image: caddy:2.6
# restart: always
# ports:
# - "80:80"
# - "443:443"
# - "443:443/udp"
# volumes:
# - ./Caddyfile:/etc/caddy/Caddyfile
# - caddy_data:/data
# - caddy_config:/config
nginx:
image: nginx:1.25
restart: always restart: always
network_mode: host
volumes: volumes:
- ./caddy/Caddyfile:/etc/caddy/Caddyfile - ./nginx.conf:/etc/nginx/nginx.conf:ro
- caddy_data:/data ports:
- caddy_config:/config - "80:80"
env_file: env - "443:443"
- "443:443/udp"
volumes: volumes:
caddy_data: caddy_data:
caddy_config: caddy_config:

56
nginx.conf Normal file
View File

@ -0,0 +1,56 @@
# nginx front end: proxies /proxy/<host>.googlevideo.com/videoplayback to
# googlevideo over HTTPS and forwards every other GET to the app on port 8080.
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    log_format main '$time_local $remote_addr "$request" $status "$http_user_agent"';
    access_log /var/log/nginx/access.log main;

    # The googlevideo proxy_pass below builds its hostname from a variable,
    # so nginx has to resolve it at request time and needs a resolver.
    resolver 8.8.8.8 ipv6=off;#until I can get ipv6 on the new host

    server {
        listen 80;

        location /proxy/ {
            # limit_except GET also lets HEAD through; everything else is denied.
            limit_except GET { deny all; }

            # Only <label>.googlevideo.com/videoplayback is proxied; $1 is the
            # host label captured from the path.
            location ~^/proxy/([a-z0-9-]+)\.googlevideo\.com/videoplayback {
                #return 200 "$uri\n\n$request_uri\n\n$query_string\n\nhttps://$1/$2";
                proxy_pass https://$1.googlevideo.com/videoplayback?$query_string;
                #proxy_redirect ~https://([a-z0-9-]+).googlevideo.com/ /proxy/$1.googlevideo.com/;
                # Rewrite upstream redirects so the client keeps going through /proxy/.
                proxy_redirect https:// /proxy/;
            }

            # Any other /proxy/ path is refused.
            return 403;
        }

        location = / {
            return 301 https://www.u2b.cx/;
        }

        location = /favicon.ico {
            return 404;
        }

        # Paths beginning with "/." are never passed to the app.
        location /. {
            return 403;
        }

        location = /robots.txt {
            # default_type sets the single Content-Type of the generated body.
            # "add_header Content-Type ..." would append a second Content-Type
            # header next to the one nginx already emits for this response.
            default_type text/plain;
            return 200 "User-agent: *\nDisallow: /\n";
        }

        if ($request_method != GET) {
            return 403;
        }

        location / {
            # HTTP/1.1 with an empty Connection header allows upstream keep-alive.
            proxy_http_version 1.1;
            proxy_set_header Connection "";
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Host $host;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_pass http://app:8080;
        }
    }
}

Binary file not shown.

231
server.py
View File

@ -6,25 +6,31 @@ from urllib.parse import unquote, urlparse, parse_qs
from threading import Event, Thread from threading import Event, Thread
from datetime import datetime, timedelta from datetime import datetime, timedelta
from time import sleep from time import sleep
from os import environ, makedirs, stat from os import environ
import logging import logging
import re import re
from ffmpeg import FFmpeg from textvid import generate_video_from_text
import textwrap
from pathlib import Path
from hashlib import sha256
from shutil import copyfileobj
from math import ceil
if 'DEBUG' in environ: ctx_cache = {}
logging.basicConfig(level=logging.DEBUG) ips_running_ytdl = []
def cache_prune_loop():
    """Evict expired entries from ``ctx_cache`` once an hour, forever.

    Never returns. An entry is dropped when the current time has reached
    its ``'expire'`` timestamp.
    """
    while True:
        sleep(3600)
        now = datetime.now()
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating the dict itself raises "RuntimeError: dictionary changed
        # size during iteration", which would end this loop for good and
        # leave the cache growing without bound.
        for key in list(ctx_cache):
            ctx = ctx_cache.get(key)
            if ctx is not None and now >= ctx['expire']:
                # pop() with a default tolerates the key having vanished meanwhile.
                ctx_cache.pop(key, None)
Thread(target=cache_prune_loop, daemon=True).start()
class Ratelimit(Exception): pass
class CachedException(Exception): pass
class Handler(BaseHTTPRequestHandler): class Handler(BaseHTTPRequestHandler):
def address_string(self): def address_string(self):
return getattr(self, 'headers', {}).get('X-Forwarded-For', '').split(',')[0] or self.client_address[0] return getattr(self, 'headers', {}).get('X-Forwarded-For', '').split(',')[0] or self.client_address[0]
def is_pc_vrchat(self):
ua = self.headers.get('User-Agent', '')
ae = self.headers.get('Accept-Encoding', '')
return ua.startswith("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/") and ua.endswith(" Safari/537.36") and ae == "identity"
def send_error(self, code, message=""): def send_error(self, code, message=""):
body = bytes(message, "utf-8") body = bytes(message, "utf-8")
self.send_response(code) self.send_response(code)
@ -33,185 +39,96 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(body) self.wfile.write(body)
def send_error_video(self, text: str):
makedirs("errors", exist_ok=True)
hash = sha256(bytes(text, "utf8")).hexdigest()
file = Path(f"errors/{hash}.mp4")
if not file.exists():
text = re.sub("(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", '', text)
text = text.replace("\\", "\\\\").replace('"', '""').replace("'", "''").replace("%", "\\%").replace(":", "\\:")
text = textwrap.fill(text, 90)
peg = FFmpeg().option("y").input("bg.png", {
"framerate": "0.1"
}).output(str(file), {
"f": "mp4",
"t": "10",
"c:v": "libx264",
"pix_fmt": "yuv420p",
"vf": "drawtext=font=monospace:fontsize=24:x=10:y=10:text='"+text+"':"
})
@peg.on("start")
def on_start(arguments):
logging.debug("cmd:" + ' '.join(arguments))
@peg.on("stderr")
def on_stderr(line):
logging.debug("stderr:" + line)
peg.execute()
with file.open('rb') as f:
fs = stat(f.fileno())
self.send_response(200)
self.send_header("Content-Type", "video/mp4")
self.send_header("Content-Length", str(fs.st_size))
self.end_headers()
copyfileobj(f, self.wfile)
def do_GET(self): def do_GET(self):
try: # block other bot junk in reverse proxy
if self.path in ["/", "/favicon.ico"] or self.path.startswith("/."): if self.path in ["/", "/favicon.ico"] or self.path.startswith("/."):
self.send_error(404) self.send_error(404)
return return
path = unquote(self.path) path = unquote(self.path)
id_match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/|live\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path) match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
if match:
if id_match: if self.is_pc_vrchat():
video_id = id_match[1] self.send_response(302)
self.send_header("Location", "https://www.youtube.com/watch?v=" + match[1])
self.end_headers()
return
query = match[1]
else: else:
search_match = re.match("^\/(.+?)(?:\/(\d*))?$", path) query = "ytsearch:" + path[1:]
if not search_match:
self.send_error(404)
return
search_query = search_match[1]
search_index = int(search_match[2]) if search_match[2] and search_match[2].isdigit() else 1
search_index = max(min(search_index, 100), 1)
video_id = ytdl_search_to_id(self, search_query, search_index)
self.send_response(302) ctx = ctx_cache.get(query)
self.send_header("Location", f"https://www.youtube.com/watch?v={video_id}")
self.end_headers()
#half this code now defunct
return
video_url = ytdl_resolve_mp4_url(self, video_id) if not ctx or 'expire' in ctx and datetime.now() >= ctx['expire']:
client_ip = self.address_string()
if 'PROXY' in environ: if client_ip in ips_running_ytdl:
video_url = environ['PROXY'] + video_url.replace("https://",'')
self.send_response(302)
self.send_header("Location", video_url)
self.end_headers()
except Ratelimit:
self.send_error(429) self.send_error(429)
except CachedException as e: return
self.send_error_video(str(e))
except Exception as e:
logging.exception(e)
self.send_error_video(str(e))
ips_running_ytdl = []
def invoke_youtubedl(self: Handler, input: str) -> dict:
ip = self.address_string()
if ip in ips_running_ytdl:
raise Ratelimit()
ips_running_ytdl.append(ip)
try: try:
with YoutubeDL({'extractor_args': {'youtube': {'skip': ['dash', 'hls']}}}) as ydl: ips_running_ytdl.append(client_ip)
return ydl.extract_info(input, download=False, process=False) ctx_cache[query] = ctx = {
finally:
ips_running_ytdl.remove(ip)
search_cache = {}
def ytdl_search_to_id(self: Handler, query: str, index: int) -> str:
ctx = search_cache.get(query)
if ctx:
ctx['event'].wait(60)
if 'error' in ctx:
raise CachedException(ctx['error'])
results = ctx.get('results')
else:
results = None
if results == None or results['count'] < index or datetime.now() >= ctx['expires_at']:
search_cache[query] = ctx = {
'event': Event(), 'event': Event(),
'expires_at': datetime.now() + timedelta(hours=5) 'expire': datetime.now() + timedelta(hours=5)
} }
try: with YoutubeDL() as ydl:
count = ceil(index/10)*10 info = ydl.extract_info(query, download=False)
info = invoke_youtubedl(self, f"ytsearch{count}:{query}")
entries = list(info['entries'])
if not entries:
raise Exception("ERROR: No results!")
ids = [video['id'] for video in entries]
ctx['results'] = results = {'ids': ids, 'count': count}
except Exception as e:
ctx['error'] = str(e)
raise
finally:
ctx['event'].set()
return results['ids'][min(index, len(results['ids'])) - 1]
resolve_cache = {}
def ytdl_resolve_mp4_url(self: Handler, input: str) -> str:
ctx = resolve_cache.get(input)
if ctx and datetime.now() <= ctx['expires_at']:
ctx['event'].wait(60)
if 'error' in ctx:
raise CachedException(ctx['error'])
return ctx['url']
resolve_cache[input] = ctx = {
'event': Event(),
'expires_at': datetime.now() + timedelta(hours=5)
}
try:
info = invoke_youtubedl(self, input)
selection = info selection = info
if "entries" in info: if "entries" in info:
if not info["entries"]: if not info["entries"]:
raise Exception("ERROR: No video found!") raise Exception("ERROR: No videos found!")
else: else:
selection = info["entries"][0] selection = info["entries"][0]
ctx['id'] = selection['id']
suitable_formats = list(filter(lambda x: x['ext'] == "mp4" and x['vcodec'] != 'none' and x['acodec'] != 'none', selection["formats"])) suitable_formats = list(filter(lambda x: x['ext'] == "mp4" and x['vcodec'] != 'none' and x['acodec'] != 'none', selection["formats"]))
if not suitable_formats: if not suitable_formats:
raise Exception(f"ERROR: {selection['id']}: No suitable formats of this video available!") raise Exception(f"ERROR: {selection['id']}: No suitable formats of this video available!")
best_format = max(suitable_formats, key=lambda x: x['height']) best_format = max(suitable_formats, key=lambda x: x['height'])
ctx['url'] = url = best_format['url'] ctx['url'] = best_format['url']
try: expire = parse_qs(urlparse(best_format['url']).query).get('expire', [])[0]
expire = parse_qs(urlparse(url).query).get('expire', [])[0]
if expire: if expire:
expire = datetime.fromtimestamp(int(expire)) expire = datetime.fromtimestamp(int(expire))
if expire < ctx['expires_at']: if expire < ctx['expire']: ctx['expire'] = expire
ctx['expires_at'] = expire
except Exception as e: except Exception as e:
logging.exception("failed parsing expire", e) logging.exception(e)
except Exception as e: ctx['exception'] = e
ctx['error'] = str(e) ctx['error_vid'] = generate_video_from_text(re.sub("(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", '', str(e)))
raise
finally: finally:
ips_running_ytdl.remove(client_ip)
ctx['event'].set() ctx['event'].set()
elif 'url' not in ctx:
ctx['event'].wait(60)
return url if self.is_pc_vrchat():
if ctx.get('id'):
self.send_response(302)
self.send_header("Location", "https://www.youtube.com/watch?v=" + ctx['id'])
self.end_headers()
return
if not ctx.get('url'):
def cache_prune_loop(): if 'exception' in ctx:
while True: if 'error_vid' in ctx:
sleep(3600) self.send_response(200)
for key in list(search_cache.keys()): self.send_header("Content-Type", "video/mp4")
if datetime.now() >= search_cache[key]['expires_at']: self.send_header("Content-Length", str(len(ctx['error_vid'])))
del search_cache[key] self.end_headers()
for key in list(resolve_cache.keys()): self.wfile.write(ctx['error_vid'])
if datetime.now() >= resolve_cache[key]['expires_at']: else:
del resolve_cache[key] self.send_error(500, message=str(ctx['exception']))
Thread(target=cache_prune_loop, daemon=True).start() else:
self.send_error(404)
else:
url = ctx['url']
if 'PROXY' in environ:
url = environ['PROXY'] + url.replace("https://",'')
self.send_response(302)
self.send_header("Location", url)
self.end_headers()
with ThreadingHTTPServer((environ.get('ADDRESS', ''), int(environ.get('PORT', 80))), Handler) as server: with ThreadingHTTPServer((environ.get('ADDRESS', ''), int(environ.get('PORT', 80))), Handler) as server:
server.serve_forever() server.serve_forever()

44
textvid.py Normal file
View File

@ -0,0 +1,44 @@
from ffmpeg import FFmpeg
import textwrap
from tempfile import mktemp
from os import remove
def generate_video_from_text(text, test=False) -> bytes:
    """Generate a single-frame, ten-second MP4 that displays ``text``.

    The text is wrapped to 90 columns and drawn over ``bg.png`` (a relative
    path, so it is looked up in the current working directory). ffmpeg
    writes to a temporary file, which is read back and always deleted.

    Args:
        text: Message to render into the video.
        test: When true, print the ffmpeg command line and its stderr output.

    Returns:
        The encoded MP4 file contents.

    Raises:
        Whatever ``FFmpeg.execute()`` raises when the encode fails; the
        temporary file is still removed in that case.
    """
    # Local imports: the module only imports ``mktemp`` and ``remove``.
    import os
    from tempfile import mkstemp

    # Escape the text before it is embedded in the single-quoted drawtext
    # argument built below.
    text = text.replace("\\", "\\\\").replace('"', '""').replace("'", "''").replace("%", "\\%").replace(":", "\\:")
    text = textwrap.fill(text, 90)

    # mkstemp() creates the file atomically and privately. mktemp() only
    # returned an unused *name*, leaving a window in which another process
    # could create that path first. The "y" option lets ffmpeg overwrite
    # the empty placeholder.
    fd, file = mkstemp()
    os.close(fd)
    try:
        peg = FFmpeg().option("y").input("bg.png", {
            "framerate": "0.1"
        }).output(file, {
            "f": "mp4",
            "t": "10",
            "c:v": "libx264",
            "pix_fmt": "yuv420p",
            "vf": "drawtext=font=monospace:fontsize=24:x=10:y=10:text='"+text+"':"
        })
        if test:
            @peg.on("start")
            def on_start(arguments): print("cmd:", ' '.join(arguments))

            @peg.on("stderr")
            def on_stderr(line): print("stderr:", line)
        peg.execute()
        with open(file, "rb") as fp:
            return fp.read()
    finally:
        # Clean up even when ffmpeg or the read fails, so failed encodes
        # do not leave files behind in the temp directory.
        try:
            remove(file)
        except FileNotFoundError:
            pass
if __name__ == "__main__":
    # Manual smoke test: render a long sample text with ffmpeg diagnostics
    # enabled and save the resulting video as test.mp4 for inspection.
    print("test")
    sample_text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut sem viverra aliquet eget sit amet. Senectus et netus et malesuada fames ac. Gravida quis blandit turpis cursus in hac habitasse platea. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Tristique risus nec feugiat in. Malesuada bibendum arcu vitae elementum curabitur vitae nunc sed velit. Porta lorem mollis aliquam ut porttitor leo a. Tellus rutrum tellus pellentesque eu tincidunt. Enim diam vulputate ut pharetra sit amet. Platea dictumst vestibulum rhoncus est. Sed sed risus pretium quam vulputate dignissim suspendisse. Viverra maecenas accumsan lacus vel facilisis volutpat est velit egestas. Lorem ipsum dolor sit amet consectetur. Netus et malesuada fames ac turpis egestas integer eget. Tellus elementum sagittis vitae et leo duis ut. Ipsum a arcu cursus vitae. Amet aliquam id diam maecenas ultricies mi. Mattis vulputate enim nulla aliquet porttitor lacus luctus accumsan. Magna ac placerat vestibulum lectus mauris ultrices eros in.
Dui sapien eget mi proin sed libero enim sed faucibus. Hac habitasse platea dictumst quisque sagittis purus sit. Mi eget mauris pharetra et ultrices neque ornare. Sagittis aliquam malesuada bibendum arcu vitae elementum curabitur vitae. Eget arcu dictum varius duis. Purus in massa tempor nec feugiat nisl pretium. Ipsum nunc aliquet bibendum enim facilisis gravida neque convallis. Adipiscing diam donec adipiscing tristique risus. Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus. Non quam lacus suspendisse faucibus.
Sed libero enim sed faucibus. Ut etiam sit amet nisl purus in mollis nunc sed. Cursus eget nunc scelerisque viverra mauris in aliquam sem fringilla. Eget aliquet nibh praesent tristique magna sit amet purus. Dui accumsan sit amet nulla facilisi morbi tempus. Lacus laoreet non curabitur gravida. Mi eget mauris pharetra et ultrices neque. Volutpat est velit egestas dui id ornare arcu odio. Porttitor lacus luctus accumsan tortor posuere ac. Morbi quis commodo odio aenean. Accumsan in nisl nisi scelerisque eu. Tincidunt dui ut ornare lectus sit amet est placerat in. Libero enim sed faucibus turpis in eu mi bibendum neque. At lectus urna duis convallis convallis.
Vel risus commodo viverra maecenas accumsan lacus. Mauris pharetra et ultrices neque ornare aenean euismod elementum. Non enim praesent elementum facilisis leo. Amet massa vitae tortor condimentum lacinia. Ornare aenean euismod elementum nisi quis eleifend. Diam donec adipiscing tristique risus nec. Volutpat diam ut venenatis tellus. Mauris nunc congue nisi vitae. Sit amet nisl suscipit adipiscing bibendum est ultricies integer quis. Turpis massa tincidunt dui ut ornare lectus sit amet. Libero enim sed faucibus turpis in. Sit amet porttitor eget dolor morbi non arcu risus quis. Sem integer vitae justo eget magna fermentum iaculis eu. Mattis molestie a iaculis at. Amet volutpat consequat mauris nunc congue. Et tortor at risus viverra adipiscing at in tellus integer. Amet mattis vulputate enim nulla.
Dignissim convallis aenean et tortor. Vitae congue eu consequat ac felis donec et odio. Risus at ultrices mi tempus imperdiet. Amet massa vitae tortor condimentum lacinia quis. Consectetur adipiscing elit ut aliquam purus. Integer quis auctor elit sed vulputate mi sit amet. Tellus id interdum velit laoreet. Sed risus ultricies tristique nulla aliquet. Fermentum dui faucibus in ornare quam. Lobortis elementum nibh tellus molestie nunc non blandit. Amet dictum sit amet justo donec. Iaculis urna id volutpat lacus."""
    clip = generate_video_from_text(sample_text, True)
    with open("test.mp4", "wb") as out_file:
        out_file.write(clip)

View File

@ -7,11 +7,9 @@ After=network.target
User=u2b User=u2b
Group=u2b Group=u2b
WorkingDirectory=/srv/u2b.cx/ WorkingDirectory=/srv/u2b.cx/
Environment=ADDRESS=127.0.0.1 PORT=52482 Environment=ADDRESS=127.29.151.200 PORT=52482 PROXY=https://proxy.u2b.cx/
ExecStart=/usr/bin/python3.11 server.py ExecStart=/usr/bin/python3.9 server.py
MemoryMax=1G MemoryMax=1G
LimitNOFILE=262144
Restart=always
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target