Compare commits

...

16 Commits

Author SHA1 Message Date
Lamp 9de3ef8e1f restart always 2023-12-05 23:14:48 -06:00
Lamp 92ad71dbd4 python3.11 2023-11-27 19:11:00 -06:00
Lamp bd3e1447e4 update regex 2023-09-19 02:59:21 -07:00
Lamp eec86be88c disable resolver 2023-09-19 02:28:47 -07:00
Lamp a2b64041ba fix cache prune 2023-08-19 14:37:46 -07:00
Lamp 162bd1c1b9 fix using expired 2023-08-19 14:03:24 -07:00
Lamp 63cc3e90b9 fix formatting 2023-08-18 20:03:42 -05:00
Lamp a08303eb3a [REFACTOR] allow selecting Nth search result 2023-08-18 17:58:31 -07:00
Lamp 6bc155c6d2 delete commented 2023-08-17 23:45:33 -07:00
Lamp f704098473 use host network mode 2023-08-15 19:19:47 -07:00
Lamp d5ead66a07 rename env 2023-08-15 11:35:48 -07:00
Lamp f8436927cf use env for caddy site 2023-08-15 11:34:02 -07:00
Lamp 3b4ee632b1 why do I have log template but then don't log 2023-08-15 11:29:30 -07:00
Lamp 6b868c2b63 Disable PC vrchat bypass 2023-08-15 11:26:52 -07:00
Lamp 40bda207e1 skip streaming manifests 2023-08-01 00:35:51 -07:00
Lamp ea8bf8fa1e porkbun dns 2023-07-31 21:34:04 -07:00
9 changed files with 210 additions and 204 deletions

4
.gitignore vendored
View File

@ -1,3 +1,5 @@
.vscode
__pycache__
test.mp4
test.mp4
env
errors

View File

@ -1,44 +0,0 @@
# u2b.cx
A YouTube search resolver + raw file resolver w/ proxy for Quest VRChat.
- Get the video you want just by typing its name in the URL
- Video works for both PC and Quest VRChat
- Proxying avoids random blocks from google's servers
## Technical Features
- Written in Python to integrate with YoutubeDL (yt-dlp) for fastest performance
- Multi-threaded for concurrent usage
- Requests coalesced to one YoutubeDL invocation per input
- Limited to one YoutubeDL invocation per IP address
- Results cached for 5 hours or until expiry found in extracted URL
- Extracted URLs proxied in Caddy so that they work in all countries
- Errors displayed as a 10 second single-frame video
- PC VRChat bypassed to save bandwidth (TODO: this sacrifices consistency)
### Planned
- Option to get Nth search result (requires deeper integration into YoutubeDL)
## Usage
### GET `https://u2b.cx/<query>`
The server will search YouTube for `<query>`, pick the first result, pick the best quality all-in-one MP4 format available, and respond with a 302 redirect to the proxied raw MP4 file. If the client is PC VRChat, the server may instead redirect to the YouTube video URL to save bandwidth on the server.
NOTE: query must not start with a dot (.)
### GET `https://u2b.cx/id/<video id>`
### GET `https://u2b.cx/https://www.youtube.com/watch?v=<video id>`
### GET `https://u2b.cx/https://youtu.be/<video id>`
### GET `https://u2b.cx/https://www.youtube.com/shorts/<video id>`
### GET `https://u2b.cx/https://music.youtube.com/watch?v=<video id>`
### etcetera...
Bypasses search to look up the video directly by its id. If the client is PC VRChat, it may be immediately redirected to the YouTube url to save resources on the server.
Regex only matches the start of the string; anything after the 11-char video id is ignored.
Malformed YouTube URLs are treated as a YouTube search query; the search will usually still return the intended video.

View File

@ -13,9 +13,14 @@
}
}
:80 {
#nl.u2b.cx u2b.cx {
{$CADDY_SITE:":80"} {
log
tls {
dns porkbun {
api_key {env.PORKBUN_API_KEY}
api_secret_key {env.PORKBUN_API_SECRET}
}
}
handle_path /proxy/* {
@gv path_regexp gvurl ^\/([a-z0-9-]+\.googlevideo\.com)
handle @gv {
@ -45,7 +50,7 @@ Disallow: /
@notget not method GET
respond @notget 403
reverse_proxy http://app:8080
reverse_proxy http://127.0.0.1:8080
}
}
}

View File

@ -1,4 +1,6 @@
FROM caddy:2.6-builder AS builder
RUN xcaddy build --with github.com/caddyserver/transform-encoder
RUN xcaddy build \
--with github.com/caddyserver/transform-encoder \
--with github.com/caddy-dns/porkbun
FROM caddy:2.6
COPY --from=builder /usr/bin/caddy /usr/bin/caddy

View File

@ -8,20 +8,20 @@ services:
- ./:/app/
working_dir: /app/
environment:
- ADDRESS=127.0.0.1
- PORT=8080
- PROXY=/proxy/
network_mode: host
command: python server.py
caddy:
build: caddy
restart: always
ports:
- "80:80"
- "443:443"
- "443:443/udp"
network_mode: host
volumes:
- ./caddy/Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
- caddy_config:/config
env_file: env
volumes:
caddy_data:
caddy_config:

Binary file not shown.

293
server.py
View File

@ -6,31 +6,25 @@ from urllib.parse import unquote, urlparse, parse_qs
from threading import Event, Thread
from datetime import datetime, timedelta
from time import sleep
from os import environ
from os import environ, makedirs, stat
import logging
import re
from textvid import generate_video_from_text
from ffmpeg import FFmpeg
import textwrap
from pathlib import Path
from hashlib import sha256
from shutil import copyfileobj
from math import ceil
ctx_cache = {}
ips_running_ytdl = []
def cache_prune_loop():
    """Evict expired entries from ``ctx_cache`` once an hour, forever.

    Never returns; meant to be run on a background thread.
    """
    while True:
        sleep(3600)
        # Iterate over a snapshot: deleting from the live dict while iterating
        # it raises "RuntimeError: dictionary changed size during iteration",
        # which would terminate this loop at the first expired entry.
        for key, ctx in list(ctx_cache.items()):
            if datetime.now() >= ctx['expire']:
                # pop() with a default tolerates the key having vanished
                # between the snapshot and the removal.
                ctx_cache.pop(key, None)
Thread(target=cache_prune_loop, daemon=True).start()
if 'DEBUG' in environ:
logging.basicConfig(level=logging.DEBUG)
class Ratelimit(Exception):
    """Raised when the requesting IP already has a YoutubeDL invocation running."""
class CachedException(Exception):
    """Raised to replay an error message that was stored in a cache entry."""
class Handler(BaseHTTPRequestHandler):
def address_string(self):
    """Return the client address, preferring the reverse proxy's view of it.

    The first entry of the ``X-Forwarded-For`` header is used when present
    and non-empty; otherwise the socket peer address is returned.
    """
    # Tolerate a handler on which ``headers`` has not been set.
    headers = getattr(self, 'headers', {})
    forwarded_for = headers.get('X-Forwarded-For', '')
    first_hop = forwarded_for.split(',')[0]
    if first_hop:
        return first_hop
    return self.client_address[0]
def is_pc_vrchat(self):
    """Report whether the request headers match the PC VRChat fingerprint.

    The check is purely header based: a desktop-Chrome style ``User-Agent``
    (fixed prefix and suffix, any Chrome version in between) combined with
    ``Accept-Encoding: identity``.
    """
    user_agent = self.headers.get('User-Agent', '')
    accept_encoding = self.headers.get('Accept-Encoding', '')
    if not user_agent.startswith("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"):
        return False
    if not user_agent.endswith(" Safari/537.36"):
        return False
    return accept_encoding == "identity"
def send_error(self, code, message=""):
body = bytes(message, "utf-8")
self.send_response(code)
@ -39,96 +33,185 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write(body)
def do_GET(self):
# block other bot junk in reverse proxy
if self.path in ["/", "/favicon.ico"] or self.path.startswith("/."):
self.send_error(404)
return
path = unquote(self.path)
match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
if match:
if self.is_pc_vrchat():
self.send_response(302)
self.send_header("Location", "https://www.youtube.com/watch?v=" + match[1])
self.end_headers()
return
query = match[1]
else:
query = "ytsearch:" + path[1:]
def send_error_video(self, text: str):
makedirs("errors", exist_ok=True)
hash = sha256(bytes(text, "utf8")).hexdigest()
file = Path(f"errors/{hash}.mp4")
ctx = ctx_cache.get(query)
if not file.exists():
text = re.sub("(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", '', text)
text = text.replace("\\", "\\\\").replace('"', '""').replace("'", "''").replace("%", "\\%").replace(":", "\\:")
text = textwrap.fill(text, 90)
peg = FFmpeg().option("y").input("bg.png", {
"framerate": "0.1"
}).output(str(file), {
"f": "mp4",
"t": "10",
"c:v": "libx264",
"pix_fmt": "yuv420p",
"vf": "drawtext=font=monospace:fontsize=24:x=10:y=10:text='"+text+"':"
})
@peg.on("start")
def on_start(arguments):
logging.debug("cmd:" + ' '.join(arguments))
@peg.on("stderr")
def on_stderr(line):
logging.debug("stderr:" + line)
peg.execute()
if not ctx or 'expire' in ctx and datetime.now() >= ctx['expire']:
client_ip = self.address_string()
if client_ip in ips_running_ytdl:
self.send_error(429)
return
try:
ips_running_ytdl.append(client_ip)
ctx_cache[query] = ctx = {
'event': Event(),
'expire': datetime.now() + timedelta(hours=5)
}
with YoutubeDL() as ydl:
info = ydl.extract_info(query, download=False)
selection = info
if "entries" in info:
if not info["entries"]:
raise Exception("ERROR: No videos found!")
else:
selection = info["entries"][0]
ctx['id'] = selection['id']
suitable_formats = list(filter(lambda x: x['ext'] == "mp4" and x['vcodec'] != 'none' and x['acodec'] != 'none', selection["formats"]))
if not suitable_formats:
raise Exception(f"ERROR: {selection['id']}: No suitable formats of this video available!")
best_format = max(suitable_formats, key=lambda x: x['height'])
ctx['url'] = best_format['url']
expire = parse_qs(urlparse(best_format['url']).query).get('expire', [])[0]
if expire:
expire = datetime.fromtimestamp(int(expire))
if expire < ctx['expire']: ctx['expire'] = expire
except Exception as e:
logging.exception(e)
ctx['exception'] = e
ctx['error_vid'] = generate_video_from_text(re.sub("(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", '', str(e)))
finally:
ips_running_ytdl.remove(client_ip)
ctx['event'].set()
elif 'url' not in ctx:
ctx['event'].wait(60)
if self.is_pc_vrchat():
if ctx.get('id'):
self.send_response(302)
self.send_header("Location", "https://www.youtube.com/watch?v=" + ctx['id'])
self.end_headers()
return
if not ctx.get('url'):
if 'exception' in ctx:
if 'error_vid' in ctx:
self.send_response(200)
self.send_header("Content-Type", "video/mp4")
self.send_header("Content-Length", str(len(ctx['error_vid'])))
self.end_headers()
self.wfile.write(ctx['error_vid'])
else:
self.send_error(500, message=str(ctx['exception']))
else:
self.send_error(404)
else:
url = ctx['url']
if 'PROXY' in environ:
url = environ['PROXY'] + url.replace("https://",'')
self.send_response(302)
self.send_header("Location", url)
with file.open('rb') as f:
fs = stat(f.fileno())
self.send_response(200)
self.send_header("Content-Type", "video/mp4")
self.send_header("Content-Length", str(fs.st_size))
self.end_headers()
copyfileobj(f, self.wfile)
# Handle a GET request: turn the request path into a YouTube video id (either
# taken directly from the path or found via a search) and answer with a 302
# redirect to that video's watch page.
# NOTE(review): indentation is not visible in this view, so any nesting
# mentioned in the comments below is inferred from statement order — confirm
# against the real file.
def do_GET(self):
try:
# Paths that are never a query: the root, the favicon, and anything starting with a dot.
if self.path in ["/", "/favicon.ico"] or self.path.startswith("/."):
self.send_error(404)
return
# Decode percent-escapes so pasted URLs and search text can be matched as plain text.
path = unquote(self.path)
# Direct reference: "/id/<id>" or a pasted youtube.com / youtu.be URL; group 1 is the 11-character video id.
id_match = re.match("\/(?:id\/|(?:https?:\/\/)?(?:(?:www\.|music\.|m\.)?youtube\.com\/(?:watch\?v=|shorts\/|live\/)|youtu\.be\/))([A-Za-z0-9_-]{11})", path)
if id_match:
video_id = id_match[1]
else:
# Otherwise the path is "/<search text>" with an optional "/<N>" suffix selecting the Nth result.
search_match = re.match("^\/(.+?)(?:\/(\d*))?$", path)
if not search_match:
self.send_error(404)
return
search_query = search_match[1]
# A missing or empty index means the first result; the index is then clamped to 1..100.
search_index = int(search_match[2]) if search_match[2] and search_match[2].isdigit() else 1
search_index = max(min(search_index, 100), 1)
video_id = ytdl_search_to_id(self, search_query, search_index)
self.send_response(302)
self.send_header("Location", f"https://www.youtube.com/watch?v={video_id}")
self.end_headers()
# NOTE(review): this return appears to leave the resolver/proxy redirect below
# unreachable (the original remark follows) — confirm its nesting level.
#half this code now defunct
return
video_url = ytdl_resolve_mp4_url(self, video_id)
# When PROXY is set, the https:// scheme is stripped and the rest of the URL is appended to the PROXY prefix.
if 'PROXY' in environ:
video_url = environ['PROXY'] + video_url.replace("https://",'')
self.send_response(302)
self.send_header("Location", video_url)
self.end_headers()
# Ratelimit -> plain 429; a replayed cached error -> error video without logging;
# any other exception is logged and then also shown as an error video.
except Ratelimit:
self.send_error(429)
except CachedException as e:
self.send_error_video(str(e))
except Exception as e:
logging.exception(e)
self.send_error_video(str(e))
ips_running_ytdl = []
def invoke_youtubedl(self: Handler, input: str) -> dict:
    """Run one unprocessed YoutubeDL metadata extraction for ``input``.

    At most one extraction per client address runs at a time: the address is
    recorded in ``ips_running_ytdl`` for the duration of the call and removed
    again afterwards, whether or not the extraction succeeded.

    Raises:
        Ratelimit: the client address already has an extraction in progress.
    """
    client_ip = self.address_string()
    if client_ip in ips_running_ytdl:
        raise Ratelimit()
    ips_running_ytdl.append(client_ip)
    # Ask the YouTube extractor to skip the DASH and HLS manifests.
    ydl_options = {'extractor_args': {'youtube': {'skip': ['dash', 'hls']}}}
    try:
        with YoutubeDL(ydl_options) as ydl:
            return ydl.extract_info(input, download=False, process=False)
    finally:
        ips_running_ytdl.remove(client_ip)
search_cache = {}
def ytdl_search_to_id(self: Handler, query: str, index: int) -> str:
    """Return the video id of the ``index``-th (1-based) search result for ``query``.

    Results are cached per query in ``search_cache``. A cached entry is reused
    when it was fetched with at least ``index`` results and has not expired;
    otherwise a fresh search for ``ceil(index / 10) * 10`` results is run and
    replaces the entry.

    Raises:
        CachedException: the cached entry for ``query`` recorded an error.
        Exception: the search returned no entries, or ``invoke_youtubedl``
            raised (the message is stored in the cache entry before
            re-raising).
    """
    ctx = search_cache.get(query)
    results = None
    if ctx:
        # Another request may still be filling this entry; wait up to 60 s.
        ctx['event'].wait(60)
        if 'error' in ctx:
            raise CachedException(ctx['error'])
        results = ctx.get('results')

    # ``ctx`` is only dereferenced here when ``results`` is set, which implies
    # that a cache entry exists.
    if results is None or results['count'] < index or datetime.now() >= ctx['expires_at']:
        search_cache[query] = ctx = {
            'event': Event(),
            'expires_at': datetime.now() + timedelta(hours=5)
        }
        try:
            # Round the request size up to the next multiple of ten.
            count = ceil(index/10)*10
            info = invoke_youtubedl(self, f"ytsearch{count}:{query}")
            entries = list(info['entries'])
            if not entries:
                raise Exception("ERROR: No results!")
            ids = [video['id'] for video in entries]
            ctx['results'] = results = {'ids': ids, 'count': count}
        except Exception as e:
            ctx['error'] = str(e)
            raise
        finally:
            # Wake any waiters, on success and on failure alike.
            ctx['event'].set()

    # If fewer results exist than requested, fall back to the last one.
    return results['ids'][min(index, len(results['ids'])) - 1]
resolve_cache = {}
def ytdl_resolve_mp4_url(self: Handler, input: str) -> str:
    """Resolve ``input`` to the URL of its best combined audio+video MP4 format.

    Results are cached per input in ``resolve_cache`` for five hours, or
    until the ``expire`` timestamp found in the resolved URL's query string
    if that is sooner.

    Raises:
        CachedException: an unexpired cache entry recorded an error.
        Exception: no video or no suitable format was found, or
            ``invoke_youtubedl`` raised (the message is stored in the cache
            entry before re-raising).
    """
    ctx = resolve_cache.get(input)
    if ctx and datetime.now() <= ctx['expires_at']:
        # Another request may still be filling this entry; wait up to 60 s.
        ctx['event'].wait(60)
        if 'error' in ctx:
            raise CachedException(ctx['error'])
        # NOTE: if the wait timed out before the entry was filled, this lookup
        # raises KeyError.
        return ctx['url']

    resolve_cache[input] = ctx = {
        'event': Event(),
        'expires_at': datetime.now() + timedelta(hours=5)
    }
    try:
        info = invoke_youtubedl(self, input)
        selection = info
        if "entries" in info:
            if not info["entries"]:
                raise Exception("ERROR: No video found!")
            selection = info["entries"][0]

        # Only MP4 formats that carry both a video and an audio stream qualify.
        suitable_formats = [
            fmt for fmt in selection["formats"]
            if fmt['ext'] == "mp4" and fmt['vcodec'] != 'none' and fmt['acodec'] != 'none'
        ]
        if not suitable_formats:
            raise Exception(f"ERROR: {selection['id']}: No suitable formats of this video available!")
        best_format = max(suitable_formats, key=lambda x: x['height'])
        ctx['url'] = url = best_format['url']

        # Best effort: shorten the cache lifetime to the URL's own expiry.
        try:
            # A URL without an ``expire`` parameter is not an error, so avoid
            # indexing into an empty list.
            expire_values = parse_qs(urlparse(url).query).get('expire')
            if expire_values:
                expire = datetime.fromtimestamp(int(expire_values[0]))
                if expire < ctx['expires_at']:
                    ctx['expires_at'] = expire
        except Exception:
            # logging.exception() already appends the active traceback; passing
            # the exception as a %-format argument to a message without
            # placeholders makes the logging module itself report an error.
            logging.exception("failed parsing expire")
    except Exception as e:
        ctx['error'] = str(e)
        raise
    finally:
        # Wake any waiters, on success and on failure alike.
        ctx['event'].set()
    return url
def cache_prune_loop():
    """Once an hour, drop expired entries from ``search_cache`` and ``resolve_cache``.

    Never returns; meant to be run on a background thread.
    """
    while True:
        sleep(3600)
        for cache in (search_cache, resolve_cache):
            # Walk a snapshot of the keys so entries can be deleted safely.
            for key in list(cache.keys()):
                if datetime.now() >= cache[key]['expires_at']:
                    del cache[key]
# Start the cache pruner on a daemon thread so it never keeps the process alive.
Thread(target=cache_prune_loop, daemon=True).start()
# Listen on ADDRESS:PORT from the environment (defaults: empty address, port 80)
# and serve requests until the process is stopped.
with ThreadingHTTPServer((environ.get('ADDRESS', ''), int(environ.get('PORT', 80))), Handler) as server:
server.serve_forever()

View File

@ -1,44 +0,0 @@
from ffmpeg import FFmpeg
import textwrap
from tempfile import mktemp
from os import remove
def generate_video_from_text(text, test=False) -> bytes:
    """Generate a single-frame, ten-second MP4 displaying ``text``.

    The text is escaped for ffmpeg's ``drawtext`` filter, wrapped at 90
    columns and drawn over ``bg.png`` (read from the working directory).

    Args:
        text: The message to render.
        test: When true, print the ffmpeg command line and its stderr output.

    Returns:
        The encoded MP4 file contents.
    """
    # Local imports keep this block self-contained; the module only imports
    # ``mktemp`` and ``remove`` at file level.
    from os import close
    from tempfile import mkstemp

    text = text.replace("\\", "\\\\").replace('"', '""').replace("'", "''").replace("%", "\\%").replace(":", "\\:")
    text = textwrap.fill(text, 90)

    # mkstemp() creates the file atomically, unlike the deprecated mktemp(),
    # which only returns a name that another process could claim first.
    # ffmpeg is run with -y, so it overwrites the empty placeholder.
    fd, file = mkstemp()
    close(fd)
    try:
        peg = FFmpeg().option("y").input("bg.png", {
            "framerate": "0.1"
        }).output(file, {
            "f": "mp4",
            "t": "10",
            "c:v": "libx264",
            "pix_fmt": "yuv420p",
            "vf": "drawtext=font=monospace:fontsize=24:x=10:y=10:text='"+text+"':"
        })
        if test:
            @peg.on("start")
            def on_start(arguments): print("cmd:", ' '.join(arguments))

            @peg.on("stderr")
            def on_stderr(line): print("stderr:", line)
        peg.execute()
        with open(file, "rb") as fp:
            return fp.read()
    finally:
        # Always clean up, including when ffmpeg or the read fails.
        remove(file)
if __name__ == "__main__":
print("test")
v = generate_video_from_text("""Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut sem viverra aliquet eget sit amet. Senectus et netus et malesuada fames ac. Gravida quis blandit turpis cursus in hac habitasse platea. Sed ullamcorper morbi tincidunt ornare massa eget egestas purus. Tristique risus nec feugiat in. Malesuada bibendum arcu vitae elementum curabitur vitae nunc sed velit. Porta lorem mollis aliquam ut porttitor leo a. Tellus rutrum tellus pellentesque eu tincidunt. Enim diam vulputate ut pharetra sit amet. Platea dictumst vestibulum rhoncus est. Sed sed risus pretium quam vulputate dignissim suspendisse. Viverra maecenas accumsan lacus vel facilisis volutpat est velit egestas. Lorem ipsum dolor sit amet consectetur. Netus et malesuada fames ac turpis egestas integer eget. Tellus elementum sagittis vitae et leo duis ut. Ipsum a arcu cursus vitae. Amet aliquam id diam maecenas ultricies mi. Mattis vulputate enim nulla aliquet porttitor lacus luctus accumsan. Magna ac placerat vestibulum lectus mauris ultrices eros in.
Dui sapien eget mi proin sed libero enim sed faucibus. Hac habitasse platea dictumst quisque sagittis purus sit. Mi eget mauris pharetra et ultrices neque ornare. Sagittis aliquam malesuada bibendum arcu vitae elementum curabitur vitae. Eget arcu dictum varius duis. Purus in massa tempor nec feugiat nisl pretium. Ipsum nunc aliquet bibendum enim facilisis gravida neque convallis. Adipiscing diam donec adipiscing tristique risus. Pulvinar neque laoreet suspendisse interdum consectetur libero id faucibus. Non quam lacus suspendisse faucibus.
Sed libero enim sed faucibus. Ut etiam sit amet nisl purus in mollis nunc sed. Cursus eget nunc scelerisque viverra mauris in aliquam sem fringilla. Eget aliquet nibh praesent tristique magna sit amet purus. Dui accumsan sit amet nulla facilisi morbi tempus. Lacus laoreet non curabitur gravida. Mi eget mauris pharetra et ultrices neque. Volutpat est velit egestas dui id ornare arcu odio. Porttitor lacus luctus accumsan tortor posuere ac. Morbi quis commodo odio aenean. Accumsan in nisl nisi scelerisque eu. Tincidunt dui ut ornare lectus sit amet est placerat in. Libero enim sed faucibus turpis in eu mi bibendum neque. At lectus urna duis convallis convallis.
Vel risus commodo viverra maecenas accumsan lacus. Mauris pharetra et ultrices neque ornare aenean euismod elementum. Non enim praesent elementum facilisis leo. Amet massa vitae tortor condimentum lacinia. Ornare aenean euismod elementum nisi quis eleifend. Diam donec adipiscing tristique risus nec. Volutpat diam ut venenatis tellus. Mauris nunc congue nisi vitae. Sit amet nisl suscipit adipiscing bibendum est ultricies integer quis. Turpis massa tincidunt dui ut ornare lectus sit amet. Libero enim sed faucibus turpis in. Sit amet porttitor eget dolor morbi non arcu risus quis. Sem integer vitae justo eget magna fermentum iaculis eu. Mattis molestie a iaculis at. Amet volutpat consequat mauris nunc congue. Et tortor at risus viverra adipiscing at in tellus integer. Amet mattis vulputate enim nulla.
Dignissim convallis aenean et tortor. Vitae congue eu consequat ac felis donec et odio. Risus at ultrices mi tempus imperdiet. Amet massa vitae tortor condimentum lacinia quis. Consectetur adipiscing elit ut aliquam purus. Integer quis auctor elit sed vulputate mi sit amet. Tellus id interdum velit laoreet. Sed risus ultricies tristique nulla aliquet. Fermentum dui faucibus in ornare quam. Lobortis elementum nibh tellus molestie nunc non blandit. Amet dictum sit amet justo donec. Iaculis urna id volutpat lacus.""", True)
with open("test.mp4", "wb") as f:
f.write(v)

View File

@ -7,9 +7,11 @@ After=network.target
User=u2b
Group=u2b
WorkingDirectory=/srv/u2b.cx/
Environment=ADDRESS=127.29.151.200 PORT=52482 PROXY=https://proxy.u2b.cx/
ExecStart=/usr/bin/python3.9 server.py
Environment=ADDRESS=127.0.0.1 PORT=52482
ExecStart=/usr/bin/python3.11 server.py
MemoryMax=1G
LimitNOFILE=262144
Restart=always
[Install]
WantedBy=multi-user.target