Skip to content

Commit

Permalink
Proxy: Use connection pools for images (#4326)
Browse files Browse the repository at this point in the history
Theoretically this should improve memory usage and performance by quite a bit,
as we aren't creating a new HTTP::Client, and in turn a new connection, for
every image we request from YouTube.

Closes issue 4009
  • Loading branch information
SamantazFox committed Oct 30, 2024
2 parents f326bcf + 75b6861 commit 9957da2
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 95 deletions.
4 changes: 4 additions & 0 deletions src/invidious.cr
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ SOFTWARE = {

# Connection pool for YT_URL; its capacity comes from CONFIG.pool_size.
YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size)

# Image request pool
# Connection pool for https://yt3.ggpht.com, sized by the same
# CONFIG.pool_size setting as YT_POOL.

GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size)

# CLI
Kemal.config.extra_options do |parser|
parser.banner = "Usage: invidious [arguments]"
Expand Down
106 changes: 25 additions & 81 deletions src/invidious/routes/images.cr
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,9 @@ module Invidious::Routes::Images
end
end

# We're encapsulating this into a proc in order to easily reuse this
# portion of the code for each request block below.
request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end

env.response.headers["Access-Control-Allow-Origin"] = "*"

if response.status_code >= 300
env.response.headers.delete("Transfer-Encoding")
return
end

proxy_file(response, env)
}

begin
HTTP::Client.get("https://yt3.ggpht.com#{url}") do |resp|
return request_proc.call(resp)
GGPHT_POOL.client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
Expand Down Expand Up @@ -61,27 +41,10 @@ module Invidious::Routes::Images
end
end

request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end

env.response.headers["Connection"] = "close"
env.response.headers["Access-Control-Allow-Origin"] = "*"

if response.status_code >= 300
return env.response.headers.delete("Transfer-Encoding")
end

proxy_file(response, env)
}

begin
HTTP::Client.get("https://#{authority}.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool(authority).client &.get(url, headers) do |resp|
env.response.headers["Connection"] = "close"
return self.proxy_image(env, resp)
end
rescue ex
end
Expand All @@ -101,26 +64,9 @@ module Invidious::Routes::Images
end
end

request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end

env.response.headers["Access-Control-Allow-Origin"] = "*"

if response.status_code >= 300 && response.status_code != 404
return env.response.headers.delete("Transfer-Encoding")
end

proxy_file(response, env)
}

begin
HTTP::Client.get("https://i9.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
get_ytimg_pool("i9").client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
Expand Down Expand Up @@ -165,8 +111,7 @@ module Invidious::Routes::Images
if name == "maxres.jpg"
build_thumbnails(id).each do |thumb|
thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg"
# This can likely be optimized into a (small) pool sometime in the future.
if HTTP::Client.head("https://i.ytimg.com#{thumbnail_resource_path}").status_code == 200
if get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200
name = thumb[:url] + ".jpg"
break
end
Expand All @@ -181,29 +126,28 @@ module Invidious::Routes::Images
end
end

request_proc = ->(response : HTTP::Client::Response) {
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
begin
get_ytimg_pool("i").client &.get(url, headers) do |resp|
return self.proxy_image(env, resp)
end
rescue ex
end
end

env.response.headers["Access-Control-Allow-Origin"] = "*"

if response.status_code >= 300 && response.status_code != 404
return env.response.headers.delete("Transfer-Encoding")
private def self.proxy_image(env, response)
env.response.status_code = response.status_code
response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase)
env.response.headers[key] = value
end
end

proxy_file(response, env)
}
env.response.headers["Access-Control-Allow-Origin"] = "*"

begin
# This can likely be optimized into a (small) pool sometime in the future.
HTTP::Client.get("https://i.ytimg.com#{url}") do |resp|
return request_proc.call(resp)
end
rescue ex
if response.status_code >= 300
return env.response.headers.delete("Transfer-Encoding")
end

return proxy_file(response, env)
end
end
47 changes: 33 additions & 14 deletions src/invidious/yt_backend/connection_pool.cr
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
# Adds browser-like default headers and cookies to an outgoing request.
#
# - Replaces Crystal's default "Crystal" User-Agent with a browser one; any
#   other caller-supplied User-Agent is kept.
# - Fills in Accept-Charset, Accept and Accept-Language only when unset.
# - Rebuilds the Cookie header as: cookies from CONFIG.cookies (if any),
#   then the request's original cookies (if any), then a randomized
#   CONSENT=PENDING cookie (YT consent cookie for EU servers).
#
# `request` is expected to expose an HTTP::Headers-like `headers` object.
def add_yt_headers(request)
  headers = request.headers

  # Nilable lookup: a request without any User-Agent must not raise KeyError.
  headers.delete("User-Agent") if headers["User-Agent"]? == "Crystal"
  headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"

  headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
  headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
  headers["Accept-Language"] ||= "en-us,en;q=0.5"

  # Collect the cookie pairs in a list and join them once, so that a missing
  # original Cookie header no longer yields stray "; " separators
  # (previously "; CONSENT=..." or "a=b; ; CONSENT=...").
  cookie_parts = CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }

  # Preserve original cookies
  if (original = headers["cookie"]?) && !original.empty?
    cookie_parts << original
  end

  # Add new YT consent cookie for EU servers
  cookie_parts << "CONSENT=PENDING+#{Random.rand(100..999)}"

  headers["Cookie"] = cookie_parts.join("; ")
end
# Mapping of subdomain => YoutubeConnectionPool
# This is needed as we may need to access arbitrary subdomains of ytimg.
# Starts out empty; get_ytimg_pool adds an entry the first time a given
# subdomain is requested.
private YTIMG_POOLS = {} of String => YoutubeConnectionPool

struct YoutubeConnectionPool
property! url : URI
Expand Down Expand Up @@ -58,6 +47,21 @@ struct YoutubeConnectionPool
end
end

# Adds browser-like default headers and cookies to an outgoing request.
#
# - Replaces Crystal's default "Crystal" User-Agent with a browser one; any
#   other caller-supplied User-Agent is kept.
# - Fills in Accept-Charset, Accept and Accept-Language only when unset.
# - Rebuilds the Cookie header as: cookies from CONFIG.cookies (if any),
#   then the request's original cookies (if any), then a randomized
#   CONSENT=PENDING cookie (YT consent cookie for EU servers).
#
# `request` is expected to expose an HTTP::Headers-like `headers` object.
def add_yt_headers(request)
  headers = request.headers

  # Nilable lookup: a request without any User-Agent must not raise KeyError.
  headers.delete("User-Agent") if headers["User-Agent"]? == "Crystal"
  headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36"

  headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
  headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
  headers["Accept-Language"] ||= "en-us,en;q=0.5"

  # Collect the cookie pairs in a list and join them once, so that a missing
  # original Cookie header no longer yields stray "; " separators
  # (previously "; CONSENT=..." or "a=b; ; CONSENT=...").
  cookie_parts = CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }

  # Preserve original cookies
  if (original = headers["cookie"]?) && !original.empty?
    cookie_parts << original
  end

  # Add new YT consent cookie for EU servers
  cookie_parts << "CONSENT=PENDING+#{Random.rand(100..999)}"

  headers["Cookie"] = cookie_parts.join("; ")
end

def make_client(url : URI, region = nil, force_resolve : Bool = false)
client = HTTP::Client.new(url)

Expand Down Expand Up @@ -94,3 +98,18 @@ def make_configured_http_proxy_client
password: config_proxy.password,
)
end

# Returns the HTTP connection pool for the given ytimg.com subdomain
# (e.g. "i" => https://i.ytimg.com).
#
# Pools are cached in YTIMG_POOLS. On the first request for a subdomain a
# new pool with CONFIG.pool_size capacity is created, logged and stored.
def get_ytimg_pool(subdomain)
  # Reuse the pool already registered for this subdomain, if any.
  cached = YTIMG_POOLS[subdomain]?
  return cached if cached

  LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"")

  # The assignment evaluates to the stored pool, which becomes the return value.
  YTIMG_POOLS[subdomain] = YoutubeConnectionPool.new(URI.parse("https://#{subdomain}.ytimg.com"), capacity: CONFIG.pool_size)
end

0 comments on commit 9957da2

Please sign in to comment.