Files
once-campfire/app/models/opengraph/fetch.rb
Stanko Krtalić eecdb29332 Upgrade to Rails 8 and Ruby 3.4.5 (#1)
* Bump Ruby to 3.4.5
* Update dependencies
* Adjust for Rails 8 and Ruby 3.5 API changes
* Mark params strings as mutable in preparation for frozen strings in Ruby 3.5
* Update test for HTML5 sanitizer
    With Rails 7.1 the HTML5 sanitizer became the default; this breaks this test because the old sanitizer used to delete unpermitted nodes, while the new one returns their content.
    The final string is safe, but different than it used to be in Rails 7.0.
* Remove direct Turbo test helpers require & parallelize tests
* Fix Zeitwerk issues with rails extensions
* Update Resque setup for Redis 5+
* Remove unused views
* Remove GID v1 handler
2025-09-02 17:02:41 +02:00

80 lines
2.4 KiB
Ruby

require "net/http"
require "restricted_http/private_network_guard"
# Fetches remote resources over HTTP(S) for OpenGraph processing.
#
# Each hostname that is contacted (the initial one and every redirect target)
# is first passed through RestrictedHTTP::PrivateNetworkGuard.resolve, and the
# connection is then opened against the address it returned (via `ipaddr:`).
# NOTE(review): the guard presumably rejects private-network addresses; its
# implementation is not visible here, so confirm against that class.
class Opengraph::Fetch
  ALLOWED_DOCUMENT_CONTENT_TYPE = "text/html"
  MAX_BODY_SIZE = 5.megabytes
  MAX_REDIRECTS = 10

  # Raised when MAX_REDIRECTS consecutive responses were all redirects.
  class TooManyRedirectsError < StandardError; end

  # Raised when a redirect points at something that is not an HTTP(S) URL.
  class RedirectDeniedError < StandardError; end

  # Issues a GET for +url+ (a URI), following redirects.
  #
  # Returns the response body as a String, or nil when the final response is
  # not a 200 OK, is not "text/html", or is larger than MAX_BODY_SIZE.
  #
  # Raises TooManyRedirectsError or RedirectDeniedError (see above).
  def fetch_document(url, ip: RestrictedHTTP::PrivateNetworkGuard.resolve(url.host))
    request(url, Net::HTTP::Get, ip: ip) { |response| body_if_acceptable(response) }
  end

  # Issues a HEAD for +url+ (a URI), following redirects.
  #
  # Returns the raw Content-Type header of the final response (nil if the
  # header is absent).
  #
  # Raises TooManyRedirectsError or RedirectDeniedError (see above).
  def fetch_content_type(url, ip: RestrictedHTTP::PrivateNetworkGuard.resolve(url.host))
    request(url, Net::HTTP::Head, ip: ip) { |response| response["Content-Type"] }
  end

  private
    # Performs +request_class+ against +url+, connecting to +ip+, and follows
    # up to MAX_REDIRECTS redirects. The first non-redirect response is
    # yielded while its body is still unread, and the block's value becomes
    # the return value of this method.
    def request(url, request_class, ip:)
      MAX_REDIRECTS.times do
        Net::HTTP.start(url.host, url.port, ipaddr: ip, use_ssl: url.scheme == "https") do |http|
          http.request(request_class.new(url)) do |response|
            # Return from here rather than relying on the caller's block to
            # perform a non-local return; otherwise a block that finishes
            # normally would cause the same request to be repeated.
            return yield(response) unless response.is_a?(Net::HTTPRedirection)

            url, ip = resolve_redirect(response["location"], base: url)
          end
        end
      end

      raise TooManyRedirectsError
    end

    # Turns a Location header value into the next [ url, ip ] pair.
    #
    # Location may be a relative reference (e.g. "/login"), in which case it
    # is resolved against +base+, the URL that produced the redirect. The
    # resulting URL must be HTTP or HTTPS (URI::HTTPS is a URI::HTTP), and its
    # host is resolved through the guard again before it is used.
    def resolve_redirect(location, base:)
      target = URI.parse(location)
      target = base.merge(target) if target.relative?
      raise RedirectDeniedError unless target.is_a?(URI::HTTP)

      [ target, RestrictedHTTP::PrivateNetworkGuard.resolve(target.host) ]
    end

    # Returns the (size-limited) body for acceptable responses, nil otherwise.
    def body_if_acceptable(response)
      size_restricted_body(response) if response_valid?(response)
    end

    # Reads the response body, returning nil if it exceeds MAX_BODY_SIZE.
    def size_restricted_body(response)
      # We've already checked the Content-Length header, to try to avoid reading
      # the body of any large responses. But that header could be wrong or
      # missing. To be on the safe side, we'll read the body in chunks, and bail
      # if it runs over our size limit.
      StringIO.new.tap do |body|
        response.read_body do |chunk|
          return nil if body.string.bytesize + chunk.bytesize > MAX_BODY_SIZE
          body << chunk
        end
      end.string
    end

    def response_valid?(response)
      status_valid?(response) && content_type_valid?(response) && content_length_valid?(response)
    end

    def status_valid?(response)
      response.is_a?(Net::HTTPOK)
    end

    # Compares the media type only; Net::HTTP's #content_type omits
    # parameters such as charset.
    def content_type_valid?(response)
      response.content_type == ALLOWED_DOCUMENT_CONTENT_TYPE
    end

    # A missing Content-Length is nil, which #to_i turns into 0, so such
    # responses pass here and are limited while the body is read instead.
    def content_length_valid?(response)
      response.content_length.to_i <= MAX_BODY_SIZE
    end
end