%w[uri net/http hpricot].each {|lib| require lib}
# Address of the page to rewrite. After the fetch below it holds the URL the
# document was finally served from, which is the right base for resolving the
# relative references found inside it.
url = 'http://en.wikipedia.org/wiki/Night'

# Net::HTTP.get_response does not follow redirects by itself, so a 3xx reply
# would otherwise have its own body parsed instead of the target page.
# Follow Location headers by hand, with a bound so a redirect loop terminates.
max_redirects = 5
response = Net::HTTP.get_response(URI.parse(url))
max_redirects.times do
  break unless response.is_a?(Net::HTTPRedirection) && response['location']

  # Location may be relative, so resolve it against the URL just requested.
  url = URI.parse(url).merge(response['location']).to_s
  response = Net::HTTP.get_response(URI.parse(url))
end

# Parsed document tree built from the response body.
body = Hpricot.parse(response.body)
# Lookup table: HTML tag name => names of the attributes on that tag whose
# values are treated as URI references and resolved against the page URL.
# The keys are also joined into the element selector, so their order is kept.
absolutisable = {
  'a'          => ['href'],
  'applet'     => ['codebase'],
  'area'       => ['href'],
  'blockquote' => ['cite'],
  'body'       => ['background'],
  'del'        => ['cite'],
  'form'       => ['action'],
  'frame'      => ['longdesc', 'src'],
  'iframe'     => ['longdesc', 'src'],
  'head'       => ['profile'],
  'img'        => ['longdesc', 'src', 'usemap'],
  'input'      => ['src', 'usemap'],
  'ins'        => ['cite'],
  'link'       => ['href'],
  'object'     => ['classid', 'codebase', 'data', 'usemap'],
  'q'          => ['cite'],
  'script'     => ['src']
}
# Rewrite every URL-bearing attribute listed in `absolutisable` so that it is
# absolute relative to `url`, mutating the parsed document in place, then
# print the resulting document.

# The base is the same for every attribute, so parse it once up front.
base_uri = URI.parse(url)

# Select every element whose tag name is a key of the table.
(body / absolutisable.keys.join('|')).each do |elem|
  absolutisable[elem.name].each do |attr|
    reference = elem.attributes[attr]
    next if reference.nil? # attribute not present on this element

    begin
      elem.raw_attributes[attr] = base_uri.merge(reference).to_s
    rescue URI::InvalidURIError
      # Pages found in the wild contain malformed references; leave such a
      # value untouched rather than aborting the whole conversion.
    end
  end
end

puts body
Refactorings
No refactoring yet!
Converts all URLs in a resource to absolute URLs. This doesn't take CSS `@import` rules into account, and internal CSS that uses `url(...)` will break it. Any more complete or nicer ideas?