require 'rubygems'
require 'hpricot'
require 'open-uri'
doc = Hpricot(open('http://www.mpi.mb.ca/salvage/auction.asp?salenm=1694'))
doc.search("tr > td > font").each do |i|
cars << i.inner_html
end
b = 0
car_array=[]
while b < cars.length do
car_array << {
:num => cars[b],
:year => cars[b+=1],
:make_model => cars[b+=1],
:vin => cars[b+=1],
:engine => cars[b+=1],
:status => cars[b+=1],
:km => cars[b+=1]
}
b+=1
end
Refactorings
No refactoring yet !
bob
April 26, 2010, April 26, 2010 05:35, permalink
require 'rubygems'
require 'hpricot'
require 'open-uri'
doc = Hpricot(open('http://www.mpi.mb.ca/salvage/auction.asp?salenm=1694'))
cars = doc.search("tr > td > font").collect { |i| i.inner_html }
car_array=[]
cars.each_slice(7) do |a,b,c,d,e,f,g|
car_array << {
:num => a,
:year => b,
:make_model => c,
:vin => d,
:engine => e,
:status => f,
:km => g
}
end
Martin Plöger
April 26, 2010, April 26, 2010 06:50, permalink
I'm not quite satisfied with the indexes inside the #collect-method. But I think this should be resolved by improving the scraping of the html-code before the loop, not by the loop later on.
require 'rubygems'
require 'hpricot'
require 'open-uri'
doc = Hpricot(open('http://www.mpi.mb.ca/salvage/auction.asp?salenm=1694'))
cars = doc.search("tr > td > font").collect { |i| i.inner_html }
car_array = cars.each_slice(7).collect do |car|
:num => car[0], :year => car[1],
:make_model => car[2], vin: => car[3]
:engine => car[4], :status => car[5],
:km => car[6]
end
LucaB
April 26, 2010, April 26, 2010 07:45, permalink
you are assuming that a car's values are implicitly organised in groups of 7, so you could try:
require 'rubygems'
require 'hpricot'
require 'open-uri'
doc = Hpricot(open('http://www.mpi.mb.ca/salvage/auction.asp?salenm=1694'))
cars=[] # => you can't see the variable cars outside of the search block
doc.search("tr > td > font").each do |i|
cars << i.inner_html
end
car_keys = [:num, :year, :make_model, :vin, :engine, :status, :km]
grouped_cars = cars.each_slice(7).collect do |car|
car_keys.zip(car).inject({}){|hash, pair| hash[pair[0]] = pair[1]; hash}
end
drTheMan
May 18, 2010, May 18, 2010 08:53, permalink
Note the shorter syntax in the doc.search.collect; when just calling a single method without parameters on the original value each loop; you can use this syntax.
Then I also took the liberty to shorten LucaB's inject loop a little bit (and use the original variable names).
require 'rubygems'
require 'hpricot'
require 'open-uri'
doc = Hpricot(open('http://www.mpi.mb.ca/salvage/auction.asp?salenm=1694'))
cars = doc.search("tr > td > font").collect(&:inner_html)
car_array = cars.each_slice(7).collect do |car|
[:num, :year, :make_model, :vin, :engine, :status, :km].zip(car).inject({}){|hash, pair| hash.merge({pair[0] => pair[1]})}
end
Dragan Cvetinovic
June 18, 2010, June 18, 2010 13:40, permalink
The collect(&:inner_html) is great! I'll remember it.
I would just tighten the code in the block a little bit...
require 'rubygems'
require 'hpricot'
require 'open-uri'
doc = Hpricot(open('http://www.mpi.mb.ca/salvage/auction.asp?salenm=1694'))
cars = doc.search("tr > td > font").collect(&:inner_html)
car_array = cars.each_slice(7).collect do |car|
Hash[*[:num, :year, :make_model, :vin, :engine, :status, :km].zip(car).flatten]
end
Is there a better way for me to collect the information I want, then how I have it in my while loop ?