# in lib/product_scraper/base.rb
# Base scraper shared by the company-specific scrapers. It does not define
# +product_urls+ itself; subclasses are expected to supply it, along with a
# nested +ProductPage+ class.
class ProductScraper::Base
  # Builds one page object per URL returned by +product_urls+, printing a
  # dot to standard output for each URL.
  #
  # The page class is looked up through +self.class+, so a subclass that
  # declares its own nested ProductPage gets that class instantiated.
  def product_pages
    product_urls.map do |product_url|
      print "."
      self.class::ProductPage.new(product_url)
    end
  end

  # Body elided in the original snippet.
  def update_products!
    #...
  end

  # Page behaviour meant to be mixed into each scraper's nested ProductPage
  # class. Being a module, it cannot be instantiated on its own.
  module ProductPage
    attr_reader :url

    # Body elided in the original snippet.
    def initialize(product_url)
      #...
    end
    #...
  end
end
# in lib/product_scraper/some_company.rb
# Scraper for one concrete company, built on ProductScraper::Base.
class ProductScraper::SomeCompany < ProductScraper::Base
# Supplies the URLs to scrape for this company (body elided in this snippet).
def product_urls
#...
end
# Page class for this company. It pulls in the shared page behaviour by
# including ProductScraper::Base::ProductPage.
class ProductPage
include ProductScraper::Base::ProductPage
#...
end
end
Refactorings
No refactoring yet !
Jordan Glasner
February 2, 2008, February 02, 2008 18:05, permalink
I forgot you didn't want to allow Base to be instantiated, so this might not work for you. You'd need to turn Base and ProductPage into modules then include them in AmazonScraper and AmazonPage, instead of subclassing them.
BTW, a product scraper sounds like an interesting product :)
# Namespace holding a generic scraper (Base), which turns URLs into page
# objects, and the page object itself (ProductPage).
module ProductScraper
  # Generic scraper. Subclasses override +product_urls+ and, to use their own
  # page class, the class-level +product_page_type+.
  class Base
    class << self
      # Class used to build page objects for this scraper.
      #
      # @return [Class] ProductPage unless a subclass overrides this method
      def product_page_type
        ProductPage
      end
    end

    def initialize
    end

    # URLs of the product pages to scrape.
    #
    # The base implementation returns an empty Array (rather than nil) so
    # that +product_pages+ and +update_products!+ work on a scraper that has
    # no URLs of its own.
    #
    # @return [Array]
    def product_urls
      []
    end

    # Creates a new page object for +url+ using the scraper's page type.
    #
    # @param url [Object] value handed to the page class constructor
    # @return [Object] instance of +self.class.product_page_type+
    def product_page(url)
      self.class.product_page_type.new(url)
    end

    # @return [Array] one page object per entry of +product_urls+
    def product_pages
      product_urls.map { |url| product_page(url) }
    end

    # Calls +update!+ on every page.
    #
    # @return [Array] the return values of each page's +update!+
    def update_products!
      product_pages.map(&:update!)
    end
  end

  # A single product page identified by its URL.
  class ProductPage
    attr_reader :url

    # @param url [Object] address of the page
    def initialize(url)
      @url = url
    end

    # Scrapes the page; saving is still a placeholder in this snippet.
    def update!
      scrape
      # save
    end

    # Placeholder: the actual scraping logic is not implemented here.
    def scrape
      # ...
    end
  end
end
module ProductScraper
  # Scraper whose pages are built as AmazonPage objects.
  class AmazonScraper < Base
    class << self
      # Overrides Base.product_page_type so that +product_page+ instantiates
      # AmazonPage. The method was previously misspelled, which meant the
      # override was never picked up.
      #
      # @return [Class] AmazonPage
      def product_page_type
        AmazonPage
      end

      # Keeps the old misspelled name working for any existing callers.
      alias prouduct_page_type product_page_type
    end
  end

  # Amazon-specific page; currently adds nothing on top of ProductPage.
  class AmazonPage < ProductPage
  end
end
Jordan Glasner
February 2, 2008, February 02, 2008 19:42, permalink
OK... the above just didn't do it for me. The following is also closer to what you wanted. All of the functionality is set up in the Scrapable module. Include that module in your Page class to make it scrapable.
I did get rid of the separate Scraper class. Didn't make sense in this context.
# Mixin that makes a page class scrapable. Including it adds the instance
# methods below and extends the including class with ClassMethods.
module Scrapable
  # Hook run when the module is included: turns the methods of ClassMethods
  # into class methods of +base+.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Class-level helpers for working with every page at once.
  module ClassMethods
    # @return [Array] the URLs to scrape (placeholder values in this example)
    def urls
      [1, 2, 3]
    end

    # @return [Array] one new instance of the including class per URL
    def all
      urls.map { |page_url| new(page_url) }
    end

    # Runs the instance-level +update!+ on every page.
    #
    # @return [Array] the return value of each page's +update!+
    def update!
      all.map(&:update!)
    end
  end

  # Scrapes this page and then saves it.
  def update!
    scrape
    save!
  end

  # Placeholder for the actual scraping logic.
  def scrape
    # scrapes ;)
  end

  # Placeholder for persistence: only reports that a save happened.
  def save!
    print "Saved\n"
  end
end
# Example page class made scrapable by including Scrapable.
class MerchantPage
  include Scrapable

  # @return [Object] the URL this page was created with
  attr_reader :url

  # Remembers the URL so the page knows what it represents; previously the
  # argument was accepted and then discarded.
  #
  # @param url [Object] address of the page
  def initialize(url)
    @url = url
  end
end
# Scrape one specific page.
merchant_page = MerchantPage.new('http://merchant.com/page')
merchant_page.scrape
# Class-level list of every URL.
MerchantPage.urls
# One page object per URL.
MerchantPage.all
# Run update! on every page.
MerchantPage.update!
misfo.myopenid.com
February 2, 2008, February 02, 2008 22:58, permalink
Jordan, I like your second refactoring a lot. The object model makes so much more sense using class and instance methods instead of nesting a class. Your changing the names of the classes made it clear that this was the way to do it. I wanted to make a base class that could be subclassed just like ActiveRecord::Base, as opposed to using modules. So here is what I came up with, using your changes, but with classes.
# in lib/product_page/base.rb
# Base class for product pages that is not meant to be instantiated itself.
# It calls a class-level +urls+ method that it does not define, so concrete
# subclasses are expected to provide one.
class ProductPage::Base
  attr_reader :url

  # Refuses to build ProductPage::Base itself; for any other receiver the
  # call is passed on to the normal constructor.
  #
  # @raise [TypeError] when called directly on ProductPage::Base
  def self.new(url)
    if self == ProductPage::Base
      raise TypeError, "ProductPage::Base cannot be instantiated"
    end

    super
  end

  # @return [Array] one page instance per entry of +urls+
  def self.all
    urls.map { |page_url| new(page_url) }
  end

  # Builds every page first, then calls +save!+ on each one, printing a
  # progress line before each phase.
  def self.save_all!
    puts "Parsing the pages"
    parsed_pages = all
    puts "\nUpdating the database"
    parsed_pages.each { |page| page.save! }
  end

  # Body elided in the original snippet.
  def initialize(product_url)
    #...
  end

  # Body elided in the original snippet.
  def save!
    #...
  end
end
# in lib/product_page/some_company.rb
# Product page for one concrete company.
class ProductPage::SomeCompany < ProductPage::Base
  # Supplies the URLs that ProductPage::Base.all iterates over
  # (body elided in the original snippet).
  def self.urls
    #...
  end

  # Body elided in the original snippet.
  def name
    #...
  end
  #...
end
This is really my first attempt at creating my own Ruby classes that aren't subclassing anything from Rails, so I think I need some help. First of all, ProductScraper::Base should be - in Java terms, at least - an abstract class. It should never be instantiated. Neither should its nested class (module, in this case): ProductScraper::Base::ProductPage. But what I want to do is create a bunch of classes that subclass ProductScraper::Base and its nested class. I also want to be able to refer to the more concrete classes from ProductScraper::Base, as is done in the product_pages method.
Should ProductScraper::Base and ProductScraper::Base::ProductPage be classes or should they be modules? If they should be classes, how do I make them "abstract" classes? Are there any shortcuts to use instead of the fully qualified class names that I use here?
Thanks for the help!