# Computes the Pearson correlation coefficient between two sets of ratings.
#
# Only keys present in both hashes contribute to the result.
#
# @param first [Hash] maps a key (e.g. a title) to a numeric rating
# @param second [Hash] maps a key to a numeric rating
# @return [Float, nil] the correlation coefficient, or nil when the hashes
#   share no keys or when either side's shared ratings do not vary
def pearson(first, second)
  shared = first.keys.select { |title| second.include?(title) }
  return if shared.empty?

  # A Float count keeps the divisions below from truncating when every
  # rating is an Integer.
  count = shared.size.to_f
  first_ratings = shared.map { |title| first[title] }
  second_ratings = shared.map { |title| second[title] }

  first_sum = first_ratings.inject(0) { |total, rating| total + rating }
  second_sum = second_ratings.inject(0) { |total, rating| total + rating }
  first_squares = first_ratings.inject(0) { |total, rating| total + rating**2 }
  second_squares = second_ratings.inject(0) { |total, rating| total + rating**2 }
  products = first_ratings.zip(second_ratings).inject(0) { |total, (a, b)| total + a * b }

  first_spread = first_squares - first_sum**2 / count
  second_spread = second_squares - second_sum**2 / count
  # Rounding can push a zero spread slightly negative, which would make
  # Math.sqrt raise; any non-positive spread means no coefficient is defined.
  return unless first_spread > 0 && second_spread > 0

  (products - first_sum * second_sum / count) / Math.sqrt(first_spread * second_spread)
end
Refactorings
No refactoring yet !
Brianthecoder
March 13, 2008, March 13, 2008 15:17, permalink
Here's a spec
# Fixture: assigns @critics, a hash keyed by critic name whose values map
# film titles to that critic's numeric rating.
before(:each) do
  @critics = {
    "Lisa Rose" => {
      "Lady in the Water" => 2.5,
      "Snakes on a Plane" => 3.5,
      "Just My Luck" => 3.0,
      "Superman Returns" => 3.5,
      "You, Me, and Dupree" => 2.5,
      "The Night Listener" => 3.0
    },
    "Gene Seymour" => {
      "Lady in the Water" => 3.0,
      "Snakes on a Plane" => 3.5,
      "Just My Luck" => 1.5,
      "Superman Returns" => 5.0,
      "You, Me, and Dupree" => 3.5,
      "The Night Listener" => 3.0
    },
    "Michael Phillips" => {
      "Lady in the Water" => 2.5,
      "Snakes on a Plane" => 3.0,
      "Superman Returns" => 3.5,
      "The Night Listener" => 4.0
    },
    "Claudia Puig" => {
      "Snakes on a Plane" => 3.5,
      "Just My Luck" => 3.0,
      "Superman Returns" => 4.0,
      "You, Me, and Dupree" => 2.5,
      "The Night Listener" => 4.5
    },
    "Mick LaSalle" => {
      "Lady in the Water" => 3.0,
      "Snakes on a Plane" => 4.0,
      "Just My Luck" => 2.0,
      "Superman Returns" => 3.0,
      "You, Me, and Dupree" => 2.0,
      "The Night Listener" => 3.0
    },
    "Jack Matthews" => {
      "Lady in the Water" => 3.0,
      "Snakes on a Plane" => 4.0,
      "Superman Returns" => 5.0,
      "You, Me, and Dupree" => 3.5,
      "The Night Listener" => 3.0
    },
    "Toby" => {
      "Snakes on a Plane" => 4.5,
      "Superman Returns" => 4.0,
      "You, Me, and Dupree" => 1.0
    }
  }
end
# Checks pearson against a known coefficient for two critics from @critics.
it "should calculate pearson similarity" do
  # be_close takes the expected value followed by the allowed tolerance.
  pearson(@critics["Lisa Rose"], @critics["Gene Seymour"]).should be_close(0.396059017191, 0.000001)
end
clonecd477
May 9, 2011, May 09, 2011 00:30, permalink
Industry is the soul of business and the keystone of prosperity.
This was ported from Python, but I know it can be done better.