# callcc is provided by the `continuation` extension since Ruby 1.9; load it
# only when the interpreter does not already offer it as a builtin.
require 'continuation' unless defined?(callcc)

# Stack of continuations captured by fork_reality that still have untried
# alternatives; the most recently captured one is last.
$c = []

# Returns the first of the given alternatives now and records a continuation so
# the remaining ones can be tried later.
#
# Every time the recorded continuation is called (see collect_realities),
# execution resumes right after the callcc below with the same argument Array,
# which has been shortened by the earlier shift, so the next alternative is
# returned. The continuation is dropped from $c as soon as at most one
# alternative is left.
#
# l - the alternatives, in the order they should be tried.
#
# Returns the next untried alternative, or nil when none were given.
def fork_reality(*l)
  callcc { |c| $c << c }
  # Nothing will remain after this shift, so this point must not be re-entered.
  $c.pop if l.size <= 1
  l.shift
end
# Re-enters the most recently stored continuation in $c, over and over, until
# $c has been emptied. Calling a continuation does not come back here; control
# resumes at the point where that continuation was captured.
def collect_realities
  until $c.empty?
    pending = $c.last
    pending.call
  end
end
# Renders one contracted variant of a tokenised English sentence.
#
# Walks +arr+ from left to right. Whenever a group of words can be written in
# more than one way, the alternatives are passed to fork_reality and the value
# it returns is used for this pass. "a" directly before a word starting with a
# vowel (or with "hono", "honest" or "hour") is rewritten to "an".
#
# arr   - Array of words.
# index - position of the word to examine next.
# res   - fragments emitted so far; it is never mutated, every step passes a
#         new Array on to the recursive call.
#
# Returns the fragments joined by single spaces once index reaches arr.size.
def contract_sentence(arr, index = 0, res = [])
  return res.compact.join(' ') if index == arr.size

  skip = 1 # how many input words the current step consumes
  x = arr[index]
  nxt = arr[index + 1]
  nxt2 = arr[index + 2]
  r = case
      when x == 'a' && (nxt =~ /^[aeiuo]/ || nxt =~ /^hono/ || nxt =~ /^honest/ || nxt =~ /^hour/)
        'an'
      when x == 'I' && nxt == 'am'
        skip = 2
        fork_reality("I'm", "I am")
      when %w(you we they).include?(x) && %w(are have).include?(nxt) && nxt2 == 'not'
        skip = 3
        fork_reality("#{x}'#{nxt[-2, 2]} not", "#{x} #{nxt}n't", "#{x} #{nxt} not")
      when %w(you we they).include?(x) && %w(are have).include?(nxt)
        # Only two words are consumed here. Skipping three would drop the word
        # that follows and, at the end of the sentence, push index past
        # arr.size so the recursion never stops.
        skip = 2
        # The suffix is taken from the verb: are -> 're, have -> 've.
        fork_reality("#{x}'#{nxt[-2, 2]}", "#{x} #{nxt}")
      when %w(he she it).include?(x) && %w(is has).include?(nxt) && nxt2 == 'not'
        skip = 3
        fork_reality("#{x} #{nxt}n't", "#{x} #{nxt} not")
      when %w(he she it).include?(x) && %w(is has).include?(nxt)
        skip = 2
        fork_reality("#{x}'s", "#{x} #{nxt}")
      when %w(is are).include?(x) && nxt == 'not'
        skip = 2
        fork_reality("#{x}n't", "#{x} #{nxt}")
      else
        x
      end
  contract_sentence(arr, index + skip, res + [r])
end
# Demo: gather every variant of a sample sentence, check that three known
# variants are among them, then print the whole list.
variants = []
# Each re-entry triggered by collect_realities comes back through this line and
# appends one more variant.
variants << contract_sentence(%w(I am green but you are not white and Bob is not a honest man while Alice and Ina are not purple))
collect_realities
[
  "I'm green but you aren't white and Bob isn't an honest man while Alice and Ina aren't purple",
  "I'm green but you are not white and Bob is not an honest man while Alice and Ina are not purple",
  "I am green but you are not white and Bob is not an honest man while Alice and Ina aren't purple"
].each do |expected|
  raise unless variants.include?(expected)
end
p variants
Refactorings
No refactoring yet !
sjs
October 14, 2007, October 14, 2007 23:10, permalink
There is a bug on line 29. The first argument should be "#{x}'#{nxt[-2,2]}", as you used on line 26.
Removing the global is easy -- wrap it in a class.
I don't think I'm going to tackle refactoring it. It's quite an elegant piece of code with the continuations, imo.
# callcc is provided by the `continuation` extension since Ruby 1.9; load it
# only when the interpreter does not already offer it as a builtin.
require 'continuation' unless defined?(callcc)

# Enumerates the ways an English sentence can be written by contracting
# pronoun / verb / "not" groups ("I am" -> "I'm", "is not" -> "isn't", ...).
#
#   Contracter.contract("I am here")  # => ["I'm here", "I am here"]
class Contracter
  # The Array of words being processed.
  attr_reader :words

  # Builds a Contracter for +sentence+ and returns the Array of all variants.
  def self.contract(sentence)
    new(sentence).contract
  end

  # sentence - a String (the characters . , ? ! are removed, then it is split
  #            on whitespace), or any object without #split, which is used as
  #            the word list as it is.
  def initialize(sentence)
    @forks = [] # continuations that still have untried alternatives
    @words = if sentence.respond_to?(:split)
               sentence.gsub(/[.,?!]/, '').split(/\s+/)
             else
               # Use the given word list; reading `words` here would only
               # return the still-unset @words.
               sentence
             end
  end

  # Returns an Array holding one String per combination of alternatives.
  def contract
    # The receiver Array is evaluated before contract_sentence runs, so every
    # re-entry through a stored continuation appends to this same Array.
    (res = []) << contract_sentence
    collect_realities
    res
  end

  private

  # Renders one variant of the sentence, starting at +index+ with the
  # fragments in +res+ already emitted. Ambiguous groups are resolved through
  # fork_reality. "a" before a vowel-initial word (or "hono", "honest", "hour")
  # becomes "an".
  #
  # Returns the fragments joined by single spaces once index reaches words.size.
  def contract_sentence(index = 0, res = [])
    return res.compact.join(' ') if index == words.size

    skip = 1 # how many input words the current step consumes
    x = words[index]
    nxt = words[index + 1]
    nxt2 = words[index + 2]
    r = case
        when x == 'a' && (nxt =~ /^[aeiuo]/ || nxt =~ /^hono/ || nxt =~ /^honest/ || nxt =~ /^hour/)
          'an'
        when x == 'I' && nxt == 'am'
          skip = 2
          fork_reality("I'm", "I am")
        when %w(you we they).include?(x) && %w(are have).include?(nxt) && nxt2 == 'not'
          skip = 3
          fork_reality("#{x}'#{nxt[-2, 2]} not", "#{x} #{nxt}n't", "#{x} #{nxt} not")
        when %w(you we they).include?(x) && %w(are have).include?(nxt)
          # Only two words are consumed here. Skipping three would drop the
          # word that follows and, at the end of the sentence, push index past
          # words.size so the recursion never stops.
          skip = 2
          fork_reality("#{x}'#{nxt[-2, 2]}", "#{x} #{nxt}")
        when %w(he she it).include?(x) && %w(is has).include?(nxt) && nxt2 == 'not'
          skip = 3
          fork_reality("#{x} #{nxt}n't", "#{x} #{nxt} not")
        when %w(he she it).include?(x) && %w(is has).include?(nxt)
          skip = 2
          fork_reality("#{x}'s", "#{x} #{nxt}")
        when %w(is are).include?(x) && nxt == 'not'
          skip = 2
          fork_reality("#{x}n't", "#{x} #{nxt}")
        else
          x
        end
    contract_sentence(index + skip, res + [r])
  end

  # Returns the first alternative now and stores a continuation in @forks.
  # Each later call of that continuation resumes after the callcc with the same
  # (already shortened) argument Array and so returns the next alternative; the
  # continuation is removed once at most one alternative is left.
  def fork_reality(*l)
    callcc { |c| @forks << c }
    @forks.pop if l.size <= 1
    l.shift
  end

  # Keeps re-entering the most recently stored continuation until none remain.
  def collect_realities
    @forks.last.call until @forks.empty?
  end
end
# Demo: print every variant of a sample sentence.
sample = "I am green but you are not white and Bob is not a honest man while Alice and Ina are not purple."
p Contracter.contract(sample)
This code tries to find all variations of an English sentence produced by contracting pronouns and modal verbs.
To see what it does, just run it, or look at the assertions at the bottom.
It's a fragment of a problem that my start-up has. I'm trying to collect all translations of a given sentence, and suddenly a previously trivial method turns out to have more than one possible answer in one edge case, and I have to go on a huge refactoring spree to accommodate that.
I'm now exploring a general solution that uses continuations. I hope this will make it easier to add new rules to my code.
Can you do it cleaner? Using "amb"? Without the global variable? Shorter and without continuations? Using a list monad? Using other techniques?