#!/usr/bin/env ruby

DEBUG = true

def is_linux?
  RUBY_PLATFORM.downcase.include?("linux")
end

def debug_str(str)
  puts str if DEBUG
end

def calc_levenshtein_distance(s,t) 

  n = s.length
  m = t.length

  return m if n==0
  return n if m==0

  d = Array.new(n+1).map!{Array.new(m+1)}

  for i in 0..n
    d[i][0]=i
  end

  for j in 0..m
    d[0][j]=j
  end

  for i in 1..n
    for j in 1..m do
      if s[i-1]==t[j-1]
        cost=0
      else
        cost=1
      end
      d[i][j] = [d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1] + cost].min
    end
  end

  return d[n][m]

end

def get_possible_edits(word)
  letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.split(//)
  res = []

  for i in 0..(word.length-1)
    str = String.new(word)
    str[i] = ''
    res << str
  end
  for c in letters
    for i in 0..(word.length-1)
      next if word[i]==c
      str = String.new(word)
      str[i]=c
      res << str
    end
  end
  for c in letters
    for i in 0..(word.length)
      str = String.new(word).insert(i,c)
      res << str
    end
  end
  return res
end

def find_word(word, dic)

  if dic[word.length]
    return {:word => word, :cost => 0} if dic[word.length].include?(word)
  end
  
  minimum_errors = 1 # если в словаре слова нету, то в нём как минимум одна ошибка
  edits = get_possible_edits(word)
  edits.each do |edit|
    return {:word => edit, :cost => 1} if dic[edit.length] ? dic[edit.length].include?(edit) : false
  end
  
  minimum_errors = 2 # если в массиве ошибок первого порядка словарных слов не нашлось, то в слове как минимум две ошибки
  
  if word.length<=6
    edits.each do |edit|
      edits2 = get_possible_edits(edit)
      edits2.each do |edit2|
        return {:word => edit2, :cost => 2} if dic[edit2.length] ? dic[edit2.length].include?(edit2) : false
      end
    end
    minimum_errors = 3
  end

 
  result_word = ''
  result_cost = 999

  for offset in 0..15 do
    return {:word => result_word, :cost => result_cost} if ((result_cost<=offset) or (result_cost<=minimum_errors))
    indexes = [word.length-offset,word.length+offset].uniq
    indexes.each do |index|
      next unless dic[index]
      dictionary_part = dic[index]
      dictionary_part.each do |dic_word|
        ld = calc_levenshtein_distance(word, dic_word)
        if ld<result_cost
          result_word = dic_word
          result_cost = ld
        end
      end
    end
  end
  
  return {:word => result_word, :cost => result_cost}
end

ts1 = Time.now

dic_path = 'g:\\My Dropbox\\facebook puzzles\\breathalyzer\\breath\\lib\\twl06.txt'
dic_path = 'c:\\Dropbox\\facebook puzzles\\breathalyzer\\breath\\lib\\twl06.txt'
if is_linux?
  dic_path = '/var/tmp/twl06.txt'
end

str = ''
File.open(dic_path){|f| str = f.read}
dic_words = str.split(/[\s\n]+/)

dictionary = {}
dic_words.each do |w|
  dictionary[w.length] ||= []
  dictionary[w.length] << w
end


words = []
str = ''
File.open(ARGV.first,'r') do |f|
  str = f.read
end
words = str.upcase.split(/\s+/)



debug_str 'reading done in '+(Time.now-ts1).to_s+' seconds'

ts2 = Time.now
total_cost = 0

already_found = {}

words.each do |word|
  res = already_found.include?(word) ? already_found[word] : find_word(word, dictionary)
  already_found[word] ||= res

  total_cost += res[:cost]
  debug_str word+' : '+res[:cost].to_s+' : '+res[:word]
end

puts total_cost.to_s
debug_str 'reading: '+(ts2-ts1).to_s+' seconds'+'     calculating: '+(Time.now-ts2).to_s+' seconds'