#!/usr/bin/env ruby

DEBUG = true

def is_linux?
  RUBY_PLATFORM.downcase.include?("linux")
end

def debug_str(str)
  puts str if DEBUG
end

def min3(a,b,c)
  m = a < b ? a : b
  return m < c ? m : c
end

def arr_diff(a1,a2)
  res = 0
  for i in 0..25 do
    res += (a1[i]-a2[i]).abs
  end
  return res
end

def calc_levenshtein_distance(s,t)

  return calc_levenshtein_distance(t,s) if s.length<t.length
  n = s.length
  m = t.length

  return m if n==0
  return n if m==0

  d = Array.new(m+1).map!{Array.new(n+1).map!{50}}
  d[0][0]=0

  p = (n==m) ? 1 : 0 #добавление диагонали справа
  q = (n-m<=1)? 1 : 0 #добавление диагонали слева

  for j in 1..[(n-m+p),n].min
    d[0][j]=d[0][j-1]+1
  end

  for i in 1..m do
    for j in (i - q)..[(i + n - m + p),n].min do
      if j==0
        d[i][j] = d[i-1][j]+1
      else
        cost = 1
        cost = 0 if t[i-1] == s[j-1]
        d[i][j] = min3(d[i-1][j] + 1 , d[i][j-1] + 1 , d[i-1][j-1] + cost)
      end
    end
  end

  return d[m][n]
end

def get_possible_edits(word)
  letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'.split(//)
  res = []

  for i in 0..(word.length-1)
    str = String.new(word)
    str[i] = ''
    res << str
  end
  for c in letters
    for i in 0..(word.length-1)
      next if word[i]==c
      str = String.new(word)
      str[i]=c
      res << str
    end
  end
  for c in letters
    for i in 0..(word.length)
      str = String.new(word).insert(i,c)
      res << str
    end
  end
  return res
end

def find_word(word, dic)

  if dic[word.length]
    return {:word => word, :cost => 0} if dic[word.length].include?(word)
  end

  minimum_errors = 1 # если в словаре слова нету, то в нём как минимум одна ошибка

  edits = get_possible_edits(word)
  edits.each do |edit|
    return {:word => edit, :cost => 1} if dic[edit.length] ? dic[edit.length].include?(edit) : false
  end

  minimum_errors = 2 # если в массиве ошибок первого порядка словарных слов не нашлось, то в слове как минимум две ошибки

  if word.length<=6
    edits.each do |edit|
      edits2 = get_possible_edits(edit)
      edits2.each do |edit2|
        return {:word => edit2, :cost => 2} if dic[edit2.length] ? dic[edit2.length].include?(edit2) : false
      end
    end
    minimum_errors = 3
  end
 
  result_word = ''
  result_cost = 999

  wordhash = Array.new(26,0)
  word.each_byte{|b| wordhash[b-65]+=1}

  for offset in 0..15 do
    return {:word => result_word, :cost => result_cost} if ((result_cost<=offset) or (result_cost<=minimum_errors))
    indexes = [word.length-offset,word.length+offset].uniq
    indexes.each do |index|
      next unless dic[index]
      dictionary_part = dic[index]

      if dictionary_part[dictionary_part.keys.first].max==0  #если для текущего раздела словаря не посчитаны массивы, то считаем их
        dictionary_part.each do |key, val|
          key.each_byte{|b| val[b-65]+=1}
        end
      end

      lower_bound = (minimum_errors-offset>0 ? minimum_errors-offset : 0).abs%2 + offset
      higher_bound = 2*result_cost - offset

      dictionary_part.each do |dic_word, dic_wordhash|

        a = arr_diff(dic_wordhash,wordhash)
        next if a<lower_bound or a>higher_bound

        ld = calc_levenshtein_distance(word, dic_word)
        if ld<result_cost
          result_word = dic_word
          result_cost = ld
          higher_bound = 2*result_cost - offset
        end
      end
    end
  end
  
  return {:word => result_word, :cost => result_cost}
end

ts1 = Time.now

dic_path = 'g:\\My Dropbox\\facebook puzzles\\breathalyzer\\breath\\lib\\twl06.txt'
if is_linux?
  dic_path = '/var/tmp/twl06.txt'
end

str = ''
File.open(dic_path){|f| str = f.read}
dic_words = str.split(/[\s\n]+/)

dictionary = {}
dic_words.each do |w|
  dictionary[w.length] ||= {}
  dictionary[w.length][w] = Array.new(26,0)
end


words = []
str = ''
File.open(ARGV.first,'r') do |f|
  str = f.read
end
words = str.upcase.split(/\s+/)



debug_str 'reading done in '+(Time.now-ts1).to_s+' seconds'

ts2 = Time.now
total_cost = 0

already_found = {}

words.each do |word|
  res = already_found.include?(word) ? already_found[word] : find_word(word, dictionary)
  already_found[word] ||= res

  total_cost += res[:cost]
  debug_str word+' : '+res[:cost].to_s+' : '+res[:word]
end

puts total_cost.to_s
debug_str 'reading: '+(ts2-ts1).to_s+' seconds'+'     calculating: '+(Time.now-ts2).to_s+' seconds'