Weighted edit distance

The weighted edit distance E W ( s 1 , s 2 ) subscript 𝐸 𝑊 subscript 𝑠 1 subscript 𝑠 2 E_{W}(s_{1},s_{2}) is a generalization of the Levenshtien distance, where we assign different costs to substitutions, insertions, and deletions.

:{
data Weights = Weights { subst :: Int, delete :: Int, insert :: Int }

defaultWeights :: Weights
defaultWeights = Weights { subst = 1, delete = 1, insert = 1 }

wlev :: (Eq a) => Weights -> [a] -> [a] -> Int
wlev w [] b = insert w * length b
wlev w a [] = delete w * length a
wlev w (a:as) (b:bs) | a == b = wlev w as bs
                  | otherwise = minimum [delete w + wlev w as (b:bs), insert w + wlev w (a:as) bs, subst w + wlev w as bs]

:}

wlev (defaultWeights { delete = 2 }) "thatch" "hatch"
wlev (defaultWeights { insert = 2 }) "abcdef" "bcdefa"

Note that this is essentially the following algorithm, but where we don't store the costs.

:{
import Data.Foldable
import Data.Function

data Weights = Weights { subst :: Int, delete :: Int, insert :: Int }

defaultWeights :: Weights
defaultWeights = Weights { subst = 1, delete = 1, insert = 1 }

data Edit a = Ok a | Subst a a | Insert a | Delete a
  deriving (Show)

cost :: Weights -> Edit a -> Int
cost w (Ok _) = 0
cost w (Subst _ _) = subst w
cost w (Insert _) = insert w
cost w (Delete _) = delete w

edits :: (Eq a) => Weights -> [a] -> [a] -> [Edit a]
edits w [] bs = fmap Insert bs
edits w as [] = fmap Delete as
edits w (a:as) (b:bs) | a == b = Ok a : edits w as bs
                      | otherwise = minimumBy (compare `on` (sum . fmap (cost w))) $ [Delete a : edits w as (b:bs), Insert b : edits w (a:as) bs, Subst a b : edits w as bs]
:}

edits (defaultWeights { delete = 2 }) "thatch" "hatch"