Weighted edit distance
The weighted edit distance is a generalization of the Levenshtien distance, where we assign different costs to substitutions, insertions, and deletions.
:{
data Weights = Weights { subst :: Int, delete :: Int, insert :: Int }
defaultWeights :: Weights
defaultWeights = Weights { subst = 1, delete = 1, insert = 1 }
wlev :: (Eq a) => Weights -> [a] -> [a] -> Int
wlev w [] b = insert w * length b
wlev w a [] = delete w * length a
wlev w (a:as) (b:bs) | a == b = wlev w as bs
| otherwise = minimum [delete w + wlev w as (b:bs), insert w + wlev w (a:as) bs, subst w + wlev w as bs]
:}
wlev (defaultWeights { delete = 2 }) "thatch" "hatch"
wlev (defaultWeights { insert = 2 }) "abcdef" "bcdefa"Note that this is essentially the following algorithm, but where we don't store the costs.
:{
import Data.Foldable
import Data.Function
data Weights = Weights { subst :: Int, delete :: Int, insert :: Int }
defaultWeights :: Weights
defaultWeights = Weights { subst = 1, delete = 1, insert = 1 }
data Edit a = Ok a | Subst a a | Insert a | Delete a
deriving (Show)
cost :: Weights -> Edit a -> Int
cost w (Ok _) = 0
cost w (Subst _ _) = subst w
cost w (Insert _) = insert w
cost w (Delete _) = delete w
edits :: (Eq a) => Weights -> [a] -> [a] -> [Edit a]
edits w [] bs = fmap Insert bs
edits w as [] = fmap Delete as
edits w (a:as) (b:bs) | a == b = Ok a : edits w as bs
| otherwise = minimumBy (compare `on` (sum . fmap (cost w))) $ [Delete a : edits w as (b:bs), Insert b : edits w (a:as) bs, Subst a b : edits w as bs]
:}
edits (defaultWeights { delete = 2 }) "thatch" "hatch"