Weighted edit distance
The weighted edit distance is a generalization of the Levenshtien distance, where we assign different costs to substitutions, insertions, and deletions.
:{
data Weights = Weights { subst :: Int, delete :: Int, insert :: Int }
defaultWeights :: Weights
= Weights { subst = 1, delete = 1, insert = 1 }
defaultWeights
wlev :: (Eq a) => Weights -> [a] -> [a] -> Int
= insert w * length b
wlev w [] b = delete w * length a
wlev w a [] :as) (b:bs) | a == b = wlev w as bs
wlev w (a| otherwise = minimum [delete w + wlev w as (b:bs), insert w + wlev w (a:as) bs, subst w + wlev w as bs]
:}
= 2 }) "thatch" "hatch"
wlev (defaultWeights { delete = 2 }) "abcdef" "bcdefa" wlev (defaultWeights { insert
Note that this is essentially the following algorithm, but where we don't store the costs.
:{
import Data.Foldable
import Data.Function
data Weights = Weights { subst :: Int, delete :: Int, insert :: Int }
defaultWeights :: Weights
= Weights { subst = 1, delete = 1, insert = 1 }
defaultWeights
data Edit a = Ok a | Subst a a | Insert a | Delete a
deriving (Show)
cost :: Weights -> Edit a -> Int
Ok _) = 0
cost w (Subst _ _) = subst w
cost w (Insert _) = insert w
cost w (Delete _) = delete w
cost w (
edits :: (Eq a) => Weights -> [a] -> [a] -> [Edit a]
= fmap Insert bs
edits w [] bs = fmap Delete as
edits w as [] :as) (b:bs) | a == b = Ok a : edits w as bs
edits w (a| otherwise = minimumBy (compare `on` (sum . fmap (cost w))) $ [Delete a : edits w as (b:bs), Insert b : edits w (a:as) bs, Subst a b : edits w as bs]
:}
= 2 }) "thatch" "hatch" edits (defaultWeights { delete