Boyer-Moore string matching
Implementation
The following gives a naive specification of substring matching, taken from "Pearls of functional algorith design".
:{
import Data.List
isPrefix :: (Eq a) => [a] -> [a] -> Bool
isPrefix [] ys = True
isPrefix xs [] = False
isPrefix (x:xs) (y:ys) = x == y && isPrefix xs ys
isSuffix :: (Eq a) => [a] -> [a] -> Bool
isSuffix xs ys = isPrefix (reverse xs) (reverse ys)
naiveMatch :: (Eq a) => [a] -> [a] -> [Int]
naiveMatch pat = map length . filter (isSuffix pat) . inits
:}
naiveMatch "abcab" "ababcabcab"We can massage this a bit to get the following.
:{
scanMatch :: (Eq a) => [a] -> [a] -> [Int]
scanMatch pat = map fst . filter (isPrefix (reverse pat) . snd) . scanl step (0, [])
where
step (n, sx) x = (n + 1, x : sx)
:}
scanMatch "ana" "banana"
Here, the scanl call generates increasing windows of
reversed portions of the text, along with their starting
positions.
scanl (\(n, sx) x -> (n + 1, x : sx)) (0, []) "banana"Doodles
According to Wikipedia, we need two arrays and for a pattern :
- is the largest position such that is a non-extendable suffix of .
- is the length of the largest suffix of that is a prefix of . I think that this is the same as a border array of the reversal ?
:{
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE BlockArguments #-}
import Data.Functor
import Data.List
-- | Length of the longest common prefix.
llcp :: (Eq a) => [a] -> [a] -> Int
llcp [] ys = 0
llcp xs [] = 0
llcp (x : xs) (y : ys) = if x == y then 1 + llcp xs ys else 0
lcp :: (Eq a) => [a] -> [a] -> [a]
lcp [] ys = []
lcp xs [] = []
lcp (x : xs) (y : ys) = if x == y then x : lcp xs ys else []
prefix :: (Eq a) => [a] -> [Int]
prefix xs = map (llcp xs) (tails xs)
prefixes :: (Eq a) => [a] -> [[a]]
prefixes xs = map (lcp xs) (tails xs)
border :: (Eq a) => [a] -> [Int]
border xs = scanl step 0 (drop 1 xs)
where
step b x = if x == xs !! b then b + 1 else 0
loop :: Int -> (forall r. (Int -> a -> r) -> Int -> r) -> [Maybe a]
loop n set =
let ixs = [n-1, n-2..0] <&> \i -> set (\j a -> (j, a)) i
in [0..n-1] <&> \i -> lookup i ixs
shifts :: (Eq a) => [a] -> [Maybe Int]
shifts xs =
let n = length xs
ps = reverse (prefix (reverse xs))
in loop n \set i ->
let j = ps !! i - 1
in set j i
:}
inits "cabdabdab"
fmap reverse $ reverse (prefixes (reverse "cabdabdab"))
reverse (prefix (reverse "cabdabdab"))
shifts "abcd"