Boyer-Moore string matching

Implementation

The following gives a naive specification of substring matching, taken from "Pearls of functional algorith design".

:{
import Data.List

isPrefix :: (Eq a) => [a] -> [a] -> Bool
isPrefix [] ys = True
isPrefix xs [] = False
isPrefix (x:xs) (y:ys) = x == y && isPrefix xs ys

isSuffix :: (Eq a) => [a] -> [a] -> Bool
isSuffix xs ys = isPrefix (reverse xs) (reverse ys)

naiveMatch :: (Eq a) => [a] -> [a] -> [Int]
naiveMatch pat = map length . filter (isSuffix pat) . inits
:}

naiveMatch "abcab" "ababcabcab"

We can massage this a bit to get the following.

:{
scanMatch :: (Eq a) => [a] -> [a] -> [Int]
scanMatch pat = map fst . filter (isPrefix (reverse pat) . snd) . scanl step (0, [])
  where
    step (n, sx) x = (n + 1, x : sx)
:}

scanMatch "ana" "banana"

Here, the scanl call generates increasing windows of reversed portions of the text, along with their starting positions.

scanl (\(n, sx) x -> (n + 1, x : sx)) (0, []) "banana"

Doodles

According to Wikipedia, we need two arrays L 𝐿 L and H 𝐻 H for a pattern P 𝑃 P :

:{
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE BlockArguments #-}
import Data.Functor
import Data.List

-- | Length of the longest common prefix.
llcp :: (Eq a) => [a] -> [a] -> Int
llcp [] ys = 0
llcp xs [] = 0
llcp (x : xs) (y : ys) = if x == y then 1 + llcp xs ys else 0

lcp :: (Eq a) => [a] -> [a] -> [a]
lcp [] ys = []
lcp xs [] = []
lcp (x : xs) (y : ys) = if x == y then x : lcp xs ys else []

prefix :: (Eq a) => [a] -> [Int]
prefix xs = map (llcp xs) (tails xs)

prefixes :: (Eq a) => [a] -> [[a]]
prefixes xs = map (lcp xs) (tails xs)

border :: (Eq a) => [a] -> [Int]
border xs = scanl step 0 (drop 1 xs)
  where
    step b x = if x == xs !! b then b + 1 else 0

loop :: Int -> (forall r. (Int -> a -> r) -> Int -> r) -> [Maybe a]
loop n set =
  let ixs = [n-1, n-2..0] <&> \i -> set (\j a -> (j, a)) i
  in [0..n-1] <&> \i -> lookup i ixs

shifts :: (Eq a) => [a] -> [Maybe Int]
shifts xs =
  let n = length xs
      ps = reverse (prefix (reverse xs))
  in loop n \set i ->
    let j = ps !! i - 1
    in set j i
:}

inits "cabdabdab"
fmap reverse $ reverse (prefixes (reverse "cabdabdab"))
reverse (prefix (reverse "cabdabdab"))
shifts "abcd"