-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Weighted Regular Expression Matcher
--   
--   Haskell implementation of a weighted regular expression matcher with
--   linear worst-case time and space bounds.
@package weighted-regexp
@version 0.3.1.2


-- | This library provides a type class for semirings and instances for
--   standard data types.
module Data.Semiring

-- | A semiring is an additive commutative monoid with identity
--   <a>zero</a>:
--   
--   <pre>
--           a .+. b  ==  b .+. a
--        zero .+. a  ==  a
--   (a .+. b) .+. c  ==  a .+. (b .+. c)
--   </pre>
--   
--   A semiring is a multiplicative monoid with identity <a>one</a>:
--   
--   <pre>
--         one .*. a  ==  a
--         a .*. one  ==  a
--   (a .*. b) .*. c  ==  a .*. (b .*. c)
--   </pre>
--   
--   Multiplication distributes over addition:
--   
--   <pre>
--   a .*. (b .+. c)  ==  (a .*. b) .+. (a .*. c)
--   (a .+. b) .*. c  ==  (a .*. c) .+. (b .*. c)
--   </pre>
--   
--   <a>zero</a> annihilates a semiring with respect to multiplication:
--   
--   <pre>
--   zero .*. a  ==  zero
--   a .*. zero  ==  zero
--   </pre>
--   
--   All laws should hold with respect to the required <a>Eq</a> instance.
--   
--   For example, the Booleans form a semiring.
--   
--   <ul>
--   <li><tt>False</tt> is an identity of disjunction which is commutative
--   and associative,</li>
--   <li><tt>True</tt> is an identity of conjunction which is
--   associative,</li>
--   <li>conjunction distributes over disjunction, and</li>
--   <li><tt>False</tt> annihilates the Booleans with respect to
--   conjunction.</li>
--   </ul>
class Eq s => Semiring s
zero, one :: Semiring s => s
(.+., .*.) :: Semiring s => s -> s -> s

-- | Auxiliary function to convert Booleans to an arbitrary semiring.
fromBool :: Semiring s => Bool -> s

-- | Wrapper for numeric types.
--   
--   Every numeric type that satisfies the semiring laws (as all predefined
--   numeric types do) is a semiring.
newtype Numeric a
Numeric :: a -> Numeric a
getNumeric :: Numeric a -> a
instance Eq a => Eq (Numeric a)
instance Num a => Num (Numeric a)
instance (Eq a, Num a) => Semiring (Numeric a)
instance Show a => Show (Numeric a)
instance Semiring Bool


-- | This module exports internal data types and matching functions. You do
--   not need to import it unless you want to write your own matching
--   algorithms.
module Text.RegExp.Internal

-- | Regular expressions are represented as values of type <a>RegExp</a>
--   <tt>c</tt> where <tt>c</tt> is the character type of the underlying
--   alphabet. Values of type <tt>RegExp</tt> <tt>c</tt> can be matched
--   against lists of type <tt>[c]</tt>.
newtype RegExp c
RegExp :: (forall w. Semiring w => RegW w c) -> RegExp c
data RegW w c
RegW :: !Bool -> !w -> !w -> !(Reg w c) -> RegW w c
active :: RegW w c -> !Bool
empty :: RegW w c -> !w
final_ :: RegW w c -> !w
reg :: RegW w c -> !(Reg w c)
final :: Semiring w => RegW w c -> w
data Reg w c
Eps :: Reg w c
Sym :: String -> (c -> w) -> Reg w c
Alt :: (RegW w c) -> (RegW w c) -> Reg w c
Seq :: (RegW w c) -> (RegW w c) -> Reg w c
Rep :: (RegW w c) -> Reg w c
class Semiring w => Weight a b w
symWeight :: Weight a b w => (a -> w) -> b -> w
defaultSymWeight :: (a -> w) -> a -> w
weighted :: Weight a b w => RegW w a -> RegW w b

-- | Matches the empty word. <a>eps</a> has no direct string representation
--   but is used to implement other constructs such as optional components
--   like <tt>a?</tt>.
eps :: RegExp c
epsW :: Semiring w => RegW w c

-- | Matches the given character.
char :: Char -> RegExp Char

-- | Matches the given symbol.
sym :: (Eq c, Show c) => c -> RegExp c
quote :: Char -> String

-- | Matches a symbol that satisfies the given predicate.
psym :: String -> (c -> Bool) -> RegExp c
symW :: Semiring w => String -> (c -> w) -> RegW w c

-- | Matches an arbitrary symbol.
anySym :: RegExp c

-- | Does not match anything. <a>noMatch</a> is an identity for <a>alt</a>.
noMatch :: RegExp c

-- | Matches either of two regular expressions. For example <tt>a+b</tt>
--   matches either the character <tt>a</tt> or the character <tt>b</tt>.
alt :: RegExp c -> RegExp c -> RegExp c
altW :: Semiring w => RegW w c -> RegW w c -> RegW w c

-- | Matches the sequence of two regular expressions. For example the
--   regular expressions <tt>ab</tt> matches the word <tt>ab</tt>.
seq_ :: RegExp c -> RegExp c -> RegExp c
seqW :: Semiring w => RegW w c -> RegW w c -> RegW w c

-- | Matches zero or more occurrences of the given regular expression. For
--   example <tt>a*</tt> matches the character <tt>a</tt> zero or more
--   times.
rep :: RegExp c -> RegExp c
repW :: Semiring w => RegW w c -> RegW w c

-- | Matches one or more occurrences of the given regular expression. For
--   example <tt>a+</tt> matches the character <tt>a</tt> one or more
--   times.
rep1 :: RegExp c -> RegExp c

-- | Matches the given regular expression or the empty word. Optional
--   expressions are usually written <tt>a?</tt> but could also be written
--   <tt>(|a)</tt>, that is, as alternative between <a>eps</a> and
--   <tt>a</tt>.
opt :: RegExp c -> RegExp c

-- | Matches a regular expression a given number of times. For example, the
--   regular expression <tt>a{4,7}</tt> matches the character <tt>a</tt>
--   four to seven times. If the minimal and maximal occurences are
--   identical, one can be left out, that is, <tt>a{2}</tt> matches two
--   occurrences of the character <tt>a</tt>.
--   
--   Numerical bounds are implemented via translation into ordinary regular
--   expressions. For example, <tt>a{4,7}</tt> is translated into
--   <tt>aaaa(a(a(a)?)?)?</tt>.
brep :: (Int, Int) -> RegExp c -> RegExp c
regW :: Semiring w => RegExp c -> RegW w c

-- | Checks whether a regular expression matches the given word. For
--   example, <tt>acceptFull (fromString "b|abc") "b"</tt> yields
--   <tt>True</tt> because the first alternative of <tt>b|abc</tt> matches
--   the string <tt>"b"</tt>.
acceptFull :: RegExp c -> [c] -> Bool

-- | Checks whether a regular expression matches a subword of the given
--   word. For example, <tt>acceptPartial (fromString "b") "abc"</tt>
--   yields <tt>True</tt> because <tt>"abc"</tt> contains the substring
--   <tt>"b"</tt>.
acceptPartial :: RegExp c -> [c] -> Bool

-- | Computes in how many ways a word can be matched against a regular
--   expression.
matchingCount :: (Eq a, Num a) => RegExp c -> [c] -> a

-- | Matches a regular expression against a word computing a weight in an
--   arbitrary semiring.
--   
--   The symbols can have associated weights associated by the
--   <a>symWeight</a> function of the <a>Weight</a> class. This function
--   also allows to adjust the type of the used alphabet such that, for
--   example, positional information can be taken into account by
--   <a>zip</a>ping the word with positions.
fullMatch :: Weight a b w => RegExp a -> [b] -> w

-- | Matches a regular expression against substrings of a word computing a
--   weight in an arbitrary semiring. Similar to <a>fullMatch</a> the
--   <a>Weight</a> class is used to associate weights to the symbols of the
--   regular expression.
partialMatch :: Weight a b w => RegExp a -> [b] -> w
matchW :: Semiring w => RegW w c -> [c] -> w
shiftW :: Semiring w => w -> RegW w c -> c -> RegW w c
shift :: Semiring w => w -> Reg w c -> c -> RegW w c


-- | This library provides properties for the <a>Semiring</a> type class
--   that can be checked using libraries like QuickCheck or SmallCheck.
module Data.Semiring.Properties

-- | <pre>
--   a .+. b  ==  b .+. a
--   </pre>
plus'comm :: Semiring s => s -> s -> Bool

-- | <pre>
--   zero .+. a  ==  a
--   </pre>
left'zero :: Semiring s => s -> Bool

-- | <pre>
--   (a .+. b) .+. c  ==  a .+. (b .+. c)
--   </pre>
add'assoc :: Semiring s => s -> s -> s -> Bool

-- | <pre>
--   one .*. a  ==  a
--   </pre>
left'one :: Semiring s => s -> Bool

-- | <pre>
--   a .*. one  ==  a
--   </pre>
right'one :: Semiring s => s -> Bool

-- | <pre>
--   (a .*. b) .*. c  ==  a .*. (b .*. c)
--   </pre>
mul'assoc :: Semiring s => s -> s -> s -> Bool

-- | <pre>
--   a .*. (b .+. c)  ==  (a .*. b) .+. (a .*. c)
--   </pre>
left'distr :: Semiring s => s -> s -> s -> Bool

-- | <pre>
--   (a .+. b) .*. c  ==  (a .*. c) .+. (b .*. c)
--   </pre>
right'distr :: Semiring s => s -> s -> s -> Bool

-- | <pre>
--   zero .*. a  ==  zero
--   </pre>
left'ann :: Semiring s => s -> Bool

-- | <pre>
--   a .*. zero  ==  zero
--   </pre>
right'ann :: Semiring s => s -> Bool


-- | This library provides a simple and fast regular expression matcher
--   that is implemented in Haskell without binding to external libraries.
--   
--   There are different ways to implement regular expression matching.
--   Backtracking algorithms are simple but need bookkeeping overhead for
--   nondeterministic search. One can use deterministic finite automata
--   (DFA, see <a>http://swtch.com/~rsc/regexp/regexp1.html</a>) to match
--   regular expressions faster. But for certain regular expressions these
--   DFA are exponentially large which sometimes leads to prohibitive
--   memory requirements.
--   
--   We use a smart and simple algorithm to generate a DFA from a regular
--   expression and do not generate the DFA completely but on the fly while
--   parsing. This leads to a linear-time deterministic algorithm with
--   constant space requirements. More specifically, the run time is
--   limited by the product of the sizes of the regular expression and the
--   string and the memory is limited by the size of the regular
--   expression.
module Text.RegExp
class Semiring w => Weight a b w
symWeight :: Weight a b w => (a -> w) -> b -> w

-- | Regular expressions are represented as values of type <a>RegExp</a>
--   <tt>c</tt> where <tt>c</tt> is the character type of the underlying
--   alphabet. Values of type <tt>RegExp</tt> <tt>c</tt> can be matched
--   against lists of type <tt>[c]</tt>.
data RegExp c

-- | Parses a regular expression from its string representation. If the
--   <tt>OverloadedStrings</tt> language extension is enabled, string
--   literals can be used as regular expressions without using
--   <a>fromString</a> explicitly. Implicit conversion is especially useful
--   in combination with functions like <a>=~</a> that take a value of type
--   <tt>RegExp Char</tt> as argument.
--   
--   Here are some examples of supported regular expressions along with an
--   explanation what they mean:
--   
--   <ul>
--   <li><tt>a</tt> matches the character <tt>a</tt></li>
--   <li><tt>[abc]</tt> matches any of the characters <tt>a</tt>,
--   <tt>b</tt>, or <tt>c</tt>. It is equivalent to <tt>(a|b|c)</tt>, but
--   <tt>|</tt> can be used to specify alternatives between arbitrary
--   regular expressions, not only characters.</li>
--   <li><tt>[^abc]</tt> matches anything but the characters <tt>a</tt>,
--   <tt>b</tt>, or <tt>c</tt>.</li>
--   <li><tt>\d</tt> matches a digit and is equivalent to <tt>[0-9]</tt>.
--   Moreover, <tt>\D</tt> matches any non-digit character, <tt>\s</tt> and
--   <tt>\S</tt> match space and non-space characters and <tt>\w</tt> and
--   <tt>\W</tt> match word characters and non-word characters, that is,
--   <tt>\w</tt> abbreviates <tt>[a-zA-Z_]</tt>.</li>
--   <li><tt>a?</tt> matches the empty word or the character <tt>a</tt>,
--   <tt>a*</tt> matches zero or more occurrences of <tt>a</tt>, and
--   <tt>a+</tt> matches one or more <tt>a</tt>'s.</li>
--   <li><tt>.</tt> (the dot) matches one arbitrary character.</li>
--   <li><tt>a{4,7}</tt> matches four to seven occurrences of <tt>a</tt>,
--   <tt>a{2}</tt> matches two.</li>
--   </ul>
fromString :: String -> RegExp Char

-- | Matches the empty word. <a>eps</a> has no direct string representation
--   but is used to implement other constructs such as optional components
--   like <tt>a?</tt>.
eps :: RegExp c

-- | Matches the given character.
char :: Char -> RegExp Char

-- | Matches the given symbol.
sym :: (Eq c, Show c) => c -> RegExp c

-- | Matches a symbol that satisfies the given predicate.
psym :: String -> (c -> Bool) -> RegExp c

-- | Matches an arbitrary symbol.
anySym :: RegExp c

-- | Does not match anything. <a>noMatch</a> is an identity for <a>alt</a>.
noMatch :: RegExp c

-- | Matches either of two regular expressions. For example <tt>a+b</tt>
--   matches either the character <tt>a</tt> or the character <tt>b</tt>.
alt :: RegExp c -> RegExp c -> RegExp c

-- | Matches the sequence of two regular expressions. For example the
--   regular expressions <tt>ab</tt> matches the word <tt>ab</tt>.
seq_ :: RegExp c -> RegExp c -> RegExp c

-- | Matches zero or more occurrences of the given regular expression. For
--   example <tt>a*</tt> matches the character <tt>a</tt> zero or more
--   times.
rep :: RegExp c -> RegExp c

-- | Matches one or more occurrences of the given regular expression. For
--   example <tt>a+</tt> matches the character <tt>a</tt> one or more
--   times.
rep1 :: RegExp c -> RegExp c

-- | Matches the given regular expression or the empty word. Optional
--   expressions are usually written <tt>a?</tt> but could also be written
--   <tt>(|a)</tt>, that is, as alternative between <a>eps</a> and
--   <tt>a</tt>.
opt :: RegExp c -> RegExp c

-- | Matches a regular expression a given number of times. For example, the
--   regular expression <tt>a{4,7}</tt> matches the character <tt>a</tt>
--   four to seven times. If the minimal and maximal occurences are
--   identical, one can be left out, that is, <tt>a{2}</tt> matches two
--   occurrences of the character <tt>a</tt>.
--   
--   Numerical bounds are implemented via translation into ordinary regular
--   expressions. For example, <tt>a{4,7}</tt> is translated into
--   <tt>aaaa(a(a(a)?)?)?</tt>.
brep :: (Int, Int) -> RegExp c -> RegExp c

-- | Matches a sequence of the given regular expressions in any order. For
--   example, the regular expression
--   
--   <pre>
--   perm (map char "abc")
--   </pre>
--   
--   has the same meaning as
--   
--   <pre>
--   abc|acb|bca|bac|cba|cab
--   </pre>
--   
--   and is represented as
--   
--   <pre>
--   a(bc|cb)|b(ca|ac)|c(ba|ab)
--   </pre>
perm :: [RegExp c] -> RegExp c

-- | Alias for <a>acceptFull</a> specialized for Strings. Useful in
--   combination with the <tt>IsString</tt> instance for <a>RegExp</a>
--   <a>Char</a>
(=~) :: RegExp Char -> String -> Bool

-- | Checks whether a regular expression matches the given word. For
--   example, <tt>acceptFull (fromString "b|abc") "b"</tt> yields
--   <tt>True</tt> because the first alternative of <tt>b|abc</tt> matches
--   the string <tt>"b"</tt>.
acceptFull :: RegExp c -> [c] -> Bool

-- | Checks whether a regular expression matches a subword of the given
--   word. For example, <tt>acceptPartial (fromString "b") "abc"</tt>
--   yields <tt>True</tt> because <tt>"abc"</tt> contains the substring
--   <tt>"b"</tt>.
acceptPartial :: RegExp c -> [c] -> Bool

-- | Computes in how many ways a word can be matched against a regular
--   expression.
matchingCount :: (Eq a, Num a) => RegExp c -> [c] -> a

-- | Matches a regular expression against a word computing a weight in an
--   arbitrary semiring.
--   
--   The symbols can have associated weights associated by the
--   <a>symWeight</a> function of the <a>Weight</a> class. This function
--   also allows to adjust the type of the used alphabet such that, for
--   example, positional information can be taken into account by
--   <a>zip</a>ping the word with positions.
fullMatch :: Weight a b w => RegExp a -> [b] -> w

-- | Matches a regular expression against substrings of a word computing a
--   weight in an arbitrary semiring. Similar to <a>fullMatch</a> the
--   <a>Weight</a> class is used to associate weights to the symbols of the
--   regular expression.
partialMatch :: Weight a b w => RegExp a -> [b] -> w
instance IsString (RegExp Char)


-- | This module implements leftmost matching based on weighted regular
--   expressions. It should be imported qualified as the interface
--   resembles that provided by other matching modules.
module Text.RegExp.Matching.Leftmost

-- | Returns the leftmost of all non-empty matchings for a regular
--   expression in a given word. If the empty word is the only matching its
--   position is zero.
matching :: RegExp c -> [c] -> Maybe Matching

-- | A <a>Matching</a> records the leftmost start index of a matching
--   subword.
data Matching

-- | Start index of the matching subword in the queried word.
matchingIndex :: Matching -> Int

-- | Semiring used for leftmost matching.
data Leftmost
getLeftmost :: Leftmost -> Maybe Matching
instance Eq Matching
instance Show Matching


-- | This module implements longest matching based on weighted regular
--   expressions. It should be imported qualified as the interface
--   resembles that provided by other matching modules.
module Text.RegExp.Matching.Longest

-- | Returns the longest of all matchings for a regular expression in a
--   given word.
matching :: RegExp c -> [c] -> Maybe Matching

-- | A <a>Matching</a> records the largest length of a matching subword.
data Matching

-- | Length of the matching subword in the queried word.
matchingLength :: Matching -> Int

-- | Semiring used for longest matching.
data Longest
getLongest :: Longest -> Maybe Matching
instance Eq Matching
instance Show Matching


-- | This module implements leftmost longest matching based on weighted
--   regular expressions. It should be imported qualified as the interface
--   resembles that provided by other matching modules.
module Text.RegExp.Matching.LeftLong

-- | Returns the leftmost longest of all non-empty matchings for a regular
--   expression in a given word. If the empty word is the only matching its
--   position is zero.
matching :: RegExp c -> [c] -> Maybe Matching

-- | Subwords of words that match a regular expression are represented as
--   values of type <a>Matching</a>.
data Matching

-- | Start index of the matching subword in the queried word.
matchingIndex :: Matching -> Int

-- | Length of the matching subword.
matchingLength :: Matching -> Int

-- | Semiring used for leftmost longest matching.
--   
--   The <a>LeftLong</a> type satisfies the distributive laws only with a
--   precondition on all involved multiplications: multiplied matches must
--   be adjacent and the start position must be smaller than the end
--   position. This precondition is satisfied for all multiplications
--   during regular expression matching.
data LeftLong
getLeftLong :: LeftLong -> Maybe Matching
instance Eq Matching
instance Show Matching
