Initial checkin of Boost.Algorithm searching and clamp code and tests; docs and more Algos coming

[SVN r76388]
This commit is contained in:
Marshall Clow
2012-01-09 17:21:04 +00:00
parent ba417e875a
commit d518994247
27 changed files with 45652 additions and 0 deletions

175
include/boost/algorithm/clamp.hpp Executable file
View File

@ -0,0 +1,175 @@
/*
Copyright (c) Marshall Clow 2008-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Revision history:
27 June 2009 mtc First version
23 Oct 2010 mtc Added predicate version
*/
/// \file clamp.hpp
/// \brief Clamp algorithm
/// \author Marshall Clow
///
/// Suggested by olafvdspek in https://svn.boost.org/trac/boost/ticket/3215
#ifndef BOOST_ALGORITHM_CLAMP_HPP
#define BOOST_ALGORITHM_CLAMP_HPP
#include <functional> // For std::less
#include <iterator> // For std::iterator_traits
#include <cassert>
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <boost/mpl/identity.hpp> // for identity
#include <boost/utility/enable_if.hpp> // for boost::disable_if
namespace boost { namespace algorithm {
/// \fn clamp ( T const& val,
/// typename boost::mpl::identity<T>::type const& lo,
/// typename boost::mpl::identity<T>::type const& hi, Pred p )
/// \return the value "val" brought into the range [ lo, hi ]
/// using the comparison predicate p.
/// If p ( val, lo ) return lo.
/// If p ( hi, val ) return hi.
/// Otherwise, return the original value.
///
/// \param val The value to be clamped
/// \param lo The lower bound of the range to be clamped to
/// \param hi The upper bound of the range to be clamped to
/// \param p A predicate to use to compare the values.
/// p ( a, b ) returns a boolean.
///
template<typename T, typename Pred>
T const & clamp ( T const& val,
typename boost::mpl::identity<T>::type const & lo,
typename boost::mpl::identity<T>::type const & hi, Pred p )
{
// assert ( !p ( hi, lo )); // Can't assert p ( lo, hi ) b/c they might be equal
return p ( val, lo ) ? lo : p ( hi, val ) ? hi : val;
}
/// \fn clamp ( T const& val,
/// typename boost::mpl::identity<T>::type const& lo,
/// typename boost::mpl::identity<T>::type const& hi )
/// \return the value "val" brought into the range [ lo, hi ].
/// If the value is less than lo, return lo.
/// If the value is greater than "hi", return hi.
/// Otherwise, return the original value.
///
/// \param val The value to be clamped
/// \param lo The lower bound of the range to be clamped to
/// \param hi The upper bound of the range to be clamped to
///
template<typename T>
T const& clamp ( const T& val,
typename boost::mpl::identity<T>::type const & lo,
typename boost::mpl::identity<T>::type const & hi )
{
return (clamp) ( val, lo, hi, std::less<T>());
}
/// \fn clamp_range ( InputIterator first, InputIterator last, OutputIterator out,
/// std::iterator_traits<InputIterator>::value_type lo,
/// std::iterator_traits<InputIterator>::value_type hi )
/// \return clamp the sequence of values [first, last) into [ lo, hi ]
///
/// \param first The start of the range of values
/// \param last One past the end of the range of input values
/// \param out An output iterator to write the clamped values into
/// \param lo The lower bound of the range to be clamped to
/// \param hi The upper bound of the range to be clamped to
///
template<typename InputIterator, typename OutputIterator>
OutputIterator clamp_range ( InputIterator first, InputIterator last, OutputIterator out,
typename std::iterator_traits<InputIterator>::value_type lo,
typename std::iterator_traits<InputIterator>::value_type hi )
{
// this could also be written with bind and std::transform
while ( first != last )
*out++ = clamp ( *first++, lo, hi );
return out;
}
/// \fn clamp_range ( const Range &r, OutputIterator out,
/// typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type lo,
/// typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type hi )
/// \return clamp the sequence of values [first, last) into [ lo, hi ]
///
/// \param r The range of values to be clamped
/// \param out An output iterator to write the clamped values into
/// \param lo The lower bound of the range to be clamped to
/// \param hi The upper bound of the range to be clamped to
///
template<typename Range, typename OutputIterator>
typename boost::disable_if_c<boost::is_same<Range, OutputIterator>::value, OutputIterator>::type
clamp_range ( const Range &r, OutputIterator out,
typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type lo,
typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type hi )
{
return clamp_range ( boost::begin ( r ), boost::end ( r ), out, lo, hi );
}
/// \fn clamp_range ( InputIterator first, InputIterator last, OutputIterator out,
/// std::iterator_traits<InputIterator>::value_type lo,
/// std::iterator_traits<InputIterator>::value_type hi, Pred p )
/// \return clamp the sequence of values [first, last) into [ lo, hi ]
/// using the comparison predicate p.
///
/// \param first The start of the range of values
/// \param last One past the end of the range of input values
/// \param out An output iterator to write the clamped values into
/// \param lo The lower bound of the range to be clamped to
/// \param hi The upper bound of the range to be clamped to
/// \param p A predicate to use to compare the values.
/// p ( a, b ) returns a boolean.
///
template<typename InputIterator, typename OutputIterator, typename Pred>
OutputIterator clamp_range ( InputIterator first, InputIterator last, OutputIterator out,
typename std::iterator_traits<InputIterator>::value_type lo,
typename std::iterator_traits<InputIterator>::value_type hi, Pred p )
{
// this could also be written with bind and std::transform
while ( first != last )
*out++ = clamp ( *first++, lo, hi, p );
return out;
}
/// \fn clamp_range ( const Range &r, OutputIterator out,
/// typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type lo,
/// typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type hi,
/// Pred p )
/// \return clamp the sequence of values [first, last) into [ lo, hi ]
/// using the comparison predicate p.
///
/// \param r The range of values to be clamped
/// \param out An output iterator to write the clamped values into
/// \param lo The lower bound of the range to be clamped to
/// \param hi The upper bound of the range to be clamped to
/// \param p A predicate to use to compare the values.
/// p ( a, b ) returns a boolean.
//
// Disable this template if the first two parameters are the same type;
// In that case, the user will get the two iterator version.
template<typename Range, typename OutputIterator, typename Pred>
typename boost::disable_if_c<boost::is_same<Range, OutputIterator>::value, OutputIterator>::type
clamp_range ( const Range &r, OutputIterator out,
typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type lo,
typename std::iterator_traits<typename boost::range_iterator<const Range>::type>::value_type hi,
Pred p )
{
return clamp_range ( boost::begin ( r ), boost::end ( r ), out, lo, hi, p );
}
}}
#endif // BOOST_ALGORITHM_CLAMP_HPP

View File

@ -0,0 +1,268 @@
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
*/
#ifndef BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP
#define BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP
#include <iterator> // for std::iterator_traits
#include <boost/assert.hpp>
#include <boost/static_assert.hpp>
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <boost/utility/enable_if.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/algorithm/searching/detail/bm_traits.hpp>
#include <boost/algorithm/searching/detail/debugging.hpp>
namespace boost { namespace algorithm {
/*
A templated version of the boyer-moore searching algorithm.
References:
http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/
http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
Explanations: boostinspect:noascii (test tool complains)
http://en.wikipedia.org/wiki/BoyerMoore_string_search_algorithm
http://www.movsd.com/bm.htm
http://www.cs.ucdavis.edu/~gusfield/cs224f09/bnotes.pdf
The Boyer-Moore search algorithm uses two tables, a "bad character" table
to tell how far to skip ahead when it hits a character that is not in the pattern,
and a "good character" table to tell how far to skip ahead when it hits a
mismatch on a character that _is_ in the pattern.
Requirements:
* Random access iterators
* The two iterator types (patIter and corpusIter) must
"point to" the same underlying type and be comparable.
* Additional requirements may be imposed but the skip table, such as:
** Numeric type (array-based skip table)
** Hashable type (map-based skip table)
*/
template <typename patIter, typename traits = detail::BM_traits<patIter> >
class boyer_moore {
typedef typename std::iterator_traits<patIter>::difference_type difference_type;
public:
boyer_moore ( patIter first, patIter last )
: pat_first ( first ), pat_last ( last ),
k_pattern_length ( std::distance ( pat_first, pat_last )),
skip_ ( k_pattern_length, -1 ),
suffix_ ( k_pattern_length + 1 )
{
this->build_skip_table ( first, last );
this->build_suffix_table ( first, last );
}
~boyer_moore () {}
/// \fn operator ( corpusIter corpus_first, corpusIter corpus_last )
/// \brief Searches the corpus for the pattern that was passed into the constructor
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
///
template <typename corpusIter>
corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
BOOST_STATIC_ASSERT (( boost::is_same<
typename std::iterator_traits<patIter>::value_type,
typename std::iterator_traits<corpusIter>::value_type>::value ));
if ( corpus_first == corpus_last ) return corpus_last; // if nothing to search, we didn't find it!
if ( pat_first == pat_last ) return corpus_first; // empty pattern matches at start
const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
// If the pattern is larger than the corpus, we can't find it!
if ( k_corpus_length < k_pattern_length )
return corpus_last;
// Do the search
return this->do_search ( corpus_first, corpus_last );
}
template <typename Range>
typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
return (*this) (boost::begin(r), boost::end(r));
}
private:
/// \cond DOXYGEN_HIDE
patIter pat_first, pat_last;
const difference_type k_pattern_length;
typename traits::skip_table_t skip_;
std::vector <difference_type> suffix_;
/// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
/// \brief Searches the corpus for the pattern that was passed into the constructor
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param p A predicate used for the search comparisons.
///
template <typename corpusIter>
corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
/* ---- Do the matching ---- */
corpusIter curPos = corpus_first;
const corpusIter lastPos = corpus_last - k_pattern_length;
difference_type j, k, m;
while ( curPos <= lastPos ) {
/* while ( std::distance ( curPos, corpus_last ) >= k_pattern_length ) { */
// Do we match right where we are?
j = k_pattern_length;
while ( pat_first [j-1] == curPos [j-1] ) {
j--;
// We matched - we're done!
if ( j == 0 )
return curPos;
}
// Since we didn't match, figure out how far to skip forward
k = skip_ [ curPos [ j - 1 ]];
m = j - k - 1;
if ( k < j && m > suffix_ [ j ] )
curPos += m;
else
curPos += suffix_ [ j ];
}
return corpus_last; // We didn't find anything
}
void build_skip_table ( patIter first, patIter last ) {
for ( std::size_t i = 0; first != last; ++first, ++i )
skip_.insert ( *first, i );
}
template<typename Iter, typename Container>
void compute_bm_prefix ( Iter pat_first, Iter pat_last, Container &prefix ) {
const std::size_t count = std::distance ( pat_first, pat_last );
BOOST_ASSERT ( count > 0 );
BOOST_ASSERT ( prefix.size () == count );
prefix[0] = 0;
std::size_t k = 0;
for ( std::size_t i = 1; i < count; ++i ) {
BOOST_ASSERT ( k < count );
while ( k > 0 && ( pat_first[k] != pat_first[i] )) {
BOOST_ASSERT ( k < count );
k = prefix [ k - 1 ];
}
if ( pat_first[k] == pat_first[i] )
k++;
prefix [ i ] = k;
}
}
void build_suffix_table ( patIter pat_first, patIter pat_last ) {
const std::size_t count = (std::size_t) std::distance ( pat_first, pat_last );
if ( count > 0 ) { // empty pattern
std::vector<typename std::iterator_traits<patIter>::value_type> reversed(count);
(void) std::reverse_copy ( pat_first, pat_last, reversed.begin ());
std::vector<difference_type> prefix (count);
compute_bm_prefix ( pat_first, pat_last, prefix );
std::vector<difference_type> prefix_reversed (count);
compute_bm_prefix ( reversed.begin (), reversed.end (), prefix_reversed );
for ( std::size_t i = 0; i <= count; i++ )
suffix_[i] = count - prefix [count-1];
for ( std::size_t i = 0; i < count; i++ ) {
const std::size_t j = count - prefix_reversed[i];
const difference_type k = i - prefix_reversed[i] + 1;
if (suffix_[j] > k)
suffix_[j] = k;
}
}
}
/// \endcond
};
/* Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
Use a bit of TMP to disambiguate the 3-argument templates */
/// \fn boyer_moore_search ( corpusIter corpus_first, corpusIter corpus_last,
/// patIter pat_first, patIter pat_last )
/// \brief Searches the corpus for the pattern.
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param pat_first The start of the pattern to search for (Random Access Iterator)
/// \param pat_last One past the end of the data to search for
///
template <typename patIter, typename corpusIter>
corpusIter boyer_moore_search (
corpusIter corpus_first, corpusIter corpus_last,
patIter pat_first, patIter pat_last )
{
boyer_moore<patIter> bm ( pat_first, pat_last );
return bm ( corpus_first, corpus_last );
}
template <typename PatternRange, typename corpusIter>
corpusIter boyer_moore_search (
corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
{
typedef typename boost::range_iterator<PatternRange> pattern_iterator;
boyer_moore<pattern_iterator> bm ( boost::begin(pattern), boost::end (pattern));
return bm ( corpus_first, corpus_last );
}
template <typename patIter, typename CorpusRange>
typename boost::lazy_disable_if_c<
boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
::type
boyer_moore_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
{
boyer_moore<patIter> bm ( pat_first, pat_last );
return bm (boost::begin (corpus), boost::end (corpus));
}
template <typename PatternRange, typename CorpusRange>
typename boost::range_iterator<CorpusRange>::type
boyer_moore_search ( CorpusRange &corpus, const PatternRange &pattern )
{
typedef typename boost::range_iterator<PatternRange> pattern_iterator;
boyer_moore<pattern_iterator> bm ( boost::begin(pattern), boost::end (pattern));
return bm (boost::begin (corpus), boost::end (corpus));
}
// Creator functions -- take a pattern range, return an object
template <typename Range>
boost::algorithm::boyer_moore<typename boost::range_iterator<const Range>::type>
make_boyer_moore ( const Range &r ) {
return boost::algorithm::boyer_moore
<typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
}
template <typename Range>
boost::algorithm::boyer_moore<typename boost::range_iterator<Range>::type>
make_boyer_moore ( Range &r ) {
return boost::algorithm::boyer_moore
<typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
}
}}
#endif // BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP

View File

@ -0,0 +1,141 @@
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
*/
#ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
#define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
#include <iterator> // for std::iterator_traits
#include <boost/assert.hpp>
#include <boost/static_assert.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/algorithm/searching/detail/bm_traits.hpp>
#include <boost/algorithm/searching/detail/debugging.hpp>
// #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
namespace boost { namespace algorithm {
/*
A templated version of the boyer-moore-horspool searching algorithm.
Requirements:
* Random access iterators
* The two iterator types (patIter and corpusIter) must
"point to" the same underlying type.
* Additional requirements may be imposed buy the skip table, such as:
** Numeric type (array-based skip table)
** Hashable type (map-based skip table)
http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
*/
template <typename patIter, typename traits = detail::BM_traits<patIter> >
class boyer_moore_horspool {
typedef typename std::iterator_traits<patIter>::difference_type difference_type;
public:
boyer_moore_horspool ( patIter first, patIter last )
: pat_first ( first ), pat_last ( last ),
k_pattern_length ( std::distance ( pat_first, pat_last )),
skip_ ( k_pattern_length, k_pattern_length ) {
// Build the skip table
std::size_t i = 0;
if ( first != last ) // empty pattern?
for ( patIter iter = first; iter != last-1; ++iter, ++i )
skip_.insert ( *iter, k_pattern_length - 1 - i );
#ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
skip_.PrintSkipTable ();
#endif
}
~boyer_moore_horspool () {}
/// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
/// \brief Searches the corpus for the pattern that was passed into the constructor
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param p A predicate used for the search comparisons.
///
template <typename corpusIter>
corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
BOOST_STATIC_ASSERT (( boost::is_same<
typename std::iterator_traits<patIter>::value_type,
typename std::iterator_traits<corpusIter>::value_type>::value ));
if ( corpus_first == corpus_last ) return corpus_last; // if nothing to search, we didn't find it!
if ( pat_first == pat_last ) return corpus_first; // empty pattern matches at start
const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
// If the pattern is larger than the corpus, we can't find it!
if ( k_corpus_length < k_pattern_length )
return corpus_last;
// Do the search
return this->do_search ( corpus_first, corpus_last );
}
private:
/// \cond DOXYGEN_HIDE
patIter pat_first, pat_last;
const difference_type k_pattern_length;
typename traits::skip_table_t skip_;
/// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last )
/// \brief Searches the corpus for the pattern that was passed into the constructor
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param k_corpus_length The length of the corpus to search
///
template <typename corpusIter>
corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
corpusIter curPos = corpus_first;
const corpusIter lastPos = corpus_last - k_pattern_length;
while ( curPos <= lastPos ) {
// Do we match right where we are?
std::size_t j = k_pattern_length - 1;
while ( pat_first [j] == curPos [j] ) {
// We matched - we're done!
if ( j == 0 )
return curPos;
j--;
}
curPos += skip_ [ curPos [ k_pattern_length - 1 ]];
}
return corpus_last;
}
// \endcond
};
/// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last,
/// patIter pat_first, patIter pat_last )
/// \brief Searches the corpus for the pattern.
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param pat_first The start of the pattern to search for (Random Access Iterator)
/// \param pat_last One past the end of the data to search for
///
template <typename patIter, typename corpusIter>
corpusIter boyer_moore_horspool_search (
corpusIter corpus_first, corpusIter corpus_last,
patIter pat_first, patIter pat_last ) {
boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
return bmh ( corpus_first, corpus_last );
}
}}
#endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP

View File

@ -0,0 +1,105 @@
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
*/
#ifndef BOOST_ALGORITHM_SEARCH_DETAIL_BM_TRAITS_HPP
#define BOOST_ALGORITHM_SEARCH_DETAIL_BM_TRAITS_HPP
#include <climits> // for CHAR_BIT
#include <vector>
#include <iterator> // for std::iterator_traits
#include <boost/type_traits/make_unsigned.hpp>
#include <boost/type_traits/is_integral.hpp>
#include <boost/type_traits/remove_pointer.hpp>
#include <boost/type_traits/remove_const.hpp>
#include <boost/array.hpp>
#include <boost/tr1/tr1/unordered_map>
#include <boost/algorithm/searching/detail/debugging.hpp>
namespace boost { namespace algorithm { namespace detail {
//
// Default implementations of the skip tables for B-M and B-M-H
//
template<typename key_type, typename value_type, bool /*useArray*/> class skip_table;
// General case for data searching other than bytes; use a map
template<typename key_type, typename value_type>
class skip_table<key_type, value_type, false> {
private:
typedef std::tr1::unordered_map<key_type, value_type> skip_map;
const value_type k_default_value;
skip_map skip_;
public:
skip_table ( std::size_t patSize, value_type default_value )
: k_default_value ( default_value ), skip_ ( patSize ) {}
void insert ( key_type key, value_type val ) {
skip_ [ key ] = val; // Would skip_.insert (val) be better here?
}
value_type operator [] ( key_type key ) const {
typename skip_map::const_iterator it = skip_.find ( key );
return it == skip_.end () ? k_default_value : it->second;
}
void PrintSkipTable () const {
std::cout << "BM(H) Skip Table <unordered_map>:" << std::endl;
for ( typename skip_map::const_iterator it = skip_.begin (); it != skip_.end (); ++it )
if ( it->second != k_default_value )
std::cout << " " << it->first << ": " << it->second << std::endl;
std::cout << std::endl;
}
};
// Special case small numeric values; use an array
template<typename key_type, typename value_type>
class skip_table<key_type, value_type, true> {
private:
typedef typename boost::make_unsigned<key_type>::type unsigned_key_type;
typedef boost::array<value_type, 1U << (CHAR_BIT * sizeof(key_type))> skip_map;
skip_map skip_;
const value_type k_default_value;
public:
skip_table ( std::size_t patSize, value_type default_value ) : k_default_value ( default_value ) {
std::fill_n ( skip_.begin(), skip_.size(), default_value );
}
void insert ( key_type key, value_type val ) {
skip_ [ static_cast<unsigned_key_type> ( key ) ] = val;
}
value_type operator [] ( key_type key ) const {
return skip_ [ static_cast<unsigned_key_type> ( key ) ];
}
void PrintSkipTable () const {
std::cout << "BM(H) Skip Table <boost:array>:" << std::endl;
for ( typename skip_map::const_iterator it = skip_.begin (); it != skip_.end (); ++it )
if ( *it != k_default_value )
std::cout << " " << std::distance (skip_.begin (), it) << ": " << *it << std::endl;
std::cout << std::endl;
}
};
template<typename Iterator>
struct BM_traits {
typedef typename std::iterator_traits<Iterator>::difference_type value_type;
typedef typename std::iterator_traits<Iterator>::value_type key_type;
typedef boost::algorithm::detail::skip_table<key_type, value_type,
boost::is_integral<key_type>::value && (sizeof(key_type)==1)> skip_table_t;
};
}}} // namespaces
#endif // BOOST_ALGORITHM_SEARCH_DETAIL_BM_TRAITS_HPP

View File

@ -0,0 +1,30 @@
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
*/
#ifndef BOOST_ALGORITHM_SEARCH_DETAIL_DEBUG_HPP
#define BOOST_ALGORITHM_SEARCH_DETAIL_DEBUG_HPP
#include <iostream>
/// \cond DOXYGEN_HIDE
namespace boost { namespace algorithm { namespace detail {
// Debugging support
template <typename Iter>
void PrintTable ( Iter first, Iter last ) {
std::cout << std::distance ( first, last ) << ": { ";
for ( Iter iter = first; iter != last; ++iter )
std::cout << *iter << " ";
std::cout << "}" << std::endl;
}
}}}
/// \endcond
#endif // BOOST_ALGORITHM_SEARCH_DETAIL_DEBUG_HPP

View File

@ -0,0 +1,200 @@
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
*/
#ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
#define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
#include <vector>
#include <iterator> // for std::iterator_traits
#include <boost/assert.hpp>
#include <boost/static_assert.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/algorithm/searching/detail/debugging.hpp>
// #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
namespace boost { namespace algorithm {
// #define NEW_KMP
/*
A templated version of the Knuth-Morris-Pratt searching algorithm.
Requirements:
* Random-access iterators
* The two iterator types (I1 and I2) must "point to" the same underlying type.
http://en.wikipedia.org/wiki/KnuthMorrisPratt_algorithm
http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
*/
template <typename patIter>
class knuth_morris_pratt {
typedef typename std::iterator_traits<patIter>::difference_type difference_type;
public:
knuth_morris_pratt ( patIter first, patIter last )
: pat_first ( first ), pat_last ( last ),
k_pattern_length ( std::distance ( pat_first, pat_last )),
skip_ ( k_pattern_length + 1 ) {
#ifdef NEW_KMP
preKmp ( pat_first, pat_last );
#else
init_skip_table ( pat_first, pat_last );
#endif
#ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
detail::PrintTable ( skip_.begin (), skip_.end ());
#endif
}
~knuth_morris_pratt () {}
/// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
/// \brief Searches the corpus for the pattern that was passed into the constructor
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param p A predicate used for the search comparisons.
///
template <typename corpusIter>
corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
BOOST_STATIC_ASSERT (( boost::is_same<
typename std::iterator_traits<patIter>::value_type,
typename std::iterator_traits<corpusIter>::value_type>::value ));
if ( corpus_first == corpus_last ) return corpus_last; // if nothing to search, we didn't find it!
if ( pat_first == pat_last ) return corpus_first; // empty pattern matches at start
const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
// If the pattern is larger than the corpus, we can't find it!
if ( k_corpus_length < k_pattern_length )
return corpus_last;
return do_search ( corpus_first, corpus_last, k_corpus_length );
}
private:
/// \cond DOXYGEN_HIDE
patIter pat_first, pat_last;
const difference_type k_pattern_length;
std::vector <difference_type> skip_;
/// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
/// \brief Searches the corpus for the pattern that was passed into the constructor
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param p A predicate used for the search comparisons.
///
template <typename corpusIter>
corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last,
difference_type k_corpus_length ) const {
difference_type match_start = 0; // position in the corpus that we're matching
#ifdef NEW_KMP
int patternIdx = 0;
while ( match_start < k_corpus_length ) {
while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
patternIdx++;
match_start++; //<--- corpus is always increased by 1
if ( patternIdx >= (int) k_pattern_length )
return corpus_first + match_start - patternIdx;
}
#else
// At this point, we know:
// k_pattern_length <= k_corpus_length
// for all elements of skip, it holds -1 .. k_pattern_length
//
// In the loop, we have the following invariants
// idx is in the range 0 .. k_pattern_length
// match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
const difference_type last_match = k_corpus_length - k_pattern_length;
difference_type idx = 0; // position in the pattern we're comparing
while ( match_start <= last_match ) {
while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
if ( ++idx == k_pattern_length )
return corpus_first + match_start;
}
// Figure out where to start searching again
// assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
match_start += idx - skip_ [ idx ];
idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
// assert ( idx >= 0 && idx < k_pattern_length );
}
#endif
// We didn't find anything
return corpus_last;
}
void preKmp ( patIter first, patIter last ) {
const /*std::size_t*/ int count = std::distance ( first, last );
int i, j;
i = 0;
j = skip_[0] = -1;
while (i < count) {
while (j > -1 && first[i] != first[j])
j = skip_[j];
i++;
j++;
if (first[i] == first[j])
skip_[i] = skip_[j];
else
skip_[i] = j;
}
}
void init_skip_table ( patIter first, patIter last ) {
const difference_type count = std::distance ( first, last );
int j;
skip_ [ 0 ] = -1;
for ( int i = 1; i <= count; ++i ) {
j = skip_ [ i - 1 ];
while ( j >= 0 ) {
if ( first [ j ] == first [ i - 1 ] )
break;
j = skip_ [ j ];
}
skip_ [ i ] = j + 1;
}
}
// \endcond
};
/// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last,
/// patIter pat_first, patIter pat_last )
/// \brief Searches the corpus for the pattern.
///
/// \param corpus_first The start of the data to search (Random Access Iterator)
/// \param corpus_last One past the end of the data to search
/// \param pat_first The start of the pattern to search for (Random Access Iterator)
/// \param pat_last One past the end of the data to search for
///
template <typename patIter, typename corpusIter>
corpusIter knuth_morris_pratt_search (
corpusIter corpus_first, corpusIter corpus_last,
patIter pat_first, patIter pat_last ) {
knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
return kmp ( corpus_first, corpus_last );
}
}}
#endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP