Fixed range-based versions of Boyer-Moore; Added range-based versions of Boyer-Moore-Horspool and Knuth-Morris-Pratt; Refs #7104; will close when merged to release. Thanks to Akira Takahashi for the bug report!

[SVN r79380]
This commit is contained in:
Marshall Clow
2012-07-09 14:27:00 +00:00
parent 681c5827d3
commit a0ee93ff89
5 changed files with 229 additions and 6 deletions

View File

@ -223,7 +223,7 @@ Requirements:
corpusIter boyer_moore_search (
corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
{
typedef typename boost::range_iterator<PatternRange> pattern_iterator;
typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
boyer_moore<pattern_iterator> bm ( boost::begin(pattern), boost::end (pattern));
return bm ( corpus_first, corpus_last );
}
@ -242,7 +242,7 @@ Requirements:
typename boost::range_iterator<CorpusRange>::type
boyer_moore_search ( CorpusRange &corpus, const PatternRange &pattern )
{
typedef typename boost::range_iterator<PatternRange> pattern_iterator;
typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
boyer_moore<pattern_iterator> bm ( boost::begin(pattern), boost::end (pattern));
return bm (boost::begin (corpus), boost::end (corpus));
}

View File

@ -84,6 +84,11 @@ http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
return this->do_search ( corpus_first, corpus_last );
}
/// \brief Range overload: searches all of \p r for the stored pattern.
/// \param r  The corpus range; its begin/end iterators are handed to the
///           iterator-pair overload of operator().
/// \return   The result of the iterator-pair overload for that range.
template <typename Range>
typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
    return this->operator() ( boost::begin ( r ), boost::end ( r ));
}
private:
/// \cond DOXYGEN_HIDE
patIter pat_first, pat_last;
@ -119,6 +124,9 @@ http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
// \endcond
};
/* Two ranges as inputs give us four possibilities, with 2, 3, 3, and 4 parameters.
Use a bit of TMP to disambiguate the 3-argument templates. */
/// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last,
/// patIter pat_first, patIter pat_last )
/// \brief Searches the corpus for the pattern.
@ -131,11 +139,56 @@ http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
// Iterator-pair overload: constructs a boyer_moore_horspool searcher from
// pat_first/pat_last and returns the result of applying it to
// corpus_first/corpus_last.
// NOTE(review): this hunk shows the parameter line twice — once ending in
// ") {" and once followed by a separate "{" line. These look like the pre- and
// post-change forms of the same line; confirm which one the real header keeps.
template <typename patIter, typename corpusIter>
corpusIter boyer_moore_horspool_search (
corpusIter corpus_first, corpusIter corpus_last,
patIter pat_first, patIter pat_last ) {
patIter pat_first, patIter pat_last )
{
boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
return bmh ( corpus_first, corpus_last );
}
template <typename PatternRange, typename corpusIter>
corpusIter boyer_moore_horspool_search (
corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
{
typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
return bmh ( corpus_first, corpus_last );
}
/// \brief Searches a corpus given as a range for a pattern given as an iterator pair.
///
/// This overload is removed from overload resolution (via lazy_disable_if_c)
/// when CorpusRange and patIter are the same type, so that it does not compete
/// with the other three-argument overload.
///
/// \param corpus     The corpus range to search.
/// \param pat_first  Start of the pattern.
/// \param pat_last   End of the pattern.
/// \return           The result of applying the searcher to the corpus'
///                   begin/end iterators.
template <typename patIter, typename CorpusRange>
typename boost::lazy_disable_if_c<
    boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
::type
boyer_moore_horspool_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
{
    boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
    // Call through the searcher declared above; `bm` is not declared in this
    // function, so using it fails as soon as the template is instantiated.
    return bmh (boost::begin (corpus), boost::end (corpus));
}
/// \brief Searches a corpus range for a pattern range.
/// \param corpus   The corpus range to search.
/// \param pattern  Range holding the pattern; read through const iterators.
/// \return         The result of applying a boyer_moore_horspool searcher,
///                 built from the pattern, to the corpus' begin/end iterators.
template <typename PatternRange, typename CorpusRange>
typename boost::range_iterator<CorpusRange>::type
boyer_moore_horspool_search ( CorpusRange &corpus, const PatternRange &pattern )
{
    typedef typename boost::range_iterator<const PatternRange>::type pat_iter_t;
    typedef boyer_moore_horspool<pat_iter_t> searcher_t;

    searcher_t searcher ( boost::begin ( pattern ), boost::end ( pattern ));
    return searcher ( boost::begin ( corpus ), boost::end ( corpus ));
}
// Creator functions -- take a pattern range, return an object
/// \brief Builds a boyer_moore_horspool searcher from a const pattern range.
/// \param r  The pattern range; the searcher is constructed from its
///           begin/end (const) iterators.
/// \return   A searcher object parameterised on the range's const iterator type.
template <typename Range>
boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<const Range>::type>
make_boyer_moore_horspool ( const Range &r ) {
    typedef typename boost::range_iterator<const Range>::type iter_t;
    typedef boost::algorithm::boyer_moore_horspool<iter_t> result_t;
    return result_t ( boost::begin ( r ), boost::end ( r ));
}
/// \brief Builds a boyer_moore_horspool searcher from a non-const pattern range.
/// \param r  The pattern range; the searcher is constructed from its
///           begin/end iterators.
/// \return   A searcher object parameterised on the range's iterator type.
template <typename Range>
boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<Range>::type>
make_boyer_moore_horspool ( Range &r ) {
    typedef typename boost::range_iterator<Range>::type iter_t;
    typedef boost::algorithm::boyer_moore_horspool<iter_t> result_t;
    return result_t ( boost::begin ( r ), boost::end ( r ));
}
}}
#endif // BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP

View File

@ -79,6 +79,11 @@ namespace boost { namespace algorithm {
return do_search ( corpus_first, corpus_last, k_corpus_length );
}
/// \brief Range overload: searches all of \p r for the stored pattern.
/// \param r  The corpus range; its begin/end iterators are handed to the
///           iterator-pair overload of operator().
/// \return   The result of the iterator-pair overload for that range.
template <typename Range>
typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
    return this->operator() ( boost::begin ( r ), boost::end ( r ));
}
private:
/// \cond DOXYGEN_HIDE
patIter pat_first, pat_last;
@ -179,6 +184,9 @@ namespace boost { namespace algorithm {
};
/* Two ranges as inputs give us four possibilities, with 2, 3, 3, and 4 parameters.
Use a bit of TMP to disambiguate the 3-argument templates. */
/// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last,
/// patIter pat_first, patIter pat_last )
/// \brief Searches the corpus for the pattern.
@ -191,10 +199,55 @@ namespace boost { namespace algorithm {
// Iterator-pair overload: constructs a knuth_morris_pratt searcher from
// pat_first/pat_last and returns the result of applying it to
// corpus_first/corpus_last.
// NOTE(review): this hunk shows the parameter line twice — once ending in
// ") {" and once followed by a separate "{" line. These look like the pre- and
// post-change forms of the same line; confirm which one the real header keeps.
template <typename patIter, typename corpusIter>
corpusIter knuth_morris_pratt_search (
corpusIter corpus_first, corpusIter corpus_last,
patIter pat_first, patIter pat_last ) {
patIter pat_first, patIter pat_last )
{
knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
return kmp ( corpus_first, corpus_last );
}
template <typename PatternRange, typename corpusIter>
corpusIter knuth_morris_pratt_search (
corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
{
typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
return kmp ( corpus_first, corpus_last );
}
/// \brief Searches a corpus given as a range for a pattern given as an iterator pair.
///
/// This overload is removed from overload resolution (via lazy_disable_if_c)
/// when CorpusRange and patIter are the same type, so that it does not compete
/// with the other three-argument overload.
///
/// \param corpus     The corpus range to search.
/// \param pat_first  Start of the pattern.
/// \param pat_last   End of the pattern.
/// \return           The result of applying the searcher to the corpus'
///                   begin/end iterators.
template <typename patIter, typename CorpusRange>
typename boost::lazy_disable_if_c<
    boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
::type
knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
{
    typedef knuth_morris_pratt<patIter> searcher_t;

    searcher_t searcher ( pat_first, pat_last );
    return searcher ( boost::begin ( corpus ), boost::end ( corpus ));
}
/// \brief Searches a corpus range for a pattern range.
/// \param corpus   The corpus range to search.
/// \param pattern  Range holding the pattern; read through const iterators.
/// \return         The result of applying a knuth_morris_pratt searcher,
///                 built from the pattern, to the corpus' begin/end iterators.
template <typename PatternRange, typename CorpusRange>
typename boost::range_iterator<CorpusRange>::type
knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
{
    typedef typename boost::range_iterator<const PatternRange>::type pat_iter_t;
    typedef knuth_morris_pratt<pat_iter_t> searcher_t;

    searcher_t searcher ( boost::begin ( pattern ), boost::end ( pattern ));
    return searcher ( boost::begin ( corpus ), boost::end ( corpus ));
}
// Creator functions -- take a pattern range, return an object
/// \brief Builds a knuth_morris_pratt searcher from a const pattern range.
/// \param r  The pattern range; the searcher is constructed from its
///           begin/end (const) iterators.
/// \return   A searcher object parameterised on the range's const iterator type.
template <typename Range>
boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
make_knuth_morris_pratt ( const Range &r ) {
    typedef typename boost::range_iterator<const Range>::type iter_t;
    typedef boost::algorithm::knuth_morris_pratt<iter_t> result_t;
    return result_t ( boost::begin ( r ), boost::end ( r ));
}
/// \brief Builds a knuth_morris_pratt searcher from a non-const pattern range.
/// \param r  The pattern range; the searcher is constructed from its
///           begin/end iterators.
/// \return   A searcher object parameterised on the range's iterator type.
template <typename Range>
boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
make_knuth_morris_pratt ( Range &r ) {
    typedef typename boost::range_iterator<Range>::type iter_t;
    typedef boost::algorithm::knuth_morris_pratt<iter_t> result_t;
    return result_t ( boost::begin ( r ), boost::end ( r ));
}
}}
#endif // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP

View File

@ -16,6 +16,7 @@ import testing ;
[ run search_test1.cpp : : : : search_test1 ]
[ run search_test2.cpp : : : : search_test2 ]
[ run search_test3.cpp : : : : search_test3 ]
[ run search_test4.cpp : : : : search_test4 ]
[ compile-fail search_fail1.cpp : : : : ]
[ compile-fail search_fail2.cpp : : : : ]
[ compile-fail search_fail3.cpp : : : : ]

116
test/search_test4.cpp Normal file
View File

@ -0,0 +1,116 @@
/*
Copyright (c) Marshall Clow 2010-2012.
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
For more information, see http://www.boost.org
Testing the range-based interfaces
*/
#include <boost/algorithm/searching/boyer_moore.hpp>
#include <boost/algorithm/searching/boyer_moore_horspool.hpp>
#include <boost/algorithm/searching/knuth_morris_pratt.hpp>
#include <boost/test/included/test_exec_monitor.hpp>
#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
// Container type used for both corpus and pattern: a sequence of strings
// (ReadFromFile stores one string per line read).
typedef std::vector<std::string> vec;
// NOTE(review): NUM_TRIES is not referenced in the visible part of this test —
// confirm whether it is still needed.
#define NUM_TRIES 100
// runOne(call, refDiff): invokes boost::algorithm::call ( haystack, needle ),
// stores the result in `res` and compares it with `exp`. On a mismatch it
// prints both positions as offsets from haystack.begin () and throws
// std::runtime_error naming the call.
// Relies on `haystack`, `needle`, `res` and `exp` being in scope where the
// macro is expanded. The refDiff argument is not used by the macro body.
#define runOne(call, refDiff) { \
res = boost::algorithm::call ( haystack, needle ); \
if ( res != exp ) { \
std::cout << "Expected " \
<< exp - haystack.begin () << " got " \
<< res - haystack.begin () << std::endl; \
throw std::runtime_error \
( "Unexpected result from " #call ); \
} \
}
// runObject(obj, refDiff): builds a boost::algorithm::obj<vec::const_iterator>
// searcher via boost::algorithm::make_obj ( needle ), applies it to the whole
// `haystack` range, stores the result in `res` and compares it with `exp`.
// On a mismatch it prints both positions as offsets from haystack.begin ()
// and throws std::runtime_error naming the object.
// Relies on `haystack`, `needle`, `res` and `exp` being in scope where the
// macro is expanded. The refDiff argument is not used by the macro body.
#define runObject(obj, refDiff) { \
boost::algorithm::obj <vec::const_iterator> s_o = \
boost::algorithm::make_##obj ( needle ); \
res = s_o ( haystack ); \
if ( res != exp ) { \
std::cout << "Expected " \
<< exp - haystack.begin () << " got " \
<< res - haystack.begin () << std::endl; \
throw std::runtime_error \
( "Unexpected result from " #obj " object" ); \
} \
}
namespace {
// Reads the file `name` (opened in binary mode) line by line and returns the
// lines, in order, as a vec. If nothing can be read the result is empty.
vec ReadFromFile ( const char *name ) {
    std::ifstream input ( name, std::ios_base::binary | std::ios_base::in );
    vec lines;
    for ( std::string line; std::getline ( input, line ); )
        lines.push_back ( line );
    return lines;
}
// Checks that std::search and every range-based Boost search flavour find
// `needle` in `haystack` at the expected position.
//   expected >= 0  : offset from haystack.begin ()
//   expected == -1 : pattern is not present (haystack.end ())
//   expected == -2 : position unknown; use whatever std::search reports
// Any other value throws std::logic_error; a wrong search result throws
// std::runtime_error.
void check_one ( const vec &haystack, const vec &needle, int expected ) {
    // The runOne/runObject macros refer to haystack, needle, res and exp by
    // name, so these identifiers must keep exactly this spelling.
    vec::const_iterator res;
    vec::const_iterator exp;        // the expected result

    if ( expected < -2 )
        throw std::logic_error ( "Expected must be -2, -1, or >= 0" );

    if ( expected == -2 )
        exp = std::search ( haystack.begin (), haystack.end (), needle.begin (), needle.end ());
    else if ( expected == -1 )
        exp = haystack.end ();      // we didn't find it
    else
        exp = haystack.begin () + expected;

    std::cout << "Pattern is " << needle.size () << " entries long" << std::endl;
    std::cout << "Corpus is " << haystack.size () << " entries long" << std::endl;

    // Reference run with the standard library algorithm
    res = std::search ( haystack.begin (), haystack.end (), needle.begin (), needle.end ());
    if ( res != exp ) {
        std::cout << "Expected " << exp - haystack.begin ()
                  << " got " << res - haystack.begin () << std::endl;
        throw std::runtime_error ( "Unexpected result from std::search" );
    }

    // Free-function and searcher-object form of each algorithm, in turn
    runOne ( boyer_moore_search, stdDiff );
    runObject ( boyer_moore, stdDiff );
    runOne ( boyer_moore_horspool_search, stdDiff );
    runObject ( boyer_moore_horspool, stdDiff );
    runOne ( knuth_morris_pratt_search, stdDiff );
    runObject ( knuth_morris_pratt, stdDiff );
}
}
// Test entry point: loads the corpus and four pattern files from
// search_test_data/, then runs check_one for a pattern expected at the
// beginning, one at an unknown position, one at the end, and one that is
// not present. Returns 0 when every check passes; check_one throws otherwise.
int test_main( int , char* [] )
{
    vec c1 = ReadFromFile ( "search_test_data/0001.corpus" );
    vec p1b = ReadFromFile ( "search_test_data/0002b.pat" );
    vec p1e = ReadFromFile ( "search_test_data/0002e.pat" );
    vec p1n = ReadFromFile ( "search_test_data/0002n.pat" );
    vec p1f = ReadFromFile ( "search_test_data/0002f.pat" );

    // Use the std::fixed manipulator: streaming the std::ios::fixed flag
    // constant would print its integer value instead of changing the format.
    std::cout << std::fixed << std::setprecision(4);
    // std::cout << "Corpus is " << c1.size () << " entries long\n";
    std::cout << "--- Beginning ---" << std::endl;
    check_one ( c1, p1b, 0 ); // Find it at position zero
    std::cout << "---- Middle -----" << std::endl;
    check_one ( c1, p1f, -2 ); // Don't know answer
    std::cout << "------ End ------" << std::endl;
    // check_one takes an int; make the size_t -> int conversion explicit.
    check_one ( c1, p1e, static_cast<int> ( c1.size () - p1e.size ()));
    std::cout << "--- Not found ---" << std::endl;
    check_one ( c1, p1n, -1 ); // Not found
    return 0;
}