2002-04-24 10:50:23 +00:00
|
|
|
/*
|
|
|
|
*
|
|
|
|
* Copyright (c) 1998-2002
|
|
|
|
* Dr John Maddock
|
|
|
|
*
|
|
|
|
* Permission to use, copy, modify, distribute and sell this software
|
|
|
|
* and its documentation for any purpose is hereby granted without fee,
|
|
|
|
* provided that the above copyright notice appear in all copies and
|
|
|
|
* that both that copyright notice and this permission notice appear
|
|
|
|
* in supporting documentation. Dr John Maddock makes no representations
|
|
|
|
* about the suitability of this software for any purpose.
|
|
|
|
* It is provided "as is" without express or implied warranty.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* LOCATION: see http://www.boost.org for most recent version.
|
|
|
|
* FILE partial_regex_grep.cpp
|
|
|
|
* VERSION see <boost/version.hpp>
|
|
|
|
* DESCRIPTION: Search example using partial matches.
|
|
|
|
*/
|
2001-03-03 11:37:50 +00:00
|
|
|
|
|
|
|
#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <boost/regex.hpp>
|
|
|
|
|
|
|
|
// Expression used for every search: a '<', then any run of characters
// other than '>', then a closing '>' (i.e. something shaped like an html tag).
|
|
|
|
boost::regex e("<[^>]*>");
|
|
|
|
// Running total of complete matches; incremented by grep_callback() and
// printed by main(). It is never reset, so it accumulates across all inputs.
|
|
|
|
unsigned int tags = 0;
|
|
|
|
// Start of the most recent partial (incomplete) match, as recorded by
// grep_callback(). search() points this at the end of its buffer before each
// grep and afterwards uses it to decide how much text to carry forward.
|
2001-12-16 11:33:27 +00:00
|
|
|
const char* next_pos = 0;
|
2001-03-03 11:37:50 +00:00
|
|
|
|
2001-12-16 11:33:27 +00:00
|
|
|
// Callback handed to boost::regex_grep by search().
//
// m[0] describes the whole match. When it is a complete match the global
// tag counter is bumped; otherwise (m[0].matched is false, i.e. a partial
// match) the position where that partial match begins is stored in the
// global next_pos.
//
// Always returns true.
bool grep_callback(const boost::match_results<const char*>& m)
{
   if(m[0].matched)
   {
      // complete tag: count it
      ++tags;
      return true;
   }
   // partial match: remember where it starts
   next_pos = m[0].first;
   return true;
}
|
|
|
|
|
|
|
|
void search(std::istream& is)
|
|
|
|
{
|
|
|
|
char buf[4096];
|
|
|
|
next_pos = buf + sizeof(buf);
|
|
|
|
bool have_more = true;
|
|
|
|
while(have_more)
|
|
|
|
{
|
|
|
|
// how much do we copy forward from last try:
|
|
|
|
unsigned leftover = (buf + sizeof(buf)) - next_pos;
|
|
|
|
// and how much is left to fill:
|
|
|
|
unsigned size = next_pos - buf;
|
|
|
|
// copy forward whatever we have left:
|
2001-04-28 10:12:52 +00:00
|
|
|
memmove(buf, next_pos, leftover);
|
2001-03-03 11:37:50 +00:00
|
|
|
// fill the rest from the stream:
|
2001-12-16 11:33:27 +00:00
|
|
|
is.read(buf + leftover, size);
|
|
|
|
unsigned read = is.gcount();
|
2001-03-03 11:37:50 +00:00
|
|
|
// check to see if we've run out of text:
|
|
|
|
have_more = read == size;
|
|
|
|
// reset next_pos:
|
|
|
|
next_pos = buf + sizeof(buf);
|
|
|
|
// and then grep:
|
2001-12-16 11:33:27 +00:00
|
|
|
boost::regex_grep<bool(*)(const boost::cmatch&), const char*>(grep_callback,
|
|
|
|
static_cast<const char*>(buf),
|
|
|
|
static_cast<const char*>(buf + read + leftover),
|
2001-03-03 11:37:50 +00:00
|
|
|
e,
|
|
|
|
boost::match_default | boost::match_partial);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
if(argc > 1)
|
|
|
|
{
|
2003-05-19 11:56:17 +00:00
|
|
|
for(int i = 1; i < argc; ++i)
|
2001-03-03 11:37:50 +00:00
|
|
|
{
|
|
|
|
std::ifstream fs(argv[i]);
|
|
|
|
if(fs.bad()) continue;
|
|
|
|
search(fs);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
std::string one("<META NAME=\"keywords\" CONTENT=\"regex++, regular expressions, regular expression library, C++\">");
|
|
|
|
std::string what;
|
|
|
|
while(what.size() < 10000)
|
|
|
|
{
|
|
|
|
what.append(one);
|
|
|
|
what.append(13, ' ');
|
|
|
|
}
|
|
|
|
std::stringstream ss;
|
|
|
|
ss.str(what);
|
|
|
|
search(ss);
|
|
|
|
}
|
|
|
|
std::cout << "total tag count was " << tags << std::endl;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2001-04-28 10:12:52 +00:00
|
|
|
|
2002-04-24 10:50:23 +00:00
|
|
|
|