Added performance test files

[SVN r18134]
This commit is contained in:
John Maddock
2003-03-30 11:02:40 +00:00
parent 18213d9661
commit a65a747866
11 changed files with 2172 additions and 0 deletions

View File

@ -0,0 +1,285 @@
#include <iostream>
#include <iomanip>
#include <fstream>
#include <deque>
#include <sstream>
#include <stdexcept>
#include <iterator>
#include "regex_comparison.hpp"
//
// globals:
//
// Library selection flags; handle_argument() switches these on for the
// -b, -bl, -g, -gs, -posix, -pcre and -all options:
bool time_boost = false;
bool time_localised_boost = false;
bool time_greta = false;
bool time_safe_greta = false;
bool time_posix = false;
bool time_pcre = false;
// Test selection flags; handle_argument() switches these on for the
// -test-matches, -test-code, -test-html, -test-short-twain,
// -test-long-twain and -test-all options:
bool test_matches = false;
bool test_code = false;
bool test_html = false;
bool test_short_twain = false;
bool test_long_twain = false;
// First non-option command line argument: name of the HTML template file.
std::string html_template_file;
// Second non-option command line argument: name of the HTML output file
// (output_final_html() writes to std::cout when this is empty).
std::string html_out_file;
// Text of the template file; output_html_results() replaces tags in it.
std::string html_contents;
// Results accumulated for the table currently being built; emptied by
// output_html_results() once a table has been generated from them.
std::list<results> result_list;
//
// handle_argument:
// Interprets one command line argument and updates the global option
// flags / file names to match.
// Returns 0 when the argument was accepted and non-zero otherwise
// (help was requested, the option is unknown, or there are too many
// file names).  May throw whatever load_file() throws when the argument
// names the HTML template.
//
int handle_argument(const std::string& what)
{
   //
   // library selection:
   //
   if(what == "-b")
   {
      time_boost = true;
      return 0;
   }
   if(what == "-bl")
   {
      time_localised_boost = true;
      return 0;
   }
#ifdef BOOST_HAS_GRETA
   if(what == "-g")
   {
      time_greta = true;
      return 0;
   }
   if(what == "-gs")
   {
      time_safe_greta = true;
      return 0;
   }
#endif
#ifdef BOOST_HAS_POSIX
   if(what == "-posix")
   {
      time_posix = true;
      return 0;
   }
#endif
#ifdef BOOST_HAS_PCRE
   if(what == "-pcre")
   {
      time_pcre = true;
      return 0;
   }
#endif
   if(what == "-all")
   {
      time_boost = true;
      time_localised_boost = true;
#ifdef BOOST_HAS_GRETA
      time_greta = true;
      time_safe_greta = true;
#endif
#ifdef BOOST_HAS_POSIX
      time_posix = true;
#endif
#ifdef BOOST_HAS_PCRE
      time_pcre = true;
#endif
      return 0;
   }
   //
   // test selection:
   //
   if(what == "-test-matches")
   {
      test_matches = true;
      return 0;
   }
   if(what == "-test-code")
   {
      test_code = true;
      return 0;
   }
   if(what == "-test-html")
   {
      test_html = true;
      return 0;
   }
   if(what == "-test-short-twain")
   {
      test_short_twain = true;
      return 0;
   }
   if(what == "-test-long-twain")
   {
      test_long_twain = true;
      return 0;
   }
   if(what == "-test-all")
   {
      test_matches = true;
      test_code = true;
      test_html = true;
      test_short_twain = true;
      test_long_twain = true;
      return 0;
   }
   //
   // help, then anything else that looks like an option is an error:
   //
   if((what == "-h") || (what == "--help"))
      return show_usage();
   const char lead = what[0];
   if((lead == '-') || (lead == '/'))
   {
      std::cerr << "Unknown argument: \"" << what << "\"" << std::endl;
      return 1;
   }
   //
   // positional arguments: the template file first, then the output file:
   //
   if(html_template_file.empty())
   {
      html_template_file = what;
      load_file(html_contents, what.c_str());
      return 0;
   }
   if(html_out_file.empty())
   {
      html_out_file = what;
      return 0;
   }
   std::cerr << "Unexpected argument: \"" << what << "\"" << std::endl;
   return 1;
}
//
// show_usage:
// Prints the command line help text to std::cout.  Options for the
// optional libraries are only listed when support for them has been
// compiled in (BOOST_HAS_GRETA / BOOST_HAS_POSIX / BOOST_HAS_PCRE).
// Always returns 1, so that callers can use the result directly as a
// "do not continue" status.
//
int show_usage()
{
   std::cout <<
      "Usage\n"
      "regex_comparison [-h] [library options] [test options] [html_template html_output_file]\n"
      " -h Show help\n\n"
      " library options:\n"
      " -b Apply tests to boost library\n"
      " -bl Apply tests to boost library with C++ locale\n"
#ifdef BOOST_HAS_GRETA
      " -g Apply tests to GRETA library\n"
      " -gs Apply tests to GRETA library (in non-recursive mode)\n"
#endif
#ifdef BOOST_HAS_POSIX
      " -posix Apply tests to POSIX library\n"
#endif
#ifdef BOOST_HAS_PCRE
      " -pcre Apply tests to PCRE library\n"
#endif
      " -all Apply tests to all libraries\n\n"
      " test options:\n"
      " -test-matches Test short matches\n"
      " -test-code Test c++ code examples\n"
      // previously a copy of the -test-code description:
      " -test-html Test HTML document searches\n"
      " -test-short-twain Test short searches\n"
      " -test-long-twain Test long searches\n"
      " -test-all Test everything\n";
   return 1;
}
//
// load_file:
// Replaces the contents of "text" with the complete contents of the
// file named by "file".
// Throws std::runtime_error("Unable to open file: \"<file>\"") when the
// file cannot be opened.
//
void load_file(std::string& text, const char* file)
{
   std::ifstream is(file);
   if(!is.good())
   {
      std::string msg("Unable to open file: \"");
      msg.append(file);
      msg.append("\"");
      throw std::runtime_error(msg);
   }
   //
   // Find out how big the file is so that the string can be sized up
   // front.  tellg() yields -1 if the position can not be determined;
   // in that case skip the reservation rather than asking reserve()
   // for an enormous size.
   //
   is.seekg(0, std::ios_base::end);
   const std::streamoff size = is.tellg();
   is.clear();   // a failed seek must not prevent repositioning below
   is.seekg(0, std::ios_base::beg);
   text.erase();
   if(size > 0)
      text.reserve(static_cast<std::string::size_type>(size));
   text.append(std::istreambuf_iterator<char>(is), std::istreambuf_iterator<char>());
}
//
// print_result:
// Writes one HTML table cell describing a timing result to "os".
//   time - the measured time in seconds; a negative value means "no
//          result" and produces a cell containing "NA".
//   best - the best (smallest) time recorded for the same test, used to
//          compute the relative time shown in the cell.
// The cell holds the time relative to "best" followed by the absolute
// time in brackets, scaled to s/ms/us/ns/ps.  Results within 10% of the
// best time are highlighted in green.
//
void print_result(std::ostream& os, double time, double best)
{
   static const char* suffixes[] = {"s", "ms", "us", "ns", "ps", };
   static const unsigned last_suffix = sizeof(suffixes) / sizeof(suffixes[0]) - 1;
   if(time < 0)
   {
      os << "<td>NA</td>";
      return;
   }
   // A best time of zero can only tie with a time of zero; report that as
   // a ratio of 1 instead of dividing by zero and printing NaN/infinity.
   const double rel = (best > 0) ? time / best : 1.0;
   const bool highlight = ((rel > 0) && (rel < 1.1));
   //
   // Scale sub-second times up to the largest unit giving a value of at
   // least 1.  The original condition (time < 0) could never be true at
   // this point, so every time was reported in seconds.  The suffix bound
   // keeps the index inside the table, and the (time > 0) check stops an
   // exact zero from being scaled at all.
   //
   unsigned suffix = 0;
   while((time > 0) && (time < 1) && (suffix < last_suffix))
   {
      time *= 1000;
      ++suffix;
   }
   os << "<td>";
   if(highlight)
      os << "<font color=\"#008000\">";
   if(rel <= 1000)
      os << std::setprecision(3) << rel;
   else
      os << static_cast<int>(rel);
   os << "<BR>(";
   if(time <= 1000)
      os << std::setprecision(3) << time;
   else
      os << static_cast<int>(time);
   os << suffixes[suffix] << ")";
   if(highlight)
      os << "</font>";
   os << "</td>";
}
//
// html_escape_text:
// Returns a copy of "s" with the characters that are special in HTML
// (&, <, > and ") replaced by the corresponding character entities, so
// that regular expressions such as "<p>.*?</p>" are displayed rather
// than being interpreted as markup.
//
static std::string html_escape_text(const std::string& s)
{
   std::string out;
   out.reserve(s.size());
   for(std::string::size_type i = 0; i < s.size(); ++i)
   {
      switch(s[i])
      {
      case '&':
         out.append("&amp;");
         break;
      case '<':
         out.append("&lt;");
         break;
      case '>':
         out.append("&gt;");
         break;
      case '"':
         out.append("&quot;");
         break;
      default:
         out.append(1, s[i]);
         break;
      }
   }
   return out;
}
//
// output_html_results:
// Builds an HTML table from the entries in result_list and substitutes
// it for the first occurrence of "tagname" in html_contents.
//   show_description - when true a "Text" column holding each result's
//                      description is included.
//   tagname          - the placeholder text to replace.
// There is one column per library enabled via the time_* flags.
// result_list is emptied once a table has been produced from it; when
// it is already empty a "Results not available" paragraph is
// substituted instead.  Nothing happens to html_contents if "tagname"
// does not occur in it.
//
void output_html_results(bool show_description, const std::string& tagname)
{
   std::stringstream os;
   if(!result_list.empty())
   {
      //
      // start by outputting the table header:
      //
      os << "<table border=\"1\" cellspacing=\"1\">\n";
      os << "<tr><td><strong>Expression</strong></td>";
      if(show_description)
         os << "<td><strong>Text</strong></td>";
#if defined(BOOST_HAS_GRETA)
      if(time_greta == true)
         os << "<td><strong>GRETA</strong></td>";
      if(time_safe_greta == true)
         os << "<td><strong>GRETA<BR>(non-recursive mode)</strong></td>";
#endif
      if(time_boost == true)
         os << "<td><strong>Boost</strong></td>";
      if(time_localised_boost == true)
         os << "<td><strong>Boost + C++ locale</strong></td>";
#if defined(BOOST_HAS_POSIX)
      if(time_posix == true)
         os << "<td><strong>POSIX</strong></td>";
#endif
#ifdef BOOST_HAS_PCRE
      if(time_pcre == true)
         os << "<td><strong>PCRE</strong></td>";
#endif
      os << "</tr>\n";
      //
      // Now enumerate through all the test results.  The cells must be
      // emitted under exactly the same conditions, and in the same order,
      // as the header cells above.
      //
      std::list<results>::const_iterator first, last;
      first = result_list.begin();
      last = result_list.end();
      while(first != last)
      {
         os << "<tr><td><code>" << html_escape_text(first->expression) << "</code></td>";
         if(show_description)
            os << "<td>" << html_escape_text(first->description) << "</td>";
#if defined(BOOST_HAS_GRETA)
         if(time_greta == true)
            print_result(os, first->greta_time, first->factor);
         if(time_safe_greta == true)
            print_result(os, first->safe_greta_time, first->factor);
#endif
         // the Boost columns are unconditional (they used to be wrongly
         // guarded by BOOST_HAS_POSIX):
         if(time_boost == true)
            print_result(os, first->boost_time, first->factor);
         if(time_localised_boost == true)
            print_result(os, first->localised_boost_time, first->factor);
#if defined(BOOST_HAS_POSIX)
         if(time_posix == true)
            print_result(os, first->posix_time, first->factor);
#endif
#if defined(BOOST_HAS_PCRE)
         if(time_pcre == true)
            print_result(os, first->pcre_time, first->factor);
#endif
         os << "</tr>\n";
         ++first;
      }
      os << "</table>\n";
      result_list.clear();
   }
   else
   {
      os << "<P><I>Results not available...</I></P>\n";
   }
   std::string result = os.str();
   // size_type (not unsigned int) so that the comparison with npos is
   // also correct where size_type is wider than unsigned int:
   const std::string::size_type pos = html_contents.find(tagname);
   if(pos != std::string::npos)
   {
      html_contents.replace(pos, tagname.size(), result);
   }
}
//
// output_final_html:
// Writes html_contents to the file named by html_out_file, or to
// std::cout when no output file name was given.  A failure to open the
// output file is reported on std::cerr rather than being ignored.
//
void output_final_html()
{
   if(html_out_file.empty())
   {
      std::cout << html_contents;
      return;
   }
   std::ofstream os(html_out_file.c_str());
   if(!os)
   {
      std::cerr << "Unable to open output file: \"" << html_out_file << "\"" << std::endl;
      return;
   }
   os << html_contents;
}

59
performance/input.html Normal file
View File

@ -0,0 +1,59 @@
<html>
<head>
<title>Regular Expression Performance Comparison</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
<meta name="Template" content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
</head>
<body bgcolor="#ffffff" link="#0000ff" vlink="#800080">
<h2>Regular Expression Performance Comparison</h2>
<p>The Boost and GRETA regular expression libraries have slightly different
interfaces, and it has been suggested that GRETA's interface allows for a more
efficient implementation. The following tables provide comparisons between:</p>
<p><a href="http://research.microsoft.com/projects/greta">GRETA</a>.</p>
<p><a href="http://www.boost.org/">The Boost regex library</a>.</p>
<p><a href="http://arglist.com/regex/">Henry Spencer's regular expression library</a>
- this is provided for comparison as a typical non-backtracking implementation.</p>
<p>
Times were obtained on a 2.8GHz Pentium&nbsp;4 PC running Windows XP, and the
code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever,
care should be taken in interpreting the results: only sensible regular
expressions (rather than pathological cases) are given, and most are taken from the
Boost regex examples, or from the <a href="http://www.regxlib.com/">Library of
Regular Expressions</a>. In addition, some variation in the relative
performance of these libraries can be expected on other machines - as memory
access and processor caching effects can be quite large for most finite state
machine algorithms.</p>
<h3>Comparison 1: Long Search</h3>
<p>For each of the following regular expressions the time taken to find all
occurrences of the expression within a long English language text was measured
(<a href="ftp://ibiblio.org/pub/docs/books/gutenberg/etext02/mtent12.zip">mtent12.txt</a>
from <a href="http://promo.net/pg/">Project Gutenberg</a>, 19Mb).&nbsp;</p>
<P>%long_twain_search%</P>
<h3>Comparison 2: Medium Sized Search</h3>
<p>For each of the following regular expressions the time taken to find all
occurrences of the expression within a medium sized English language text was
measured (the first 50K from mtent12.txt).&nbsp;</p>
<P>%short_twain_search%</P>
<H3>Comparison 3:&nbsp;C++ Code&nbsp;Search</H3>
<P>For each of the following regular expressions the time taken to find all
occurrences of the expression within the C++ source file <A href="../../../boost/crc.hpp">
boost/crc.hpp</A>&nbsp;was measured.&nbsp;</P>
<P>%code_search%</P>
<H3>Comparison 4: HTML Document Search</H3>
<P>For each of the following regular expressions the time taken to find all
occurrences of the expression within the html file <A href="../../libraries.htm">libs/libraries.htm</A>
was measured.&nbsp;</P>
<P>%html_search%</P>
<H3>Comparison 5: Simple Matches</H3>
<p>
For each of the following regular expressions the time taken to match against
the text indicated was measured.&nbsp;</p>
<P>%short_matches%</P>
<hr>
<p>Copyright John Maddock April 2003, all rights reserved.</p>
</body>
</html>

251
performance/main.cpp Normal file
View File

@ -0,0 +1,251 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include <iostream>
#include <fstream>
#include <iterator>
#include <cassert>
#include <boost/test/execution_monitor.hpp>
#include "regex_comparison.hpp"
//
// test_match:
// Times a match of expression "re" against "text" with each library
// enabled via the time_* flags, echoing every timing to std::cout, and
// appends the collected timings to result_list.
//   description - text recorded in the results and echoed to std::cout.
//   icase       - forwarded to each library's time_match().
//
void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase)
{
   results r(re, description);
   std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl;
#ifdef BOOST_HAS_GRETA
   if(time_greta)
   {
      r.greta_time = g::time_match(re, text, icase);
      std::cout << "\tGRETA regex: " << r.greta_time << "s\n";
   }
   if(time_safe_greta)
   {
      r.safe_greta_time = gs::time_match(re, text, icase);
      std::cout << "\tSafe GRETA regex: " << r.safe_greta_time << "s\n";
   }
#endif
   if(time_boost)
   {
      r.boost_time = b::time_match(re, text, icase);
      std::cout << "\tBoost regex: " << r.boost_time << "s\n";
   }
   if(time_localised_boost)
   {
      r.localised_boost_time = bl::time_match(re, text, icase);
      std::cout << "\tBoost regex (C++ locale): " << r.localised_boost_time << "s\n";
   }
#ifdef BOOST_HAS_POSIX
   if(time_posix)
   {
      r.posix_time = posix::time_match(re, text, icase);
      std::cout << "\tPOSIX regex: " << r.posix_time << "s\n";
   }
#endif
#ifdef BOOST_HAS_PCRE
   if(time_pcre)
   {
      r.pcre_time = pcr::time_match(re, text, icase);
      std::cout << "\tPCRE regex: " << r.pcre_time << "s\n";
   }
#endif
   // work out the best time for this test, then queue it for the next table:
   r.finalise();
   result_list.push_back(r);
}
//
// test_find_all:
// Times a search for all occurrences of expression "re" within "text"
// with each library enabled via the time_* flags, echoing every timing
// to std::cout, and appends the collected timings to result_list.
//   description - text recorded in the results.
//   icase       - forwarded to each library's time_find_all().
//
void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase)
{
   std::cout << "Testing: " << re << std::endl;
   results r(re, description);
#ifdef BOOST_HAS_GRETA
   if(time_greta)
   {
      r.greta_time = g::time_find_all(re, text, icase);
      std::cout << "\tGRETA regex: " << r.greta_time << "s\n";
   }
   if(time_safe_greta)
   {
      r.safe_greta_time = gs::time_find_all(re, text, icase);
      std::cout << "\tSafe GRETA regex: " << r.safe_greta_time << "s\n";
   }
#endif
   if(time_boost)
   {
      r.boost_time = b::time_find_all(re, text, icase);
      std::cout << "\tBoost regex: " << r.boost_time << "s\n";
   }
   if(time_localised_boost)
   {
      r.localised_boost_time = bl::time_find_all(re, text, icase);
      std::cout << "\tBoost regex (C++ locale): " << r.localised_boost_time << "s\n";
   }
#ifdef BOOST_HAS_POSIX
   if(time_posix)
   {
      r.posix_time = posix::time_find_all(re, text, icase);
      std::cout << "\tPOSIX regex: " << r.posix_time << "s\n";
   }
#endif
#ifdef BOOST_HAS_PCRE
   if(time_pcre)
   {
      r.pcre_time = pcr::time_find_all(re, text, icase);
      std::cout << "\tPCRE regex: " << r.pcre_time << "s\n";
   }
#endif
   // work out the best time for this test, then queue it for the next table:
   r.finalise();
   result_list.push_back(r);
}
//
// cpp_main:
// Program entry point.  Processes the command line, runs each selected
// group of tests, substitutes a results table for each group's tag in
// the HTML template and finally writes out the completed HTML.
// Returns 0 on success, or a non-zero value when the command line was
// rejected (or help was requested).
//
int cpp_main(int argc, char * argv[])
{
   // start by processing the command line args:
   if(argc < 2)
      return show_usage();
   int result = 0;
   for(int c = 1; c < argc; ++c)
   {
      result += handle_argument(argv[c]);
   }
   if(result)
      return result;
   if(test_matches)
   {
      // start with a simple test, this is basically a measure of the minimal overhead
      // involved in calling a regex matcher:
      test_match("abc", "abc");
      // these are from the regex docs:
      test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
      test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
      // these are from http://www.regxlib.com/
      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john_maddock@compuserve.com");
      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
      test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
      test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
   }
   output_html_results(true, "%short_matches%");
   std::string file_contents;
   if(test_code)
   {
      load_file(file_contents, "../../../boost/crc.hpp");
      const char* highlight_expression = // preprocessor directives: index 1
         "(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|"
         // comment: index 2
         "(//[^\\n]*|/\\*.*?\\*/)|"
         // literals: index 3
         "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
         // string literals: index 4
         "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
         // keywords: index 5
         "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
         "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
         "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
         "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
         "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
         "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
         "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
         "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
         "|using|virtual|void|volatile|wchar_t|while)\\>"
         ;
      const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
         "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?"
         "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
         "(\\{|:[^;\\{()]*\\{)";
      const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
      const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)";
      test_find_all(class_expression, file_contents);
      test_find_all(highlight_expression, file_contents);
      test_find_all(include_expression, file_contents);
      test_find_all(boost_include_expression, file_contents);
   }
   output_html_results(false, "%code_search%");
   if(test_html)
   {
      load_file(file_contents, "../../../libs/libraries.htm");
      test_find_all("beman|john|dave", file_contents, true);
      test_find_all("<p>.*?</p>", file_contents, true);
      test_find_all("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
      test_find_all("<h[12345678][^>]*>.*?</h[12345678]>", file_contents, true);
      test_find_all("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
      test_find_all("<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents, true);
   }
   output_html_results(false, "%html_search%");
   if(test_short_twain)
   {
      load_file(file_contents, "short_twain.txt");
      test_find_all("Twain", file_contents);
      test_find_all("Huck[[:alpha:]]+", file_contents);
      test_find_all("[[:alpha:]]+ing", file_contents);
      test_find_all("^[^\n]*?Twain", file_contents);
      test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
      test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
   }
   output_html_results(false, "%short_twain_search%");
   if(test_long_twain)
   {
      load_file(file_contents, "mtent12.txt");
      test_find_all("Twain", file_contents);
      test_find_all("Huck[[:alpha:]]+", file_contents);
      test_find_all("[[:alpha:]]+ing", file_contents);
      test_find_all("^[^\n]*?Twain", file_contents);
      test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
      //
      // The POSIX library is excluded from the last expression.
      // NOTE(review): the reason is not recorded here - presumably the
      // expression is too slow on the long text; confirm.
      // The user's original selection is restored afterwards: it used to
      // be forced to true, which added a POSIX column to the table below
      // even when POSIX testing had not been requested.
      //
      const bool saved_time_posix = time_posix;
      time_posix = false;
      test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
      time_posix = saved_time_posix;
   }
   output_html_results(false, "%long_twain_search%");
   output_final_html();
   return 0;
}

View File

@ -0,0 +1,135 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* All rights reserved.
* May not be transfered or disclosed to a third party without
* prior consent of the author.
*
*/
#ifndef REGEX_COMPARISON_HPP
#define REGEX_COMPARISON_HPP
#include <string>
#include <list>
#include <boost/limits.hpp>
//
// globals:
//
// which libraries to time (set from the command line by handle_argument):
extern bool time_boost;
extern bool time_localised_boost;
extern bool time_greta;
extern bool time_safe_greta;
extern bool time_posix;
extern bool time_pcre;
// which groups of tests to run (set from the command line by handle_argument):
extern bool test_matches;
extern bool test_short_twain;
extern bool test_long_twain;
extern bool test_code;
extern bool test_html;
// name of the HTML template file, name of the HTML output file, and the
// text of the template into which result tables are substituted:
extern std::string html_template_file;
extern std::string html_out_file;
extern std::string html_contents;
// processes one command line argument, returns 0 if it was accepted:
int handle_argument(const std::string& what);
// prints the help text, always returns 1:
int show_usage();
// replaces "text" with the contents of the named file,
// throws std::runtime_error if the file can not be opened:
void load_file(std::string& text, const char* file);
// replaces "tagname" in html_contents with a table built from result_list:
void output_html_results(bool show_description, const std::string& tagname);
// writes html_contents to html_out_file, or to std::cout if no file was named:
void output_final_html();
//
// struct results:
// Holds the timings obtained from each library for one test.
// A time of -1 means "no result available" for that library.
//
struct results
{
   double boost_time;
   double localised_boost_time;
   double greta_time;
   double safe_greta_time;
   double posix_time;
   double pcre_time;
   // the smallest non-negative time recorded, valid after finalise();
   // remains at numeric_limits<double>::max() if there were no timings:
   double factor;
   std::string expression;
   std::string description;
   //
   // Constructs a result for expression "ex" with description "desc",
   // with every timing marked as not available.
   //
   results(const std::string& ex, const std::string& desc)
      : boost_time(-1),
      localised_boost_time(-1),   // was previously left uninitialised
      greta_time(-1),
      safe_greta_time(-1),
      posix_time(-1),
      pcre_time(-1),
      factor(std::numeric_limits<double>::max()),
      expression(ex),
      description(desc)
   {}
   //
   // Sets "factor" to the best (smallest) of the timings recorded so far,
   // ignoring those that are not available (negative).
   //
   void finalise()
   {
      if((boost_time >= 0) && (boost_time < factor))
         factor = boost_time;
      // this used to compare factor with itself, so the localised Boost
      // timing never took part in choosing the best time:
      if((localised_boost_time >= 0) && (localised_boost_time < factor))
         factor = localised_boost_time;
      if((greta_time >= 0) && (greta_time < factor))
         factor = greta_time;
      if((safe_greta_time >= 0) && (safe_greta_time < factor))
         factor = safe_greta_time;
      if((posix_time >= 0) && (posix_time < factor))
         factor = posix_time;
      if((pcre_time >= 0) && (pcre_time < factor))
         factor = pcre_time;
   }
};
// results collected for the table currently being built:
extern std::list<results> result_list;
//
// One namespace per library under test, each declaring the same pair of
// functions taking the expression "re", the text to examine and a
// case-insensitivity flag:
//   time_match    - returns the time taken to match re against text.
//   time_find_all - returns the time taken to find all occurrences of re in text.
//
namespace b {
// boost tests:
double time_match(const std::string& re, const std::string& text, bool icase);
double time_find_all(const std::string& re, const std::string& text, bool icase);
}
namespace bl {
// localised boost tests:
double time_match(const std::string& re, const std::string& text, bool icase);
double time_find_all(const std::string& re, const std::string& text, bool icase);
}
namespace pcr {
// pcre tests:
double time_match(const std::string& re, const std::string& text, bool icase);
double time_find_all(const std::string& re, const std::string& text, bool icase);
}
namespace g {
// greta tests:
double time_match(const std::string& re, const std::string& text, bool icase);
double time_find_all(const std::string& re, const std::string& text, bool icase);
}
namespace gs {
// safe greta tests:
double time_match(const std::string& re, const std::string& text, bool icase);
double time_find_all(const std::string& re, const std::string& text, bool icase);
}
namespace posix {
// POSIX tests:
double time_match(const std::string& re, const std::string& text, bool icase);
double time_find_all(const std::string& re, const std::string& text, bool icase);
}
// Run one test with every selected library and record the results;
// "description" is the text stored alongside the timings:
void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
// Convenience overload: the text being matched doubles as the description.
inline void test_match(const std::string& re, const std::string& text, bool icase = false)
{ test_match(re, text, text, icase); }
// Convenience overload: the description is left empty.
inline void test_find_all(const std::string& re, const std::string& text, bool icase = false)
{ test_find_all(re, text, "", icase); }
// Number of times each timing run is repeated by the time_* functions,
// which report the least of the times obtained:
#define REPEAT_COUNT 10
#endif

View File

@ -0,0 +1,98 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include "regex_comparison.hpp"
#include <boost/timer.hpp>
#include <boost/regex.hpp>
namespace b{
double time_match(const std::string& re, const std::string& text, bool icase)
{
boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
boost::smatch what;
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_match(text, what, e);
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_match(text, what, e);
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
bool dummy_grep_proc(const boost::smatch&)
{ return true; }
double time_find_all(const std::string& re, const std::string& text, bool icase)
{
boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
boost::smatch what;
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_grep(&dummy_grep_proc, text, e);
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
if(result >10)
return result / iter;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_grep(&dummy_grep_proc, text, e);
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
}

125
performance/time_greta.cpp Normal file
View File

@ -0,0 +1,125 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include "regex_comparison.hpp"
#if defined(BOOST_HAS_GRETA)
#include <cassert>
#include <boost/timer.hpp>
#include "regexpr2.h"
namespace g{
double time_match(const std::string& re, const std::string& text, bool icase)
{
regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE));
regex::match_results what;
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
assert(e.match(text, what));
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
e.match(text, what);
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
e.match(text, what);
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
double time_find_all(const std::string& re, const std::string& text, bool icase)
{
regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE));
regex::match_results what;
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
e.match(text.begin(), text.end(), what);
while(what.backref(0).matched)
{
e.match(what.backref(0).end(), text.end(), what);
}
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
if(result > 10)
return result / iter;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
e.match(text.begin(), text.end(), what);
while(what.backref(0).matched)
{
e.match(what.backref(0).end(), text.end(), what);
}
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
}
#else
namespace g {
//
// Stand-ins used when BOOST_HAS_GRETA is not defined: no timing can be
// taken, so both functions report a negative time.
//
double time_match(const std::string&, const std::string&, bool)
{
   return -1.0;
}
double time_find_all(const std::string&, const std::string&, bool)
{
   return -1.0;
}
}
#endif

View File

@ -0,0 +1,98 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include "regex_comparison.hpp"
#include <boost/timer.hpp>
#include <boost/regex.hpp>
namespace bl{
double time_match(const std::string& re, const std::string& text, bool icase)
{
boost::reg_expression<char, boost::cpp_regex_traits<char> > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
boost::smatch what;
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_match(text, what, e);
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_match(text, what, e);
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
bool dummy_grep_proc(const boost::smatch&)
{ return true; }
double time_find_all(const std::string& re, const std::string& text, bool icase)
{
boost::reg_expression<char, boost::cpp_regex_traits<char> > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
boost::smatch what;
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_grep(&dummy_grep_proc, text, e);
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
if(result >10)
return result / iter;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
boost::regex_grep(&dummy_grep_proc, text, e);
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
}

180
performance/time_pcre.cpp Normal file
View File

@ -0,0 +1,180 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include <cassert>
#include <cfloat>
#include "regex_comparison.hpp"
#ifdef BOOST_HAS_PCRE
#include "pcre.h"
#include <boost/timer.hpp>
namespace pcr{
double time_match(const std::string& re, const std::string& text, bool icase)
{
pcre *ppcre;
const char *error;
int erroffset;
int what[50];
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
if(0 == (ppcre = pcre_compile(re.c_str(), (icase ? PCRE_CASELESS | PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE : PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE),
&error, &erroffset, NULL)))
{
free(ppcre);
return -1;
}
pcre_extra *pe;
pe = pcre_study(ppcre, 0, &error);
if(error)
{
free(ppcre);
free(pe);
return -1;
}
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what));
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what));
}
run = tim.elapsed();
result = std::min(run, result);
}
free(ppcre);
free(pe);
return result / iter;
}
double time_find_all(const std::string& re, const std::string& text, bool icase)
{
pcre *ppcre;
const char *error;
int erroffset;
int what[50];
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
int exec_result;
int matches;
if(0 == (ppcre = pcre_compile(re.c_str(), (icase ? PCRE_CASELESS | PCRE_DOTALL | PCRE_MULTILINE : PCRE_DOTALL | PCRE_MULTILINE), &error, &erroffset, NULL)))
{
free(ppcre);
return -1;
}
pcre_extra *pe;
pe = pcre_study(ppcre, 0, &error);
if(error)
{
free(ppcre);
free(pe);
return -1;
}
do
{
int startoff;
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
matches = 0;
startoff = 0;
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
while(exec_result >= 0)
{
++matches;
startoff = what[1];
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
}
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
if(result >10)
return result / iter;
result = DBL_MAX;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
int startoff;
matches = 0;
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
matches = 0;
startoff = 0;
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
while(exec_result >= 0)
{
++matches;
startoff = what[1];
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
}
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
}
#else
//
// Stand-ins compiled when the PCRE-based implementation above is
// excluded by the preprocessor. Neither function does any work: each
// returns -1, the same value the real implementation returns when it
// cannot compile the expression.
//
namespace pcr{

double time_match(const std::string& re, const std::string& text, bool icase)
{
   // the arguments are deliberately ignored:
   static_cast<void>(re);
   static_cast<void>(text);
   static_cast<void>(icase);
   return -1.0;
}

double time_find_all(const std::string& re, const std::string& text, bool icase)
{
   // the arguments are deliberately ignored:
   static_cast<void>(re);
   static_cast<void>(text);
   static_cast<void>(icase);
   return -1.0;
}

}
#endif

143
performance/time_posix.cpp Normal file
View File

@ -0,0 +1,143 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include <cassert>
#include <cfloat>
#include "regex_comparison.hpp"
#ifdef BOOST_HAS_POSIX
#include <boost/timer.hpp>
#include "regex.h"
namespace posix{
double time_match(const std::string& re, const std::string& text, bool icase)
{
regex_t e;
regmatch_t what[20];
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
return -1;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
regexec(&e, text.c_str(), e.re_nsub, what, 0);
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
regexec(&e, text.c_str(), e.re_nsub, what, 0);
}
run = tim.elapsed();
result = std::min(run, result);
}
regfree(&e);
return result / iter;
}
double time_find_all(const std::string& re, const std::string& text, bool icase)
{
regex_t e;
regmatch_t what[20];
memset(what, 0, sizeof(what));
boost::timer tim;
int iter = 1;
int counter, repeats;
double result = 0;
double run;
int exec_result;
int matches;
if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
return -1;
do
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
what[0].rm_so = 0;
what[0].rm_eo = text.size();
matches = 0;
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
while(exec_result == 0)
{
++matches;
what[0].rm_so = what[0].rm_eo;
what[0].rm_eo = text.size();
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
}
}
result = tim.elapsed();
iter *= 2;
}while(result < 0.5);
iter /= 2;
if(result >10)
return result / iter;
result = DBL_MAX;
// repeat test and report least value for consistency:
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
{
tim.restart();
for(counter = 0; counter < iter; ++counter)
{
what[0].rm_so = 0;
what[0].rm_eo = text.size();
matches = 0;
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
while(exec_result == 0)
{
++matches;
what[0].rm_so = what[0].rm_eo;
what[0].rm_eo = text.size();
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
}
}
run = tim.elapsed();
result = std::min(run, result);
}
return result / iter;
}
}
#else
//
// Stand-ins compiled when BOOST_HAS_POSIX is not defined. Neither
// function does any work: each returns -1, the same value the real
// implementation returns when regcomp fails.
//
namespace posix{

double time_match(const std::string& re, const std::string& text, bool icase)
{
   // the arguments are deliberately ignored:
   static_cast<void>(re);
   static_cast<void>(text);
   static_cast<void>(icase);
   return -1.0;
}

double time_find_all(const std::string& re, const std::string& text, bool icase)
{
   // the arguments are deliberately ignored:
   static_cast<void>(re);
   static_cast<void>(text);
   static_cast<void>(icase);
   return -1.0;
}

}
#endif

View File

@ -0,0 +1,127 @@
/*
*
* Copyright (c) 2002
* Dr John Maddock
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Dr John Maddock makes no representations
* about the suitability of this software for any purpose.
* It is provided "as is" without express or implied warranty.
*
*/
#include "regex_comparison.hpp"
#if defined(BOOST_HAS_GRETA)
#include <cassert>
#include <boost/timer.hpp>
#include "regexpr2.h"
namespace gs{
//
// Times a single rpattern::match call for expression re against text,
// with the pattern constructed in regex::MODE_SAFE.
//
// re:    the regular expression.
// text:  the text to be matched.
// icase: when true regex::NOCASE is added to the pattern flags.
//
// Returns the best time observed for one call (elapsed time as reported
// by boost::timer, divided by the number of calls timed).
//
double time_match(const std::string& re, const std::string& text, bool icase)
{
   regex::rpattern e(
      re,
      (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE),
      regex::MODE_SAFE);
   regex::match_results what;
   boost::timer tim;
   int iter = 1;
   int i, pass;
   double best = 0;
   assert(e.match(text, what));
   // calibration: double the number of calls until one measurement
   // takes at least 0.5, leaving iter at the count that achieved it:
   for(;;)
   {
      tim.restart();
      for(i = 0; i < iter; ++i)
         e.match(text, what);
      best = tim.elapsed();
      if(best >= 0.5)
         break;
      iter *= 2;
   }
   // now repeat the measurement, keeping the smallest time seen:
   for(pass = 0; pass < REPEAT_COUNT; ++pass)
   {
      tim.restart();
      for(i = 0; i < iter; ++i)
         e.match(text, what);
      const double run = tim.elapsed();
      best = std::min(run, best);
   }
   return best / iter;
}
//
// Times how long it takes to find every occurrence of re in text, with
// the pattern constructed in regex::MODE_SAFE. Each search is restarted
// at the end of the previous match until backref(0) is no longer matched.
//
// re:    the regular expression.
// text:  the text to be searched.
// icase: when true regex::NOCASE is added to the pattern flags.
//
// Returns the best time observed for one complete pass over text
// (elapsed time as reported by boost::timer, divided by the number of
// passes timed).
//
double time_find_all(const std::string& re, const std::string& text, bool icase)
{
   regex::rpattern e(
      re,
      (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE),
      regex::MODE_SAFE);
   regex::match_results what;
   boost::timer tim;
   int iter = 1;
   int i, pass;
   double best = 0;
   // calibration: double the number of passes until one measurement
   // takes at least 0.5, leaving iter at the count that achieved it:
   for(;;)
   {
      tim.restart();
      for(i = 0; i < iter; ++i)
      {
         e.match(text.begin(), text.end(), what);
         while(what.backref(0).matched)
            e.match(what.backref(0).end(), text.end(), what);
      }
      best = tim.elapsed();
      if(best >= 0.5)
         break;
      iter *= 2;
   }
   // a calibration run that took more than 10 is reported as it stands:
   if(best > 10)
      return best / iter;
   // now repeat the measurement, keeping the smallest time seen:
   for(pass = 0; pass < REPEAT_COUNT; ++pass)
   {
      tim.restart();
      for(i = 0; i < iter; ++i)
      {
         e.match(text.begin(), text.end(), what);
         while(what.backref(0).matched)
            e.match(what.backref(0).end(), text.end(), what);
      }
      const double run = tim.elapsed();
      best = std::min(run, best);
   }
   return best / iter;
}
}
#else
//
// Stand-ins compiled when BOOST_HAS_GRETA is not defined. Neither
// function does any work: each simply returns -1.
//
namespace gs{

double time_match(const std::string& re, const std::string& text, bool icase)
{
   // the arguments are deliberately ignored:
   static_cast<void>(re);
   static_cast<void>(text);
   static_cast<void>(icase);
   return -1.0;
}

double time_find_all(const std::string& re, const std::string& text, bool icase)
{
   // the arguments are deliberately ignored:
   static_cast<void>(re);
   static_cast<void>(text);
   static_cast<void>(icase);
   return -1.0;
}

}
#endif

View File

@ -0,0 +1,671 @@
<html>
<head>
<title>Regular Expression Performance Comparison</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
<meta name="Template" content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
</head>
<body bgcolor="#ffffff" link="#0000ff" vlink="#800080">
<h2>Regular Expression Performance Comparison</h2>
<p>The Boost and GRETA regular expression libraries have slightly different
interfaces, and it has been suggested that GRETA's interface allows for a more
efficient implementation. The following tables provide comparisons between:</p>
<p><a href="http://research.microsoft.com/projects/greta">GRETA</a>.</p>
<p><a href="http://www.boost.org/">The Boost regex library</a>.</p>
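<p><a href="http://arglist.com/regex/">Henry Spencer's regular expression library</a>
- this is provided for comparison as a typical non-backtracking implementation.</p>
<p><a href="http://www.pcre.org/">The PCRE library</a> - shown in the PCRE
column of each table.</p>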
<p>
Times were obtained on a 2.8GHz Pentium&nbsp;4 PC running Windows XP, and the
code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever,
care should be taken in interpreting the results: only sensible regular
expressions (rather than pathological cases) are given, and most are taken from
the Boost regex examples, or from the <a href="http://www.regxlib.com/">Library of
Regular Expressions</a>. In addition, some variation in the relative
performance of these libraries can be expected on other machines, as memory
access and processor caching effects can be quite large for most finite state
machine algorithms.</p>
<h3>Comparison 1: Long Search</h3>
<p>For each of the following regular expressions the time taken to find all
occurrences of the expression within a long English language text was measured
(<a href="ftp://ibiblio.org/pub/docs/books/gutenberg/etext02/mtent12.zip">mtent12.txt</a>
from <a href="http://promo.net/pg/">Project Gutenberg</a>, 19Mb).&nbsp;</p>
<P><table border="1" cellspacing="1">
<tr>
<td><strong>Expression</strong></td>
<td><strong>GRETA</strong></td>
<td><strong>GRETA<BR>
(non-recursive mode)</strong></td>
<td><strong>Boost</strong></td>
<td><strong>Boost + C++ locale</strong></td>
<td><strong>POSIX</strong></td>
<td><strong>PCRE</strong></td>
</tr>
<tr>
<td><code>Twain</code></td>
<td>9.29<BR>
(0.00309s)</td>
<td>32.9<BR>
(0.011s)</td>
<td>1.34<BR>
(0.000445s)</td>
<td>1.37<BR>
(0.000455s)</td>
<td>6.23<BR>
(0.00207s)</td>
<td><font color="#008000">1<BR>
(0.000333s)</font></td>
</tr>
<tr>
<td><code>Huck[[:alpha:]]+</code></td>
<td>12.9<BR>
(0.00309s)</td>
<td>44.4<BR>
(0.0106s)</td>
<td>1.79<BR>
(0.00043s)</td>
<td>1.82<BR>
(0.000436s)</td>
<td><font color="#008000">1<BR>
(0.00024s)</font></td>
<td><font color="#008000">1.06<BR>
(0.000254s)</font></td>
</tr>
<tr>
<td><code>[[:alpha:]]+ing</code></td>
<td>7.6<BR>
(0.0178s)</td>
<td>15.2<BR>
(0.0357s)</td>
<td><font color="#008000">1<BR>
(0.00235s)</font></td>
<td><font color="#008000">0.867<BR>
(0.00204s)</font></td>
<td>4.26<BR>
(0.01s)</td>
<td>6<BR>
(0.0141s)</td>
</tr>
<tr>
<td><code>^[^ ]*?Twain</code></td>
<td>5.92<BR>
(0.00626s)</td>
<td>16.3<BR>
(0.0172s)</td>
<td><font color="#008000">1<BR>
(0.00106s)</font></td>
<td><font color="#008000">0.666<BR>
(0.000704s)</font></td>
<td>NA</td>
<td>2.04<BR>
(0.00215s)</td>
</tr>
<tr>
<td><code>Tom|Sawyer|Huckleberry|Finn</code></td>
<td>8<BR>
(0.00828s)</td>
<td>20<BR>
(0.0207s)</td>
<td><font color="#008000">1<BR>
(0.00104s)</font></td>
<td><font color="#008000">0.585<BR>
(0.000605s)</font></td>
<td>42.3<BR>
(0.0438s)</td>
<td>1.12<BR>
(0.00115s)</td>
</tr>
<tr>
<td><code>(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)</code></td>
<td>6.42<BR>
(0.012s)</td>
<td>16.3<BR>
(0.0307s)</td>
<td><font color="#008000">1<BR>
(0.00188s)</font></td>
<td><font color="#008000">0.719<BR>
(0.00135s)</font></td>
<td>NA</td>
<td>1.21<BR>
(0.00227s)</td>
</tr>
</table>
</P>
<h3>Comparison 2: Medium Sized Search</h3>
<p>For each of the following regular expressions the time taken to find all
occurrences of the expression within a medium sized English language text was
measured (the first 50K from mtent12.txt).&nbsp;</p>
<P><table border="1" cellspacing="1">
<tr>
<td><strong>Expression</strong></td>
<td><strong>GRETA</strong></td>
<td><strong>GRETA<BR>
(non-recursive mode)</strong></td>
<td><strong>Boost</strong></td>
<td><strong>Boost + C++ locale</strong></td>
<td><strong>POSIX</strong></td>
<td><strong>PCRE</strong></td>
</tr>
<tr>
<td><code>Twain</code></td>
<td>9.29<BR>
(0.00309s)</td>
<td>32.5<BR>
(0.0108s)</td>
<td>1.34<BR>
(0.000445s)</td>
<td>1.37<BR>
(0.000455s)</td>
<td>6.24<BR>
(0.00207s)</td>
<td><font color="#008000">1<BR>
(0.000333s)</font></td>
</tr>
<tr>
<td><code>Huck[[:alpha:]]+</code></td>
<td>12.9<BR>
(0.00309s)</td>
<td>47<BR>
(0.0113s)</td>
<td>1.77<BR>
(0.000425s)</td>
<td>1.84<BR>
(0.00044s)</td>
<td><font color="#008000">1<BR>
(0.00024s)</font></td>
<td><font color="#008000">1.04<BR>
(0.00025s)</font></td>
</tr>
<tr>
<td><code>[[:alpha:]]+ing</code></td>
<td>7.61<BR>
(0.0178s)</td>
<td>15.2<BR>
(0.0356s)</td>
<td><font color="#008000">1<BR>
(0.00234s)</font></td>
<td><font color="#008000">0.867<BR>
(0.00203s)</font></td>
<td>4.27<BR>
(0.01s)</td>
<td>5.94<BR>
(0.0139s)</td>
</tr>
<tr>
<td><code>^[^ ]*?Twain</code></td>
<td>5.72<BR>
(0.00626s)</td>
<td>15.5<BR>
(0.0169s)</td>
<td><font color="#008000">1<BR>
(0.00109s)</font></td>
<td><font color="#008000">0.644<BR>
(0.000704s)</font></td>
<td>NA</td>
<td>1.93<BR>
(0.00211s)</td>
</tr>
<tr>
<td><code>Tom|Sawyer|Huckleberry|Finn</code></td>
<td>7.85<BR>
(0.00828s)</td>
<td>19.9<BR>
(0.021s)</td>
<td><font color="#008000">1<BR>
(0.00105s)</font></td>
<td><font color="#008000">0.575<BR>
(0.000606s)</font></td>
<td>41.5<BR>
(0.0438s)</td>
<td><font color="#008000">1.09<BR>
(0.00115s)</font></td>
</tr>
<tr>
<td><code>(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)</code></td>
<td>6.49<BR>
(0.012s)</td>
<td>16.5<BR>
(0.0307s)</td>
<td><font color="#008000">1<BR>
(0.00186s)</font></td>
<td><font color="#008000">0.737<BR>
(0.00137s)</font></td>
<td>216<BR>
(0.401s)</td>
<td>1.24<BR>
(0.00231s)</td>
</tr>
</table>
</P>
<H3>Comparison 3:&nbsp;C++ Code&nbsp;Search</H3>
<P>For each of the following regular expressions the time taken to find all
occurrences of the expression within the C++ source file <A href="../../../boost/crc.hpp">
boost/crc.hpp</A>&nbsp;was measured.&nbsp;</P>
<P><table border="1" cellspacing="1">
<tr>
<td><strong>Expression</strong></td>
<td><strong>GRETA</strong></td>
<td><strong>GRETA<BR>
(non-recursive mode)</strong></td>
<td><strong>Boost</strong></td>
<td><strong>Boost + C++ locale</strong></td>
<td><strong>POSIX</strong></td>
<td><strong>PCRE</strong></td>
</tr>
<tr>
<td><code>^(template[[:space:]]*&lt;[^;:{]+&gt;[[:space:]]*)?(class|struct)[[:space:]]*(\&lt;\w+\&gt;([
]*\([^)]*\))?[[:space:]]*)*(\&lt;\w*\&gt;)[[:space:]]*(&lt;[^;:{]+&gt;[[:space:]]*)?(\{|:[^;\{()]*\{)</code></td>
<td>9.58<BR>
(0.0019s)</td>
<td>40.3<BR>
(0.00798s)</td>
<td><font color="#008000">1<BR>
(0.000198s)</font></td>
<td><font color="#008000">0.901<BR>
(0.000178s)</font></td>
<td>607<BR>
(0.12s)</td>
<td>3.16<BR>
(0.000626s)</td>
</tr>
<tr>
<td><code>(^[
]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\&lt;([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\&gt;|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\&lt;(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\&gt;</code></td>
<td><font color="#008000">1<BR>
(0.0116s)</font></td>
<td>2.7<BR>
(0.0313s)</td>
<td>1.22<BR>
(0.0141s)</td>
<td><font color="#008000">0.946<BR>
(0.011s)</font></td>
<td>NA</td>
<td>1.41<BR>
(0.0163s)</td>
</tr>
<tr>
<td><code>^[ ]*#[ ]*include[ ]+("[^"]+"|&lt;[^&gt;]+&gt;)</code></td>
<td>6.05<BR>
(0.00195s)</td>
<td>25.7<BR>
(0.0083s)</td>
<td><font color="#008000">1<BR>
(0.000323s)</font></td>
<td><font color="#008000">0.939<BR>
(0.000303s)</font></td>
<td>107<BR>
(0.0344s)</td>
<td>1.69<BR>
(0.000547s)</td>
</tr>
<tr>
<td><code>^[ ]*#[ ]*include[ ]+("boost/[^"]+"|&lt;boost[^&gt;]+&gt;)</code></td>
<td>5.8<BR>
(0.00196s)</td>
<td>24.6<BR>
(0.0083s)</td>
<td><font color="#008000">1<BR>
(0.000337s)</font></td>
<td><font color="#008000">1.07<BR>
(0.000362s)</font></td>
<td>122<BR>
(0.0413s)</td>
<td>1.59<BR>
(0.000538s)</td>
</tr>
</table>
</P>
<H3>Comparison 4: HTML Document Search</H3>
<P>For each of the following regular expressions the time taken to find all
occurrences of the expression within the html file <A href="../../libraries.htm">libs/libraries.htm</A>
was measured.&nbsp;</P>
<P><table border="1" cellspacing="1">
<tr>
<td><strong>Expression</strong></td>
<td><strong>GRETA</strong></td>
<td><strong>GRETA<BR>
(non-recursive mode)</strong></td>
<td><strong>Boost</strong></td>
<td><strong>Boost + C++ locale</strong></td>
<td><strong>POSIX</strong></td>
<td><strong>PCRE</strong></td>
</tr>
<tr>
<td><code>beman|john|dave</code></td>
<td>6.69<BR>
(0.00321s)</td>
<td>18.9<BR>
(0.00908s)</td>
<td><font color="#008000">1<BR>
(0.000479s)</font></td>
<td><font color="#008000">0.561<BR>
(0.000269s)</font></td>
<td>23.8<BR>
(0.0114s)</td>
<td><font color="#008000">1<BR>
(0.000479s)</font></td>
</tr>
<tr>
<td><code>&lt;p&gt;.*?&lt;/p&gt;</code></td>
<td>5.89<BR>
(0.00164s)</td>
<td>19.6<BR>
(0.00548s)</td>
<td><font color="#008000">1<BR>
(0.000279s)</font></td>
<td><font color="#008000">1.05<BR>
(0.000293s)</font></td>
<td>NA</td>
<td>1.11<BR>
(0.000308s)</td>
</tr>
<tr>
<td><code>&lt;a[^&gt;]+href=("[^"]*"|[^[:space:]]+)[^&gt;]*&gt;</code></td>
<td>3.94<BR>
(0.00219s)</td>
<td>10.4<BR>
(0.00579s)</td>
<td><font color="#008000">1.09<BR>
(0.000606s)</font></td>
<td><font color="#008000">0.825<BR>
(0.000459s)</font></td>
<td>221<BR>
(0.123s)</td>
<td><font color="#008000">1<BR>
(0.000557s)</font></td>
</tr>
<tr>
<td><code>&lt;h[12345678][^&gt;]*&gt;.*?&lt;/h[12345678]&gt;</code></td>
<td>6.07<BR>
(0.0016s)</td>
<td>19.8<BR>
(0.00524s)</td>
<td>1.37<BR>
(0.000362s)</td>
<td><font color="#008000">0.722<BR>
(0.000191s)</font></td>
<td>NA</td>
<td><font color="#008000">1<BR>
(0.000264s)</font></td>
</tr>
<tr>
<td><code>&lt;img[^&gt;]+src=("[^"]*"|[^[:space:]]+)[^&gt;]*&gt;</code></td>
<td>6.77<BR>
(0.00162s)</td>
<td>22.5<BR>
(0.0054s)</td>
<td>1.1<BR>
(0.000264s)</td>
<td>1.2<BR>
(0.000289s)</td>
<td>120<BR>
(0.0288s)</td>
<td><font color="#008000">1<BR>
(0.00024s)</font></td>
</tr>
<tr>
<td><code>&lt;font[^&gt;]+face=("[^"]*"|[^[:space:]]+)[^&gt;]*&gt;.*?&lt;/font&gt;</code></td>
<td>6.77<BR>
(0.00162s)</td>
<td>22.5<BR>
(0.0054s)</td>
<td>1.12<BR>
(0.000269s)</td>
<td>1.2<BR>
(0.000289s)</td>
<td>NA</td>
<td><font color="#008000">1<BR>
(0.00024s)</font></td>
</tr>
</table>
</P>
<H3>Comparison 5: Simple Matches</H3>
<p>
For each of the following regular expressions the time taken to match against
the text indicated was measured.&nbsp;</p>
<P><table border="1" cellspacing="1">
<tr>
<td><strong>Expression</strong></td>
<td><strong>Text</strong></td>
<td><strong>GRETA</strong></td>
<td><strong>GRETA<BR>
(non-recursive mode)</strong></td>
<td><strong>Boost</strong></td>
<td><strong>Boost + C++ locale</strong></td>
<td><strong>POSIX</strong></td>
<td><strong>PCRE</strong></td>
</tr>
<tr>
<td><code>abc</code></td>
<td>abc</td>
<td>1.43<BR>
(2.25e-007s)</td>
<td>1.85<BR>
(2.91e-007s)</td>
<td>1.27<BR>
(2.01e-007s)</td>
<td>1.29<BR>
(2.03e-007s)</td>
<td>1.94<BR>
(3.06e-007s)</td>
<td><font color="#008000">1<BR>
(1.58e-007s)</font></td>
</tr>
<tr>
<td><code>^([0-9]+)(\-| |$)(.*)$</code></td>
<td>100- this is a line of ftp response which contains a message string</td>
<td><font color="#008000">1<BR>
(6.97e-007s)</font></td>
<td>2.69<BR>
(1.87e-006s)</td>
<td>1.89<BR>
(1.32e-006s)</td>
<td>1.86<BR>
(1.3e-006s)</td>
<td>298<BR>
(0.000208s)</td>
<td>1.29<BR>
(8.98e-007s)</td>
</tr>
<tr>
<td><code>([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}</code></td>
<td>1234-5678-1234-456</td>
<td>2.41<BR>
(2.14e-006s)</td>
<td>2.97<BR>
(2.64e-006s)</td>
<td>2.37<BR>
(2.1e-006s)</td>
<td>2.24<BR>
(1.99e-006s)</td>
<td>29.6<BR>
(2.63e-005s)</td>
<td><font color="#008000">1<BR>
(8.88e-007s)</font></td>
</tr>
<tr>
<td><code>^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$</code></td>
<td>john_maddock@compuserve.com</td>
<td>1.83<BR>
(3.7e-006s)</td>
<td>2.34<BR>
(4.74e-006s)</td>
<td>1.59<BR>
(3.21e-006s)</td>
<td>1.55<BR>
(3.13e-006s)</td>
<td>172<BR>
(0.000347s)</td>
<td><font color="#008000">1<BR>
(2.02e-006s)</font></td>
</tr>
<tr>
<td><code>^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$</code></td>
<td>foo12@foo.edu</td>
<td>1.71<BR>
(3.09e-006s)</td>
<td>2.19<BR>
(3.97e-006s)</td>
<td>1.71<BR>
(3.09e-006s)</td>
<td>1.64<BR>
(2.98e-006s)</td>
<td>123<BR>
(0.000222s)</td>
<td><font color="#008000">1<BR>
(1.81e-006s)</font></td>
</tr>
<tr>
<td><code>^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$</code></td>
<td>bob.smith@foo.tv</td>
<td>1.72<BR>
(3.09e-006s)</td>
<td>2.21<BR>
(3.97e-006s)</td>
<td>1.72<BR>
(3.09e-006s)</td>
<td>1.7<BR>
(3.06e-006s)</td>
<td>133<BR>
(0.00024s)</td>
<td><font color="#008000">1<BR>
(1.79e-006s)</font></td>
</tr>
<tr>
<td><code>^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$</code></td>
<td>EH10 2QQ</td>
<td>1.29<BR>
(9.37e-007s)</td>
<td>1.71<BR>
(1.24e-006s)</td>
<td>1.29<BR>
(9.35e-007s)</td>
<td>1.18<BR>
(8.59e-007s)</td>
<td>7.79<BR>
(5.65e-006s)</td>
<td><font color="#008000">1<BR>
(7.26e-007s)</font></td>
</tr>
<tr>
<td><code>^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$</code></td>
<td>G1 1AA</td>
<td>1.41<BR>
(9.26e-007s)</td>
<td>2<BR>
(1.32e-006s)</td>
<td>1.38<BR>
(9.07e-007s)</td>
<td>1.31<BR>
(8.6e-007s)</td>
<td>7.41<BR>
(4.88e-006s)</td>
<td><font color="#008000">1<BR>
(6.59e-007s)</font></td>
</tr>
<tr>
<td><code>^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$</code></td>
<td>SW1 1ZZ</td>
<td>1.45<BR>
(9.54e-007s)</td>
<td>1.88<BR>
(1.24e-006s)</td>
<td>1.42<BR>
(9.36e-007s)</td>
<td>1.32<BR>
(8.69e-007s)</td>
<td>7.77<BR>
(5.12e-006s)</td>
<td><font color="#008000">1<BR>
(6.59e-007s)</font></td>
</tr>
<tr>
<td><code>^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$</code></td>
<td>4/1/2001</td>
<td>1.74<BR>
(1.01e-006s)</td>
<td>2.3<BR>
(1.34e-006s)</td>
<td>1.33<BR>
(7.73e-007s)</td>
<td>1.3<BR>
(7.54e-007s)</td>
<td>9.85<BR>
(5.73e-006s)</td>
<td><font color="#008000">1<BR>
(5.82e-007s)</font></td>
</tr>
<tr>
<td><code>^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$</code></td>
<td>12/12/2001</td>
<td>1.77<BR>
(1.01e-006s)</td>
<td>2.23<BR>
(1.28e-006s)</td>
<td>1.45<BR>
(8.31e-007s)</td>
<td>1.38<BR>
(7.93e-007s)</td>
<td>11.1<BR>
(6.34e-006s)</td>
<td><font color="#008000">1<BR>
(5.73e-007s)</font></td>
</tr>
<tr>
<td><code>^[-+]?[[:digit:]]*\.?[[:digit:]]*$</code></td>
<td>123</td>
<td>1.23<BR>
(7.65e-007s)</td>
<td>1.66<BR>
(1.03e-006s)</td>
<td>1.4<BR>
(8.69e-007s)</td>
<td>1.31<BR>
(8.12e-007s)</td>
<td>4.86<BR>
(3.02e-006s)</td>
<td><font color="#008000">1<BR>
(6.21e-007s)</font></td>
</tr>
<tr>
<td><code>^[-+]?[[:digit:]]*\.?[[:digit:]]*$</code></td>
<td>+3.14159</td>
<td>1.59<BR>
(1.05e-006s)</td>
<td>1.97<BR>
(1.3e-006s)</td>
<td>1.45<BR>
(9.54e-007s)</td>
<td>1.32<BR>
(8.69e-007s)</td>
<td>9.51<BR>
(6.26e-006s)</td>
<td><font color="#008000">1<BR>
(6.59e-007s)</font></td>
</tr>
<tr>
<td><code>^[-+]?[[:digit:]]*\.?[[:digit:]]*$</code></td>
<td>-3.14159</td>
<td>1.64<BR>
(1.07e-006s)</td>
<td>2<BR>
(1.3e-006s)</td>
<td>1.44<BR>
(9.35e-007s)</td>
<td>1.35<BR>
(8.78e-007s)</td>
<td>9.53<BR>
(6.19e-006s)</td>
<td><font color="#008000">1<BR>
(6.49e-007s)</font></td>
</tr>
</table>
</P>
<hr>
<p>Copyright John Maddock September 2002, all rights reserved.</p>
</body>
</html>