mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 12:07:28 +02:00
Added performance test files
[SVN r18134]
This commit is contained in:
285
performance/command_line.cpp
Normal file
285
performance/command_line.cpp
Normal file
@ -0,0 +1,285 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <fstream>
|
||||
#include <deque>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <iterator>
|
||||
#include "regex_comparison.hpp"
|
||||
|
||||
//
|
||||
// globals:
|
||||
//
|
||||
bool time_boost = false;
|
||||
bool time_localised_boost = false;
|
||||
bool time_greta = false;
|
||||
bool time_safe_greta = false;
|
||||
bool time_posix = false;
|
||||
bool time_pcre = false;
|
||||
|
||||
bool test_matches = false;
|
||||
bool test_code = false;
|
||||
bool test_html = false;
|
||||
bool test_short_twain = false;
|
||||
bool test_long_twain = false;
|
||||
|
||||
|
||||
std::string html_template_file;
|
||||
std::string html_out_file;
|
||||
std::string html_contents;
|
||||
std::list<results> result_list;
|
||||
|
||||
int handle_argument(const std::string& what)
|
||||
{
|
||||
if(what == "-b")
|
||||
time_boost = true;
|
||||
else if(what == "-bl")
|
||||
time_localised_boost = true;
|
||||
#ifdef BOOST_HAS_GRETA
|
||||
else if(what == "-g")
|
||||
time_greta = true;
|
||||
else if(what == "-gs")
|
||||
time_safe_greta = true;
|
||||
#endif
|
||||
#ifdef BOOST_HAS_POSIX
|
||||
else if(what == "-posix")
|
||||
time_posix = true;
|
||||
#endif
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
else if(what == "-pcre")
|
||||
time_pcre = true;
|
||||
#endif
|
||||
else if(what == "-all")
|
||||
{
|
||||
time_boost = true;
|
||||
time_localised_boost = true;
|
||||
#ifdef BOOST_HAS_GRETA
|
||||
time_greta = true;
|
||||
time_safe_greta = true;
|
||||
#endif
|
||||
#ifdef BOOST_HAS_POSIX
|
||||
time_posix = true;
|
||||
#endif
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
time_pcre = true;
|
||||
#endif
|
||||
}
|
||||
else if(what == "-test-matches")
|
||||
test_matches = true;
|
||||
else if(what == "-test-code")
|
||||
test_code = true;
|
||||
else if(what == "-test-html")
|
||||
test_html = true;
|
||||
else if(what == "-test-short-twain")
|
||||
test_short_twain = true;
|
||||
else if(what == "-test-long-twain")
|
||||
test_long_twain = true;
|
||||
else if(what == "-test-all")
|
||||
{
|
||||
test_matches = true;
|
||||
test_code = true;
|
||||
test_html = true;
|
||||
test_short_twain = true;
|
||||
test_long_twain = true;
|
||||
}
|
||||
else if((what == "-h") || (what == "--help"))
|
||||
return show_usage();
|
||||
else if((what[0] == '-') || (what[0] == '/'))
|
||||
{
|
||||
std::cerr << "Unknown argument: \"" << what << "\"" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
else if(html_template_file.size() == 0)
|
||||
{
|
||||
html_template_file = what;
|
||||
load_file(html_contents, what.c_str());
|
||||
}
|
||||
else if(html_out_file.size() == 0)
|
||||
html_out_file = what;
|
||||
else
|
||||
{
|
||||
std::cerr << "Unexpected argument: \"" << what << "\"" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
// show_usage:
// Prints the command line help to std::cout. The library options listed
// depend on which of BOOST_HAS_GRETA / BOOST_HAS_POSIX / BOOST_HAS_PCRE are
// defined. Always returns 1, so callers can hand the value back as a
// "do not continue" status.
//
int show_usage()
{
   std::cout <<
      "Usage\n"
      "regex_comparison [-h] [library options] [test options] [html_template html_output_file]\n"
      " -h Show help\n\n"
      " library options:\n"
      " -b Apply tests to boost library\n"
      " -bl Apply tests to boost library with C++ locale\n"
#ifdef BOOST_HAS_GRETA
      " -g Apply tests to GRETA library\n"
      " -gs Apply tests to GRETA library (in non-recursive mode)\n"
#endif
#ifdef BOOST_HAS_POSIX
      " -posix Apply tests to POSIX library\n"
#endif
#ifdef BOOST_HAS_PCRE
      " -pcre Apply tests to PCRE library\n"
#endif
      " -all Apply tests to all libraries\n\n"
      " test options:\n"
      " -test-matches Test short matches\n"
      " -test-code Test c++ code examples\n"
      // was a copy of the -test-code description:
      " -test-html Test HTML document searches\n"
      " -test-short-twain Test short searches\n"
      " -test-long-twain Test long searches\n"
      " -test-all Test everything\n";
   return 1;
}
|
||||
|
||||
//
// load_file:
// Replaces the contents of text with the contents of the named file.
// Throws std::runtime_error if the file cannot be opened.
//
void load_file(std::string& text, const char* file)
{
   std::ifstream is(file);
   if(!is.good())
   {
      std::string msg("Unable to open file: \"");
      msg.append(file);
      msg.append("\"");
      throw std::runtime_error(msg);
   }
   // Find the file size so the string can be sized in one go; this is only
   // a hint, so a failed seek/tell must not stop the read below:
   is.seekg(0, std::ios_base::end);
   const std::streamoff size = is.tellg();
   is.clear();
   is.seekg(0, std::ios_base::beg);
   text.erase();
   if(size > 0)
      text.reserve(static_cast<std::string::size_type>(size));
   text.assign(std::istreambuf_iterator<char>(is), std::istreambuf_iterator<char>());
}
|
||||
|
||||
//
// print_result:
// Writes one HTML table cell for a single timing.
//   os   - stream receiving the cell.
//   time - measured time in seconds; a negative value means "not measured"
//          and produces an "NA" cell.
//   best - time the cell is reported relative to; callers pass
//          results::factor here. Expected to be positive.
// The cell shows time/best followed by the absolute time scaled to the
// largest unit (s, ms, us, ns, ps) in which it is at least 1. Cells within
// 10% of the best time are highlighted in green.
//
void print_result(std::ostream& os, double time, double best)
{
   static const char* const suffixes[] = {"s", "ms", "us", "ns", "ps", };
   static const unsigned suffix_count = sizeof(suffixes) / sizeof(suffixes[0]);

   if(time < 0)
   {
      os << "<td>NA</td>";
      return;
   }
   double rel = time / best;
   bool highlight = ((rel > 0) && (rel < 1.1));
   unsigned suffix = 0;
   // Scale sub-second times up to a readable unit. The original condition
   // (time < 0) could never hold here, so everything was printed in seconds.
   // A zero time is left alone, and we never run off the end of the table:
   while((time > 0) && (time < 1) && (suffix + 1 < suffix_count))
   {
      time *= 1000;
      ++suffix;
   }
   os << "<td>";
   if(highlight)
      os << "<font color=\"#008000\">";
   if(rel <= 1000)
      os << std::setprecision(3) << rel;
   else
      os << static_cast<int>(rel);
   os << "<BR>(";
   if(time <= 1000)
      os << std::setprecision(3) << time;
   else
      os << static_cast<int>(time);
   os << suffixes[suffix] << ")";
   if(highlight)
      os << "</font>";
   os << "</td>";
}
|
||||
|
||||
void output_html_results(bool show_description, const std::string& tagname)
|
||||
{
|
||||
std::stringstream os;
|
||||
if(result_list.size())
|
||||
{
|
||||
//
|
||||
// start by outputting the table header:
|
||||
//
|
||||
os << "<table border=\"1\" cellspacing=\"1\">\n";
|
||||
os << "<tr><td><strong>Expression</strong></td>";
|
||||
if(show_description)
|
||||
os << "<td><strong>Text</strong></td>";
|
||||
#if defined(BOOST_HAS_GRETA)
|
||||
if(time_greta == true)
|
||||
os << "<td><strong>GRETA</strong></td>";
|
||||
if(time_safe_greta == true)
|
||||
os << "<td><strong>GRETA<BR>(non-recursive mode)</strong></td>";
|
||||
#endif
|
||||
if(time_boost == true)
|
||||
os << "<td><strong>Boost</strong></td>";
|
||||
if(time_localised_boost == true)
|
||||
os << "<td><strong>Boost + C++ locale</strong></td>";
|
||||
#if defined(BOOST_HAS_POSIX)
|
||||
if(time_posix == true)
|
||||
os << "<td><strong>POSIX</strong></td>";
|
||||
#endif
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
if(time_pcre == true)
|
||||
os << "<td><strong>PCRE</strong></td>";
|
||||
#endif
|
||||
os << "</tr>\n";
|
||||
|
||||
//
|
||||
// Now enumerate through all the test results:
|
||||
//
|
||||
std::list<results>::const_iterator first, last;
|
||||
first = result_list.begin();
|
||||
last = result_list.end();
|
||||
while(first != last)
|
||||
{
|
||||
os << "<tr><td><code>" << first->expression << "</code></td>";
|
||||
if(show_description)
|
||||
os << "<td>" << first->description << "</td>";
|
||||
#if defined(BOOST_HAS_GRETA)
|
||||
if(time_greta == true)
|
||||
print_result(os, first->greta_time, first->factor);
|
||||
if(time_safe_greta == true)
|
||||
print_result(os, first->safe_greta_time, first->factor);
|
||||
#endif
|
||||
#if defined(BOOST_HAS_POSIX)
|
||||
if(time_boost == true)
|
||||
print_result(os, first->boost_time, first->factor);
|
||||
if(time_localised_boost == true)
|
||||
print_result(os, first->localised_boost_time, first->factor);
|
||||
#endif
|
||||
if(time_posix == true)
|
||||
print_result(os, first->posix_time, first->factor);
|
||||
#if defined(BOOST_HAS_PCRE)
|
||||
if(time_pcre == true)
|
||||
print_result(os, first->pcre_time, first->factor);
|
||||
#endif
|
||||
os << "</tr>\n";
|
||||
++first;
|
||||
}
|
||||
os << "</table>\n";
|
||||
result_list.clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
os << "<P><I>Results not available...</I></P>\n";
|
||||
}
|
||||
|
||||
std::string result = os.str();
|
||||
|
||||
unsigned int pos = html_contents.find(tagname);
|
||||
if(pos != std::string::npos)
|
||||
{
|
||||
html_contents.replace(pos, tagname.size(), result);
|
||||
}
|
||||
}
|
||||
|
||||
void output_final_html()
|
||||
{
|
||||
if(html_out_file.size())
|
||||
{
|
||||
std::ofstream os(html_out_file.c_str());
|
||||
os << html_contents;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << html_contents;
|
||||
}
|
||||
}
|
59
performance/input.html
Normal file
59
performance/input.html
Normal file
@ -0,0 +1,59 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Regular Expression Performance Comparison</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
|
||||
<meta name="Template" content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
|
||||
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
|
||||
</head>
|
||||
<body bgcolor="#ffffff" link="#0000ff" vlink="#800080">
|
||||
<h2>Regular Expression Performance Comparison</h2>
|
||||
<p>The Boost and GRETA regular expression libraries have slightly different
|
||||
interfaces, and it has been suggested that GRETA's interface allows for a more
|
||||
efficient implementation. The following tables provide comparisons between:</p>
|
||||
<p><a href="http://research.microsoft.com/projects/greta">GRETA</a>.</p>
|
||||
<p><a href="http://www.boost.org/">The Boost regex library</a>.</p>
|
||||
<p><a href="http://arglist.com/regex/">Henry Spencer's regular expression library</a>
|
||||
- this is provided for comparison as a typical non-backtracking implementation.</p>
|
||||
<p>
|
||||
Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the
|
||||
code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever,
care should be taken in interpreting the results: only sensible regular
expressions (rather than pathological cases) are given; most are taken from the
|
||||
Boost regex examples, or from the <a href="http://www.regxlib.com/">Library of
|
||||
Regular Expressions</a>. In addition, some variation in the relative
|
||||
performance of these libraries can be expected on other machines - as memory
|
||||
access and processor caching effects can be quite large for most finite state
|
||||
machine algorithms.</p>
|
||||
<h3>Comparison 1: Long Search</h3>
|
||||
<p>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within a long English language text was measured
|
||||
(<a href="ftp://ibiblio.org/pub/docs/books/gutenberg/etext02/mtent12.zip">mtent12.txt</a>
|
||||
from <a href="http://promo.net/pg/">Project Gutenberg</a>, 19Mb). </p>
|
||||
<P>%long_twain_search%</P>
|
||||
<h3>Comparison 2: Medium Sized Search</h3>
|
||||
<p>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within a medium sized English language text was
|
||||
measured (the first 50K from mtent12.txt). </p>
|
||||
<P>%short_twain_search%</P>
|
||||
<H3>Comparison 3: C++ Code Search</H3>
|
||||
<P>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within the C++ source file <A href="../../../boost/crc.hpp">
|
||||
boost/crc.hpp</A> was measured. </P>
|
||||
<P>%code_search%</P>
|
||||
<H3>Comparison 4: HTML Document Search</H3>
|
||||
<P>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within the html file <A href="../../libraries.htm">libs/libraries.htm</A>
|
||||
was measured. </P>
|
||||
<P>%html_search%</P>
|
||||
<H3>Comparison 5: Simple Matches</H3>
|
||||
<p>
|
||||
For each of the following regular expressions the time taken to match against
|
||||
the text indicated was measured. </p>
|
||||
<P>%short_matches%</P>
|
||||
<hr>
|
||||
<p>Copyright John Maddock April 2003, all rights reserved.</p>
|
||||
</body>
|
||||
</html>
|
251
performance/main.cpp
Normal file
251
performance/main.cpp
Normal file
@ -0,0 +1,251 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
#include <cassert>
|
||||
#include <boost/test/execution_monitor.hpp>
|
||||
#include "regex_comparison.hpp"
|
||||
|
||||
|
||||
void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase)
|
||||
{
|
||||
double time;
|
||||
results r(re, description);
|
||||
|
||||
std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl;
|
||||
|
||||
#ifdef BOOST_HAS_GRETA
|
||||
if(time_greta == true)
|
||||
{
|
||||
time = g::time_match(re, text, icase);
|
||||
r.greta_time = time;
|
||||
std::cout << "\tGRETA regex: " << time << "s\n";
|
||||
}
|
||||
if(time_safe_greta == true)
|
||||
{
|
||||
time = gs::time_match(re, text, icase);
|
||||
r.safe_greta_time = time;
|
||||
std::cout << "\tSafe GRETA regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
if(time_boost == true)
|
||||
{
|
||||
time = b::time_match(re, text, icase);
|
||||
r.boost_time = time;
|
||||
std::cout << "\tBoost regex: " << time << "s\n";
|
||||
}
|
||||
if(time_localised_boost == true)
|
||||
{
|
||||
time = bl::time_match(re, text, icase);
|
||||
r.localised_boost_time = time;
|
||||
std::cout << "\tBoost regex (C++ locale): " << time << "s\n";
|
||||
}
|
||||
#ifdef BOOST_HAS_POSIX
|
||||
if(time_posix == true)
|
||||
{
|
||||
time = posix::time_match(re, text, icase);
|
||||
r.posix_time = time;
|
||||
std::cout << "\tPOSIX regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
if(time_pcre == true)
|
||||
{
|
||||
time = pcr::time_match(re, text, icase);
|
||||
r.pcre_time = time;
|
||||
std::cout << "\tPCRE regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
r.finalise();
|
||||
result_list.push_back(r);
|
||||
}
|
||||
|
||||
void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase)
|
||||
{
|
||||
std::cout << "Testing: " << re << std::endl;
|
||||
|
||||
double time;
|
||||
results r(re, description);
|
||||
|
||||
#ifdef BOOST_HAS_GRETA
|
||||
if(time_greta == true)
|
||||
{
|
||||
time = g::time_find_all(re, text, icase);
|
||||
r.greta_time = time;
|
||||
std::cout << "\tGRETA regex: " << time << "s\n";
|
||||
}
|
||||
if(time_safe_greta == true)
|
||||
{
|
||||
time = gs::time_find_all(re, text, icase);
|
||||
r.safe_greta_time = time;
|
||||
std::cout << "\tSafe GRETA regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
if(time_boost == true)
|
||||
{
|
||||
time = b::time_find_all(re, text, icase);
|
||||
r.boost_time = time;
|
||||
std::cout << "\tBoost regex: " << time << "s\n";
|
||||
}
|
||||
if(time_localised_boost == true)
|
||||
{
|
||||
time = bl::time_find_all(re, text, icase);
|
||||
r.localised_boost_time = time;
|
||||
std::cout << "\tBoost regex (C++ locale): " << time << "s\n";
|
||||
}
|
||||
#ifdef BOOST_HAS_POSIX
|
||||
if(time_posix == true)
|
||||
{
|
||||
time = posix::time_find_all(re, text, icase);
|
||||
r.posix_time = time;
|
||||
std::cout << "\tPOSIX regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
if(time_pcre == true)
|
||||
{
|
||||
time = pcr::time_find_all(re, text, icase);
|
||||
r.pcre_time = time;
|
||||
std::cout << "\tPCRE regex: " << time << "s\n";
|
||||
}
|
||||
#endif
|
||||
r.finalise();
|
||||
result_list.push_back(r);
|
||||
}
|
||||
|
||||
int cpp_main(int argc, char * argv[])
|
||||
{
|
||||
// start by processing the command line args:
|
||||
if(argc < 2)
|
||||
return show_usage();
|
||||
int result = 0;
|
||||
for(int c = 1; c < argc; ++c)
|
||||
{
|
||||
result += handle_argument(argv[c]);
|
||||
}
|
||||
if(result)
|
||||
return result;
|
||||
|
||||
if(test_matches)
|
||||
{
|
||||
// start with a simple test, this is basically a measure of the minimal overhead
|
||||
// involved in calling a regex matcher:
|
||||
test_match("abc", "abc");
|
||||
// these are from the regex docs:
|
||||
test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
|
||||
test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
|
||||
// these are from http://www.regxlib.com/
|
||||
test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john_maddock@compuserve.com");
|
||||
test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
|
||||
test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
|
||||
test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
|
||||
test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
|
||||
test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
|
||||
test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
|
||||
test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
|
||||
test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
|
||||
test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
|
||||
test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
|
||||
}
|
||||
output_html_results(true, "%short_matches%");
|
||||
|
||||
std::string file_contents;
|
||||
|
||||
if(test_code)
|
||||
{
|
||||
load_file(file_contents, "../../../boost/crc.hpp");
|
||||
|
||||
const char* highlight_expression = // preprocessor directives: index 1
|
||||
"(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|"
|
||||
// comment: index 2
|
||||
"(//[^\\n]*|/\\*.*?\\*/)|"
|
||||
// literals: index 3
|
||||
"\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
|
||||
// string literals: index 4
|
||||
"('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
|
||||
// keywords: index 5
|
||||
"\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
|
||||
"|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
|
||||
"|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
|
||||
"|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
|
||||
"|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
|
||||
"|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
|
||||
"|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
|
||||
"|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
|
||||
"|using|virtual|void|volatile|wchar_t|while)\\>"
|
||||
;
|
||||
|
||||
const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
||||
"(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?"
|
||||
"[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
|
||||
"(\\{|:[^;\\{()]*\\{)";
|
||||
|
||||
const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
|
||||
const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)";
|
||||
|
||||
|
||||
test_find_all(class_expression, file_contents);
|
||||
test_find_all(highlight_expression, file_contents);
|
||||
test_find_all(include_expression, file_contents);
|
||||
test_find_all(boost_include_expression, file_contents);
|
||||
}
|
||||
output_html_results(false, "%code_search%");
|
||||
|
||||
if(test_html)
|
||||
{
|
||||
load_file(file_contents, "../../../libs/libraries.htm");
|
||||
test_find_all("beman|john|dave", file_contents, true);
|
||||
test_find_all("<p>.*?</p>", file_contents, true);
|
||||
test_find_all("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
|
||||
test_find_all("<h[12345678][^>]*>.*?</h[12345678]>", file_contents, true);
|
||||
test_find_all("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
|
||||
test_find_all("<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents, true);
|
||||
}
|
||||
output_html_results(false, "%html_search%");
|
||||
|
||||
if(test_short_twain)
|
||||
{
|
||||
load_file(file_contents, "short_twain.txt");
|
||||
|
||||
test_find_all("Twain", file_contents);
|
||||
test_find_all("Huck[[:alpha:]]+", file_contents);
|
||||
test_find_all("[[:alpha:]]+ing", file_contents);
|
||||
test_find_all("^[^\n]*?Twain", file_contents);
|
||||
test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
|
||||
test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
|
||||
}
|
||||
output_html_results(false, "%short_twain_search%");
|
||||
|
||||
if(test_long_twain)
|
||||
{
|
||||
load_file(file_contents, "mtent12.txt");
|
||||
|
||||
test_find_all("Twain", file_contents);
|
||||
test_find_all("Huck[[:alpha:]]+", file_contents);
|
||||
test_find_all("[[:alpha:]]+ing", file_contents);
|
||||
test_find_all("^[^\n]*?Twain", file_contents);
|
||||
test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
|
||||
time_posix = false;
|
||||
test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
|
||||
time_posix = true;
|
||||
}
|
||||
output_html_results(false, "%long_twain_search%");
|
||||
|
||||
output_final_html();
|
||||
return 0;
|
||||
}
|
||||
|
135
performance/regex_comparison.hpp
Normal file
135
performance/regex_comparison.hpp
Normal file
@ -0,0 +1,135 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* All rights reserved.
|
||||
* May not be transfered or disclosed to a third party without
|
||||
* prior consent of the author.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef REGEX_COMPARISON_HPP
|
||||
#define REGEX_COMPARISON_HPP
|
||||
|
||||
#include <string>
|
||||
#include <list>
|
||||
#include <boost/limits.hpp>
|
||||
|
||||
//
|
||||
// globals:
|
||||
//
|
||||
extern bool time_boost;
|
||||
extern bool time_localised_boost;
|
||||
extern bool time_greta;
|
||||
extern bool time_safe_greta;
|
||||
extern bool time_posix;
|
||||
extern bool time_pcre;
|
||||
|
||||
extern bool test_matches;
|
||||
extern bool test_short_twain;
|
||||
extern bool test_long_twain;
|
||||
extern bool test_code;
|
||||
extern bool test_html;
|
||||
|
||||
extern std::string html_template_file;
|
||||
extern std::string html_out_file;
|
||||
extern std::string html_contents;
|
||||
|
||||
|
||||
int handle_argument(const std::string& what);
|
||||
int show_usage();
|
||||
void load_file(std::string& text, const char* file);
|
||||
void output_html_results(bool show_description, const std::string& tagname);
|
||||
void output_final_html();
|
||||
|
||||
|
||||
//
// struct results:
// The timings obtained for one regular expression, one member per engine.
// A negative time means that engine was not measured. factor holds the
// smallest measured time once finalise() has been called, and is what the
// other times are reported relative to.
//
struct results
{
   double boost_time;
   double localised_boost_time;
   double greta_time;
   double safe_greta_time;
   double posix_time;
   double pcre_time;
   double factor;
   std::string expression;
   std::string description;
   // Every time starts out as "not measured" (-1); localised_boost_time was
   // previously left uninitialised.
   results(const std::string& ex, const std::string& desc)
      : boost_time(-1),
        localised_boost_time(-1),
        greta_time(-1),
        safe_greta_time(-1),
        posix_time(-1),
        pcre_time(-1),
        factor(std::numeric_limits<double>::max()),
        expression(ex),
        description(desc)
   {}
   // Sets factor to the smallest of the measured (non-negative) times; if
   // nothing was measured factor keeps its initial maximum value.
   void finalise()
   {
      consider(boost_time);
      // this check was previously garbled into a no-op (factor < factor):
      consider(localised_boost_time);
      consider(greta_time);
      consider(safe_greta_time);
      consider(posix_time);
      consider(pcre_time);
   }
private:
   // Lowers factor to t if t is a valid measurement that beats it.
   void consider(double t)
   {
      if((t >= 0) && (t < factor))
         factor = t;
   }
};
|
||||
|
||||
extern std::list<results> result_list;
|
||||
|
||||
|
||||
namespace b {
|
||||
// boost tests:
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
namespace bl {
|
||||
// localised boost tests:
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
namespace pcr {
|
||||
// pcre tests:
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
namespace g {
|
||||
// greta tests:
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
namespace gs {
|
||||
// safe greta tests:
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
namespace posix {
|
||||
// POSIX tests:
|
||||
double time_match(const std::string& re, const std::string& text, bool icase);
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase);
|
||||
|
||||
}
|
||||
//
// Test drivers: run every enabled engine on one expression and record the
// timings in result_list under the given description.
//
void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase = false);

//
// Convenience overloads without a description.
// NOTE: a string literal passed as the third argument converts to bool more
// readily than to std::string, so it selects these overloads rather than the
// ones above - pass a std::string when a description is intended.
//

// The matched text doubles as the description:
inline void test_match(const std::string& re, const std::string& text, bool icase = false)
{
   test_match(re, text, text, icase);
}

// The description is left empty:
inline void test_find_all(const std::string& re, const std::string& text, bool icase = false)
{
   test_find_all(re, text, "", icase);
}
|
||||
|
||||
|
||||
#define REPEAT_COUNT 10
|
||||
|
||||
#endif
|
98
performance/time_boost.cpp
Normal file
98
performance/time_boost.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "regex_comparison.hpp"
|
||||
#include <boost/timer.hpp>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
namespace b{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
|
||||
boost::smatch what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_match(text, what, e);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_match(text, what, e);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
bool dummy_grep_proc(const boost::smatch&)
|
||||
{ return true; }
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
|
||||
boost::smatch what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_grep(&dummy_grep_proc, text, e);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result >10)
|
||||
return result / iter;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_grep(&dummy_grep_proc, text, e);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
125
performance/time_greta.cpp
Normal file
125
performance/time_greta.cpp
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "regex_comparison.hpp"
|
||||
#if defined(BOOST_HAS_GRETA)
|
||||
#include <cassert>
|
||||
#include <boost/timer.hpp>
|
||||
#include "regexpr2.h"
|
||||
|
||||
namespace g{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE));
|
||||
regex::match_results what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
assert(e.match(text, what));
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text, what);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text, what);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE));
|
||||
regex::match_results what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text.begin(), text.end(), what);
|
||||
while(what.backref(0).matched)
|
||||
{
|
||||
e.match(what.backref(0).end(), text.end(), what);
|
||||
}
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result > 10)
|
||||
return result / iter;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text.begin(), text.end(), what);
|
||||
while(what.backref(0).matched)
|
||||
{
|
||||
e.match(what.backref(0).end(), text.end(), what);
|
||||
}
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace g {
|
||||
|
||||
// Stub compiled when BOOST_HAS_GRETA is not defined: no timing is done
// and -1 is returned for every input.
double time_match(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
|
||||
// Stub compiled when BOOST_HAS_GRETA is not defined: no timing is done
// and -1 is returned for every input.
double time_find_all(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
98
performance/time_localised_boost.cpp
Normal file
98
performance/time_localised_boost.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "regex_comparison.hpp"
|
||||
#include <boost/timer.hpp>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
namespace bl{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
boost::reg_expression<char, boost::cpp_regex_traits<char> > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
|
||||
boost::smatch what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_match(text, what, e);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_match(text, what, e);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
// Callback handed to boost::regex_grep() by time_find_all(): ignores the
// match it is given and always answers true.
// NOTE(review): presumably `true` tells regex_grep to keep searching - confirm
// against the Boost.Regex documentation.
bool dummy_grep_proc(const boost::smatch& /*match*/)
{
   return true;
}
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
boost::reg_expression<char, boost::cpp_regex_traits<char> > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
|
||||
boost::smatch what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_grep(&dummy_grep_proc, text, e);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result >10)
|
||||
return result / iter;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
boost::regex_grep(&dummy_grep_proc, text, e);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
180
performance/time_pcre.cpp
Normal file
180
performance/time_pcre.cpp
Normal file
@ -0,0 +1,180 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cfloat>
|
||||
#include "regex_comparison.hpp"
|
||||
#ifdef BOOST_HAS_PCRE
|
||||
#include "pcre.h"
|
||||
#include <boost/timer.hpp>
|
||||
|
||||
namespace pcr{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
pcre *ppcre;
|
||||
const char *error;
|
||||
int erroffset;
|
||||
|
||||
int what[50];
|
||||
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
|
||||
if(0 == (ppcre = pcre_compile(re.c_str(), (icase ? PCRE_CASELESS | PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE : PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE),
|
||||
&error, &erroffset, NULL)))
|
||||
{
|
||||
free(ppcre);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pcre_extra *pe;
|
||||
pe = pcre_study(ppcre, 0, &error);
|
||||
if(error)
|
||||
{
|
||||
free(ppcre);
|
||||
free(pe);
|
||||
return -1;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what));
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what));
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
free(ppcre);
|
||||
free(pe);
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
pcre *ppcre;
|
||||
const char *error;
|
||||
int erroffset;
|
||||
|
||||
int what[50];
|
||||
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
int exec_result;
|
||||
int matches;
|
||||
|
||||
if(0 == (ppcre = pcre_compile(re.c_str(), (icase ? PCRE_CASELESS | PCRE_DOTALL | PCRE_MULTILINE : PCRE_DOTALL | PCRE_MULTILINE), &error, &erroffset, NULL)))
|
||||
{
|
||||
free(ppcre);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pcre_extra *pe;
|
||||
pe = pcre_study(ppcre, 0, &error);
|
||||
if(error)
|
||||
{
|
||||
free(ppcre);
|
||||
free(pe);
|
||||
return -1;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
int startoff;
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
matches = 0;
|
||||
startoff = 0;
|
||||
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
|
||||
while(exec_result >= 0)
|
||||
{
|
||||
++matches;
|
||||
startoff = what[1];
|
||||
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
|
||||
}
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result >10)
|
||||
return result / iter;
|
||||
|
||||
result = DBL_MAX;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
int startoff;
|
||||
matches = 0;
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
matches = 0;
|
||||
startoff = 0;
|
||||
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
|
||||
while(exec_result >= 0)
|
||||
{
|
||||
++matches;
|
||||
startoff = what[1];
|
||||
exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what));
|
||||
}
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
namespace pcr{
|
||||
|
||||
// Stub compiled when BOOST_HAS_PCRE is not defined: no timing is done
// and -1 is returned for every input.
double time_match(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
// Stub compiled when BOOST_HAS_PCRE is not defined: no timing is done
// and -1 is returned for every input.
double time_find_all(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
143
performance/time_posix.cpp
Normal file
143
performance/time_posix.cpp
Normal file
@ -0,0 +1,143 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <cfloat>
|
||||
#include "regex_comparison.hpp"
|
||||
#ifdef BOOST_HAS_POSIX
|
||||
#include <boost/timer.hpp>
|
||||
#include "regex.h"
|
||||
|
||||
namespace posix{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
regex_t e;
|
||||
regmatch_t what[20];
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
|
||||
return -1;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
regexec(&e, text.c_str(), e.re_nsub, what, 0);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
regexec(&e, text.c_str(), e.re_nsub, what, 0);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
regfree(&e);
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
regex_t e;
|
||||
regmatch_t what[20];
|
||||
memset(what, 0, sizeof(what));
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
int exec_result;
|
||||
int matches;
|
||||
if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
|
||||
return -1;
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
what[0].rm_so = 0;
|
||||
what[0].rm_eo = text.size();
|
||||
matches = 0;
|
||||
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
|
||||
while(exec_result == 0)
|
||||
{
|
||||
++matches;
|
||||
what[0].rm_so = what[0].rm_eo;
|
||||
what[0].rm_eo = text.size();
|
||||
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
|
||||
}
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result >10)
|
||||
return result / iter;
|
||||
|
||||
result = DBL_MAX;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
what[0].rm_so = 0;
|
||||
what[0].rm_eo = text.size();
|
||||
matches = 0;
|
||||
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
|
||||
while(exec_result == 0)
|
||||
{
|
||||
++matches;
|
||||
what[0].rm_so = what[0].rm_eo;
|
||||
what[0].rm_eo = text.size();
|
||||
exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND);
|
||||
}
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
namespace posix{
|
||||
|
||||
// Stub compiled when BOOST_HAS_POSIX is not defined: no timing is done
// and -1 is returned for every input.
double time_match(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
// Stub compiled when BOOST_HAS_POSIX is not defined: no timing is done
// and -1 is returned for every input.
double time_find_all(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
|
||||
}
|
||||
#endif
|
127
performance/time_safe_greta.cpp
Normal file
127
performance/time_safe_greta.cpp
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* Dr John Maddock
|
||||
*
|
||||
* Permission to use, copy, modify, distribute and sell this software
|
||||
* and its documentation for any purpose is hereby granted without fee,
|
||||
* provided that the above copyright notice appear in all copies and
|
||||
* that both that copyright notice and this permission notice appear
|
||||
* in supporting documentation. Dr John Maddock makes no representations
|
||||
* about the suitability of this software for any purpose.
|
||||
* It is provided "as is" without express or implied warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "regex_comparison.hpp"
|
||||
#if defined(BOOST_HAS_GRETA)
|
||||
|
||||
#include <cassert>
|
||||
#include <boost/timer.hpp>
|
||||
#include "regexpr2.h"
|
||||
|
||||
namespace gs{
|
||||
|
||||
double time_match(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE), regex::MODE_SAFE);
|
||||
regex::match_results what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
assert(e.match(text, what));
|
||||
do
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text, what);
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text, what);
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
double time_find_all(const std::string& re, const std::string& text, bool icase)
|
||||
{
|
||||
regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE), regex::MODE_SAFE);
|
||||
regex::match_results what;
|
||||
boost::timer tim;
|
||||
int iter = 1;
|
||||
int counter, repeats;
|
||||
double result = 0;
|
||||
double run;
|
||||
do
|
||||
{
|
||||
bool r;
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text.begin(), text.end(), what);
|
||||
while(what.backref(0).matched)
|
||||
{
|
||||
e.match(what.backref(0).end(), text.end(), what);
|
||||
}
|
||||
}
|
||||
result = tim.elapsed();
|
||||
iter *= 2;
|
||||
}while(result < 0.5);
|
||||
iter /= 2;
|
||||
|
||||
if(result > 10)
|
||||
return result / iter;
|
||||
|
||||
// repeat test and report least value for consistency:
|
||||
for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
|
||||
{
|
||||
tim.restart();
|
||||
for(counter = 0; counter < iter; ++counter)
|
||||
{
|
||||
e.match(text.begin(), text.end(), what);
|
||||
while(what.backref(0).matched)
|
||||
{
|
||||
e.match(what.backref(0).end(), text.end(), what);
|
||||
}
|
||||
}
|
||||
run = tim.elapsed();
|
||||
result = std::min(run, result);
|
||||
}
|
||||
return result / iter;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace gs{
|
||||
|
||||
// Stub compiled when BOOST_HAS_GRETA is not defined: no timing is done
// and -1 is returned for every input.
double time_match(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
|
||||
// Stub compiled when BOOST_HAS_GRETA is not defined: no timing is done
// and -1 is returned for every input.
double time_find_all(const std::string& /*re*/, const std::string& /*text*/, bool /*icase*/)
{
   const double not_available = -1;
   return not_available;
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
671
performance/vc71-results.htm
Normal file
671
performance/vc71-results.htm
Normal file
@ -0,0 +1,671 @@
|
||||
<html>
|
||||
<head>
|
||||
<title>Regular Expression Performance Comparison</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
<meta name="vs_targetSchema" content="http://schemas.microsoft.com/intellisense/ie5">
|
||||
<meta name="Template" content="C:\PROGRAM FILES\MICROSOFT OFFICE\OFFICE\html.dot">
|
||||
<meta name="GENERATOR" content="Microsoft FrontPage Express 2.0">
|
||||
</head>
|
||||
<body bgcolor="#ffffff" link="#0000ff" vlink="#800080">
|
||||
<h2>Regular Expression Performance Comparison</h2>
|
||||
<p>The Boost and GRETA regular expression libraries have slightly different
|
||||
interfaces, and it has been suggested that GRETA's interface allows for a more
|
||||
efficient implementation. The following tables provide comparisons between:</p>
|
||||
<p><a href="http://research.microsoft.com/projects/greta">GRETA</a>.</p>
|
||||
<p><a href="http://www.boost.org/">The Boost regex library</a>.</p>
|
||||
<p><a href="http://arglist.com/regex/">Henry Spencer's regular expression library</a>
|
||||
- this is provided for comparison as a typical non-backtracking implementation.</p>
|
||||
<p>
|
||||
Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the
|
||||
code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever
|
||||
care should be taken in interpreting the results, only sensible regular
|
||||
expressions (rather than pathological cases) are given, most are taken from the
|
||||
Boost regex examples, or from the <a href="http://www.regxlib.com/">Library of
|
||||
Regular Expressions</a>. In addition, some variation in the relative
|
||||
performance of these libraries can be expected on other machines - as memory
|
||||
access and processor caching effects can be quite large for most finite state
|
||||
machine algorithms.</p>
|
||||
<h3>Comparison 1: Long Search</h3>
|
||||
<p>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within a long English language text was measured
|
||||
(<a href="ftp://ibiblio.org/pub/docs/books/gutenberg/etext02/mtent12.zip">mtent12.txt</a>
|
||||
from <a href="http://promo.net/pg/">Project Gutenberg</a>, 19Mb). </p>
|
||||
<P><table border="1" cellspacing="1">
|
||||
<tr>
|
||||
<td><strong>Expression</strong></td>
|
||||
<td><strong>GRETA</strong></td>
|
||||
<td><strong>GRETA<BR>
|
||||
(non-recursive mode)</strong></td>
|
||||
<td><strong>Boost</strong></td>
|
||||
<td><strong>Boost + C++ locale</strong></td>
|
||||
<td><strong>POSIX</strong></td>
|
||||
<td><strong>PCRE</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Twain</code></td>
|
||||
<td>9.29<BR>
|
||||
(0.00309s)</td>
|
||||
<td>32.9<BR>
|
||||
(0.011s)</td>
|
||||
<td>1.34<BR>
|
||||
(0.000445s)</td>
|
||||
<td>1.37<BR>
|
||||
(0.000455s)</td>
|
||||
<td>6.23<BR>
|
||||
(0.00207s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000333s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Huck[[:alpha:]]+</code></td>
|
||||
<td>12.9<BR>
|
||||
(0.00309s)</td>
|
||||
<td>44.4<BR>
|
||||
(0.0106s)</td>
|
||||
<td>1.79<BR>
|
||||
(0.00043s)</td>
|
||||
<td>1.82<BR>
|
||||
(0.000436s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00024s)</font></td>
|
||||
<td><font color="#008000">1.06<BR>
|
||||
(0.000254s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[[:alpha:]]+ing</code></td>
|
||||
<td>7.6<BR>
|
||||
(0.0178s)</td>
|
||||
<td>15.2<BR>
|
||||
(0.0357s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00235s)</font></td>
|
||||
<td><font color="#008000">0.867<BR>
|
||||
(0.00204s)</font></td>
|
||||
<td>4.26<BR>
|
||||
(0.01s)</td>
|
||||
<td>6<BR>
|
||||
(0.0141s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[^ ]*?Twain</code></td>
|
||||
<td>5.92<BR>
|
||||
(0.00626s)</td>
|
||||
<td>16.3<BR>
|
||||
(0.0172s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00106s)</font></td>
|
||||
<td><font color="#008000">0.666<BR>
|
||||
(0.000704s)</font></td>
|
||||
<td>NA</td>
|
||||
<td>2.04<BR>
|
||||
(0.00215s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Tom|Sawyer|Huckleberry|Finn</code></td>
|
||||
<td>8<BR>
|
||||
(0.00828s)</td>
|
||||
<td>20<BR>
|
||||
(0.0207s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00104s)</font></td>
|
||||
<td><font color="#008000">0.585<BR>
|
||||
(0.000605s)</font></td>
|
||||
<td>42.3<BR>
|
||||
(0.0438s)</td>
|
||||
<td>1.12<BR>
|
||||
(0.00115s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)</code></td>
|
||||
<td>6.42<BR>
|
||||
(0.012s)</td>
|
||||
<td>16.3<BR>
|
||||
(0.0307s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00188s)</font></td>
|
||||
<td><font color="#008000">0.719<BR>
|
||||
(0.00135s)</font></td>
|
||||
<td>NA</td>
|
||||
<td>1.21<BR>
|
||||
(0.00227s)</td>
|
||||
</tr>
|
||||
</table>
|
||||
</P>
|
||||
<h3>Comparison 2: Medium Sized Search</h3>
|
||||
<p>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within a medium sized English language text was
|
||||
measured (the first 50K from mtent12.txt). </p>
|
||||
<P><table border="1" cellspacing="1">
|
||||
<tr>
|
||||
<td><strong>Expression</strong></td>
|
||||
<td><strong>GRETA</strong></td>
|
||||
<td><strong>GRETA<BR>
|
||||
(non-recursive mode)</strong></td>
|
||||
<td><strong>Boost</strong></td>
|
||||
<td><strong>Boost + C++ locale</strong></td>
|
||||
<td><strong>POSIX</strong></td>
|
||||
<td><strong>PCRE</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Twain</code></td>
|
||||
<td>9.29<BR>
|
||||
(0.00309s)</td>
|
||||
<td>32.5<BR>
|
||||
(0.0108s)</td>
|
||||
<td>1.34<BR>
|
||||
(0.000445s)</td>
|
||||
<td>1.37<BR>
|
||||
(0.000455s)</td>
|
||||
<td>6.24<BR>
|
||||
(0.00207s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000333s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Huck[[:alpha:]]+</code></td>
|
||||
<td>12.9<BR>
|
||||
(0.00309s)</td>
|
||||
<td>47<BR>
|
||||
(0.0113s)</td>
|
||||
<td>1.77<BR>
|
||||
(0.000425s)</td>
|
||||
<td>1.84<BR>
|
||||
(0.00044s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00024s)</font></td>
|
||||
<td><font color="#008000">1.04<BR>
|
||||
(0.00025s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>[[:alpha:]]+ing</code></td>
|
||||
<td>7.61<BR>
|
||||
(0.0178s)</td>
|
||||
<td>15.2<BR>
|
||||
(0.0356s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00234s)</font></td>
|
||||
<td><font color="#008000">0.867<BR>
|
||||
(0.00203s)</font></td>
|
||||
<td>4.27<BR>
|
||||
(0.01s)</td>
|
||||
<td>5.94<BR>
|
||||
(0.0139s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[^ ]*?Twain</code></td>
|
||||
<td>5.72<BR>
|
||||
(0.00626s)</td>
|
||||
<td>15.5<BR>
|
||||
(0.0169s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00109s)</font></td>
|
||||
<td><font color="#008000">0.644<BR>
|
||||
(0.000704s)</font></td>
|
||||
<td>NA</td>
|
||||
<td>1.93<BR>
|
||||
(0.00211s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>Tom|Sawyer|Huckleberry|Finn</code></td>
|
||||
<td>7.85<BR>
|
||||
(0.00828s)</td>
|
||||
<td>19.9<BR>
|
||||
(0.021s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00105s)</font></td>
|
||||
<td><font color="#008000">0.575<BR>
|
||||
(0.000606s)</font></td>
|
||||
<td>41.5<BR>
|
||||
(0.0438s)</td>
|
||||
<td><font color="#008000">1.09<BR>
|
||||
(0.00115s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)</code></td>
|
||||
<td>6.49<BR>
|
||||
(0.012s)</td>
|
||||
<td>16.5<BR>
|
||||
(0.0307s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00186s)</font></td>
|
||||
<td><font color="#008000">0.737<BR>
|
||||
(0.00137s)</font></td>
|
||||
<td>216<BR>
|
||||
(0.401s)</td>
|
||||
<td>1.24<BR>
|
||||
(0.00231s)</td>
|
||||
</tr>
|
||||
</table>
|
||||
</P>
|
||||
<H3>Comparison 3: C++ Code Search</H3>
|
||||
<P>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within the C++ source file <A href="../../../boost/crc.hpp">
|
||||
boost/crc.hpp</A> was measured. </P>
|
||||
<P><table border="1" cellspacing="1">
|
||||
<tr>
|
||||
<td><strong>Expression</strong></td>
|
||||
<td><strong>GRETA</strong></td>
|
||||
<td><strong>GRETA<BR>
|
||||
(non-recursive mode)</strong></td>
|
||||
<td><strong>Boost</strong></td>
|
||||
<td><strong>Boost + C++ locale</strong></td>
|
||||
<td><strong>POSIX</strong></td>
|
||||
<td><strong>PCRE</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([
|
||||
]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)</code></td>
|
||||
<td>9.58<BR>
|
||||
(0.0019s)</td>
|
||||
<td>40.3<BR>
|
||||
(0.00798s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000198s)</font></td>
|
||||
<td><font color="#008000">0.901<BR>
|
||||
(0.000178s)</font></td>
|
||||
<td>607<BR>
|
||||
(0.12s)</td>
|
||||
<td>3.16<BR>
|
||||
(0.000626s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>(^[
|
||||
]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\></code></td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.0116s)</font></td>
|
||||
<td>2.7<BR>
|
||||
(0.0313s)</td>
|
||||
<td>1.22<BR>
|
||||
(0.0141s)</td>
|
||||
<td><font color="#008000">0.946<BR>
|
||||
(0.011s)</font></td>
|
||||
<td>NA</td>
|
||||
<td>1.41<BR>
|
||||
(0.0163s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)</code></td>
|
||||
<td>6.05<BR>
|
||||
(0.00195s)</td>
|
||||
<td>25.7<BR>
|
||||
(0.0083s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000323s)</font></td>
|
||||
<td><font color="#008000">0.939<BR>
|
||||
(0.000303s)</font></td>
|
||||
<td>107<BR>
|
||||
(0.0344s)</td>
|
||||
<td>1.69<BR>
|
||||
(0.000547s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[ ]*#[ ]*include[ ]+("boost/[^"]+"|<boost[^>]+>)</code></td>
|
||||
<td>5.8<BR>
|
||||
(0.00196s)</td>
|
||||
<td>24.6<BR>
|
||||
(0.0083s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000337s)</font></td>
|
||||
<td><font color="#008000">1.07<BR>
|
||||
(0.000362s)</font></td>
|
||||
<td>122<BR>
|
||||
(0.0413s)</td>
|
||||
<td>1.59<BR>
|
||||
(0.000538s)</td>
|
||||
</tr>
|
||||
</table>
|
||||
</P>
|
||||
<H3>Comparison 4: HTML Document Search</H3>
|
||||
<P>For each of the following regular expressions the time taken to find all
|
||||
occurrences of the expression within the html file <A href="../../libraries.htm">libs/libraries.htm</A>
|
||||
was measured. </P>
|
||||
<P><table border="1" cellspacing="1">
|
||||
<tr>
|
||||
<td><strong>Expression</strong></td>
|
||||
<td><strong>GRETA</strong></td>
|
||||
<td><strong>GRETA<BR>
|
||||
(non-recursive mode)</strong></td>
|
||||
<td><strong>Boost</strong></td>
|
||||
<td><strong>Boost + C++ locale</strong></td>
|
||||
<td><strong>POSIX</strong></td>
|
||||
<td><strong>PCRE</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>beman|john|dave</code></td>
|
||||
<td>6.69<BR>
|
||||
(0.00321s)</td>
|
||||
<td>18.9<BR>
|
||||
(0.00908s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000479s)</font></td>
|
||||
<td><font color="#008000">0.561<BR>
|
||||
(0.000269s)</font></td>
|
||||
<td>23.8<BR>
|
||||
(0.0114s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000479s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code><p>.*?</p>
|
||||
</code>
|
||||
</td>
|
||||
<td>5.89<BR>
|
||||
(0.00164s)</td>
|
||||
<td>19.6<BR>
|
||||
(0.00548s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000279s)</font></td>
|
||||
<td><font color="#008000">1.05<BR>
|
||||
(0.000293s)</font></td>
|
||||
<td>NA</td>
|
||||
<td>1.11<BR>
|
||||
(0.000308s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code><a[^>]+href=("[^"]*"|[^[:space:]]+)[^>]*></code></td>
|
||||
<td>3.94<BR>
|
||||
(0.00219s)</td>
|
||||
<td>10.4<BR>
|
||||
(0.00579s)</td>
|
||||
<td><font color="#008000">1.09<BR>
|
||||
(0.000606s)</font></td>
|
||||
<td><font color="#008000">0.825<BR>
|
||||
(0.000459s)</font></td>
|
||||
<td>221<BR>
|
||||
(0.123s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000557s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code><h[12345678][^>]*>.*?</h[12345678]></code></td>
|
||||
<td>6.07<BR>
|
||||
(0.0016s)</td>
|
||||
<td>19.8<BR>
|
||||
(0.00524s)</td>
|
||||
<td>1.37<BR>
|
||||
(0.000362s)</td>
|
||||
<td><font color="#008000">0.722<BR>
|
||||
(0.000191s)</font></td>
|
||||
<td>NA</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.000264s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code><img[^>]+src=("[^"]*"|[^[:space:]]+)[^>]*></code></td>
|
||||
<td>6.77<BR>
|
||||
(0.00162s)</td>
|
||||
<td>22.5<BR>
|
||||
(0.0054s)</td>
|
||||
<td>1.1<BR>
|
||||
(0.000264s)</td>
|
||||
<td>1.2<BR>
|
||||
(0.000289s)</td>
|
||||
<td>120<BR>
|
||||
(0.0288s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00024s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code><font[^>]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?</font></code></td>
|
||||
<td>6.77<BR>
|
||||
(0.00162s)</td>
|
||||
<td>22.5<BR>
|
||||
(0.0054s)</td>
|
||||
<td>1.12<BR>
|
||||
(0.000269s)</td>
|
||||
<td>1.2<BR>
|
||||
(0.000289s)</td>
|
||||
<td>NA</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(0.00024s)</font></td>
|
||||
</tr>
|
||||
</table>
|
||||
</P>
|
||||
<H3>Comparison 3: Simple Matches</H3>
|
||||
<p>
|
||||
For each of the following regular expressions the time taken to match against
|
||||
the text indicated was measured. </p>
|
||||
<P><table border="1" cellspacing="1">
|
||||
<tr>
|
||||
<td><strong>Expression</strong></td>
|
||||
<td><strong>Text</strong></td>
|
||||
<td><strong>GRETA</strong></td>
|
||||
<td><strong>GRETA<BR>
|
||||
(non-recursive mode)</strong></td>
|
||||
<td><strong>Boost</strong></td>
|
||||
<td><strong>Boost + C++ locale</strong></td>
|
||||
<td><strong>POSIX</strong></td>
|
||||
<td><strong>PCRE</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>abc</code></td>
|
||||
<td>abc</td>
|
||||
<td>1.43<BR>
|
||||
(2.25e-007s)</td>
|
||||
<td>1.85<BR>
|
||||
(2.91e-007s)</td>
|
||||
<td>1.27<BR>
|
||||
(2.01e-007s)</td>
|
||||
<td>1.29<BR>
|
||||
(2.03e-007s)</td>
|
||||
<td>1.94<BR>
|
||||
(3.06e-007s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(1.58e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^([0-9]+)(\-| |$)(.*)$</code></td>
|
||||
<td>100- this is a line of ftp response which contains a message string</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(6.97e-007s)</font></td>
|
||||
<td>2.69<BR>
|
||||
(1.87e-006s)</td>
|
||||
<td>1.89<BR>
|
||||
(1.32e-006s)</td>
|
||||
<td>1.86<BR>
|
||||
(1.3e-006s)</td>
|
||||
<td>298<BR>
|
||||
(0.000208s)</td>
|
||||
<td>1.29<BR>
|
||||
(8.98e-007s)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}</code></td>
|
||||
<td>1234-5678-1234-456</td>
|
||||
<td>2.41<BR>
|
||||
(2.14e-006s)</td>
|
||||
<td>2.97<BR>
|
||||
(2.64e-006s)</td>
|
||||
<td>2.37<BR>
|
||||
(2.1e-006s)</td>
|
||||
<td>2.24<BR>
|
||||
(1.99e-006s)</td>
|
||||
<td>29.6<BR>
|
||||
(2.63e-005s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(8.88e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$</code></td>
|
||||
<td>john_maddock@compuserve.com</td>
|
||||
<td>1.83<BR>
|
||||
(3.7e-006s)</td>
|
||||
<td>2.34<BR>
|
||||
(4.74e-006s)</td>
|
||||
<td>1.59<BR>
|
||||
(3.21e-006s)</td>
|
||||
<td>1.55<BR>
|
||||
(3.13e-006s)</td>
|
||||
<td>172<BR>
|
||||
(0.000347s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(2.02e-006s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$</code></td>
|
||||
<td>foo12@foo.edu</td>
|
||||
<td>1.71<BR>
|
||||
(3.09e-006s)</td>
|
||||
<td>2.19<BR>
|
||||
(3.97e-006s)</td>
|
||||
<td>1.71<BR>
|
||||
(3.09e-006s)</td>
|
||||
<td>1.64<BR>
|
||||
(2.98e-006s)</td>
|
||||
<td>123<BR>
|
||||
(0.000222s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(1.81e-006s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$</code></td>
|
||||
<td>bob.smith@foo.tv</td>
|
||||
<td>1.72<BR>
|
||||
(3.09e-006s)</td>
|
||||
<td>2.21<BR>
|
||||
(3.97e-006s)</td>
|
||||
<td>1.72<BR>
|
||||
(3.09e-006s)</td>
|
||||
<td>1.7<BR>
|
||||
(3.06e-006s)</td>
|
||||
<td>133<BR>
|
||||
(0.00024s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(1.79e-006s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$</code></td>
|
||||
<td>EH10 2QQ</td>
|
||||
<td>1.29<BR>
|
||||
(9.37e-007s)</td>
|
||||
<td>1.71<BR>
|
||||
(1.24e-006s)</td>
|
||||
<td>1.29<BR>
|
||||
(9.35e-007s)</td>
|
||||
<td>1.18<BR>
|
||||
(8.59e-007s)</td>
|
||||
<td>7.79<BR>
|
||||
(5.65e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(7.26e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$</code></td>
|
||||
<td>G1 1AA</td>
|
||||
<td>1.41<BR>
|
||||
(9.26e-007s)</td>
|
||||
<td>2<BR>
|
||||
(1.32e-006s)</td>
|
||||
<td>1.38<BR>
|
||||
(9.07e-007s)</td>
|
||||
<td>1.31<BR>
|
||||
(8.6e-007s)</td>
|
||||
<td>7.41<BR>
|
||||
(4.88e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(6.59e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$</code></td>
|
||||
<td>SW1 1ZZ</td>
|
||||
<td>1.45<BR>
|
||||
(9.54e-007s)</td>
|
||||
<td>1.88<BR>
|
||||
(1.24e-006s)</td>
|
||||
<td>1.42<BR>
|
||||
(9.36e-007s)</td>
|
||||
<td>1.32<BR>
|
||||
(8.69e-007s)</td>
|
||||
<td>7.77<BR>
|
||||
(5.12e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(6.59e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$</code></td>
|
||||
<td>4/1/2001</td>
|
||||
<td>1.74<BR>
|
||||
(1.01e-006s)</td>
|
||||
<td>2.3<BR>
|
||||
(1.34e-006s)</td>
|
||||
<td>1.33<BR>
|
||||
(7.73e-007s)</td>
|
||||
<td>1.3<BR>
|
||||
(7.54e-007s)</td>
|
||||
<td>9.85<BR>
|
||||
(5.73e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(5.82e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$</code></td>
|
||||
<td>12/12/2001</td>
|
||||
<td>1.77<BR>
|
||||
(1.01e-006s)</td>
|
||||
<td>2.23<BR>
|
||||
(1.28e-006s)</td>
|
||||
<td>1.45<BR>
|
||||
(8.31e-007s)</td>
|
||||
<td>1.38<BR>
|
||||
(7.93e-007s)</td>
|
||||
<td>11.1<BR>
|
||||
(6.34e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(5.73e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[-+]?[[:digit:]]*\.?[[:digit:]]*$</code></td>
|
||||
<td>123</td>
|
||||
<td>1.23<BR>
|
||||
(7.65e-007s)</td>
|
||||
<td>1.66<BR>
|
||||
(1.03e-006s)</td>
|
||||
<td>1.4<BR>
|
||||
(8.69e-007s)</td>
|
||||
<td>1.31<BR>
|
||||
(8.12e-007s)</td>
|
||||
<td>4.86<BR>
|
||||
(3.02e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(6.21e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[-+]?[[:digit:]]*\.?[[:digit:]]*$</code></td>
|
||||
<td>+3.14159</td>
|
||||
<td>1.59<BR>
|
||||
(1.05e-006s)</td>
|
||||
<td>1.97<BR>
|
||||
(1.3e-006s)</td>
|
||||
<td>1.45<BR>
|
||||
(9.54e-007s)</td>
|
||||
<td>1.32<BR>
|
||||
(8.69e-007s)</td>
|
||||
<td>9.51<BR>
|
||||
(6.26e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(6.59e-007s)</font></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><code>^[-+]?[[:digit:]]*\.?[[:digit:]]*$</code></td>
|
||||
<td>-3.14159</td>
|
||||
<td>1.64<BR>
|
||||
(1.07e-006s)</td>
|
||||
<td>2<BR>
|
||||
(1.3e-006s)</td>
|
||||
<td>1.44<BR>
|
||||
(9.35e-007s)</td>
|
||||
<td>1.35<BR>
|
||||
(8.78e-007s)</td>
|
||||
<td>9.53<BR>
|
||||
(6.19e-006s)</td>
|
||||
<td><font color="#008000">1<BR>
|
||||
(6.49e-007s)</font></td>
|
||||
</tr>
|
||||
</table>
|
||||
</P>
|
||||
<hr>
|
||||
<p>Copyright John Maddock September 2002, all rights reserved.</p>
|
||||
</body>
|
||||
</html>
|
Reference in New Issue
Block a user