diff --git a/performance/command_line.cpp b/performance/command_line.cpp new file mode 100644 index 00000000..6500a895 --- /dev/null +++ b/performance/command_line.cpp @@ -0,0 +1,285 @@ + +#include +#include +#include +#include +#include +#include +#include +#include "regex_comparison.hpp" + +// +// globals: +// +bool time_boost = false; +bool time_localised_boost = false; +bool time_greta = false; +bool time_safe_greta = false; +bool time_posix = false; +bool time_pcre = false; + +bool test_matches = false; +bool test_code = false; +bool test_html = false; +bool test_short_twain = false; +bool test_long_twain = false; + + +std::string html_template_file; +std::string html_out_file; +std::string html_contents; +std::list result_list; + +int handle_argument(const std::string& what) +{ + if(what == "-b") + time_boost = true; + else if(what == "-bl") + time_localised_boost = true; +#ifdef BOOST_HAS_GRETA + else if(what == "-g") + time_greta = true; + else if(what == "-gs") + time_safe_greta = true; +#endif +#ifdef BOOST_HAS_POSIX + else if(what == "-posix") + time_posix = true; +#endif +#ifdef BOOST_HAS_PCRE + else if(what == "-pcre") + time_pcre = true; +#endif + else if(what == "-all") + { + time_boost = true; + time_localised_boost = true; +#ifdef BOOST_HAS_GRETA + time_greta = true; + time_safe_greta = true; +#endif +#ifdef BOOST_HAS_POSIX + time_posix = true; +#endif +#ifdef BOOST_HAS_PCRE + time_pcre = true; +#endif + } + else if(what == "-test-matches") + test_matches = true; + else if(what == "-test-code") + test_code = true; + else if(what == "-test-html") + test_html = true; + else if(what == "-test-short-twain") + test_short_twain = true; + else if(what == "-test-long-twain") + test_long_twain = true; + else if(what == "-test-all") + { + test_matches = true; + test_code = true; + test_html = true; + test_short_twain = true; + test_long_twain = true; + } + else if((what == "-h") || (what == "--help")) + return show_usage(); + else if((what[0] == '-') || 
(what[0] == '/')) + { + std::cerr << "Unknown argument: \"" << what << "\"" << std::endl; + return 1; + } + else if(html_template_file.size() == 0) + { + html_template_file = what; + load_file(html_contents, what.c_str()); + } + else if(html_out_file.size() == 0) + html_out_file = what; + else + { + std::cerr << "Unexpected argument: \"" << what << "\"" << std::endl; + return 1; + } + return 0; +} + +int show_usage() +{ + std::cout << + "Usage\n" + "regex_comparison [-h] [library options] [test options] [html_template html_output_file]\n" + " -h Show help\n\n" + " library options:\n" + " -b Apply tests to boost library\n" + " -bl Apply tests to boost library with C++ locale\n" +#ifdef BOOST_HAS_GRETA + " -g Apply tests to GRETA library\n" + " -gs Apply tests to GRETA library (in non-recursive mode)\n" +#endif +#ifdef BOOST_HAS_POSIX + " -posix Apply tests to POSIX library\n" +#endif +#ifdef BOOST_HAS_PCRE + " -pcre Apply tests to PCRE library\n" +#endif + " -all Apply tests to all libraries\n\n" + " test options:\n" + " -test-matches Test short matches\n" + " -test-code Test c++ code examples\n" + " -test-html Test c++ code examples\n" + " -test-short-twain Test short searches\n" + " -test-long-twain Test long searches\n" + " -test-all Test everthing\n"; + return 1; +} + +void load_file(std::string& text, const char* file) +{ + std::deque temp_copy; + std::ifstream is(file); + if(!is.good()) + { + std::string msg("Unable to open file: \""); + msg.append(file); + msg.append("\""); + throw std::runtime_error(msg); + } + is.seekg(0, std::ios_base::end); + std::istream::pos_type pos = is.tellg(); + is.seekg(0, std::ios_base::beg); + text.erase(); + text.reserve(pos); + std::istreambuf_iterator it(is); + std::copy(it, std::istreambuf_iterator(), std::back_inserter(text)); +} + +void print_result(std::ostream& os, double time, double best) +{ + static const char* suffixes[] = {"s", "ms", "us", "ns", "ps", }; + + if(time < 0) + { + os << "NA"; + return; + } + double rel = 
time / best; + bool highlight = ((rel > 0) && (rel < 1.1)); + unsigned suffix = 0; + while(time < 0) + { + time *= 1000; + ++suffix; + } + os << ""; + if(highlight) + os << ""; + if(rel <= 1000) + os << std::setprecision(3) << rel; + else + os << (int)rel; + os << "
("; + if(time <= 1000) + os << std::setprecision(3) << time; + else + os << (int)time; + os << suffixes[suffix] << ")"; + if(highlight) + os << "
"; + os << ""; +} + +void output_html_results(bool show_description, const std::string& tagname) +{ + std::stringstream os; + if(result_list.size()) + { + // + // start by outputting the table header: + // + os << "\n"; + os << ""; + if(show_description) + os << ""; +#if defined(BOOST_HAS_GRETA) + if(time_greta == true) + os << ""; + if(time_safe_greta == true) + os << ""; +#endif + if(time_boost == true) + os << ""; + if(time_localised_boost == true) + os << ""; +#if defined(BOOST_HAS_POSIX) + if(time_posix == true) + os << ""; +#endif +#ifdef BOOST_HAS_PCRE + if(time_pcre == true) + os << ""; +#endif + os << "\n"; + + // + // Now enumerate through all the test results: + // + std::list::const_iterator first, last; + first = result_list.begin(); + last = result_list.end(); + while(first != last) + { + os << ""; + if(show_description) + os << ""; +#if defined(BOOST_HAS_GRETA) + if(time_greta == true) + print_result(os, first->greta_time, first->factor); + if(time_safe_greta == true) + print_result(os, first->safe_greta_time, first->factor); +#endif +#if defined(BOOST_HAS_POSIX) + if(time_boost == true) + print_result(os, first->boost_time, first->factor); + if(time_localised_boost == true) + print_result(os, first->localised_boost_time, first->factor); +#endif + if(time_posix == true) + print_result(os, first->posix_time, first->factor); +#if defined(BOOST_HAS_PCRE) + if(time_pcre == true) + print_result(os, first->pcre_time, first->factor); +#endif + os << "\n"; + ++first; + } + os << "
ExpressionTextGRETAGRETA
(non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
" << first->expression << "" << first->description << "
\n"; + result_list.clear(); + } + else + { + os << "

Results not available...

\n"; + } + + std::string result = os.str(); + + unsigned int pos = html_contents.find(tagname); + if(pos != std::string::npos) + { + html_contents.replace(pos, tagname.size(), result); + } +} + +void output_final_html() +{ + if(html_out_file.size()) + { + std::ofstream os(html_out_file.c_str()); + os << html_contents; + } + else + { + std::cout << html_contents; + } +} \ No newline at end of file diff --git a/performance/input.html b/performance/input.html new file mode 100644 index 00000000..b218c211 --- /dev/null +++ b/performance/input.html @@ -0,0 +1,59 @@ + + + Regular Expression Performance Comparison + + + + + + +

Regular Expression Performance Comparison

+

The Boost and GRETA regular expression libraries have slightly different + interfaces, and it has been suggested that GRETA's interface allows for a more + efficient implementation. The following tables provide comparisons between:

+

GRETA.

+

The Boost regex library.

+

Henry Spencer's regular expression library + - this is provided for comparison as a typical non-backtracking implementation.

+

+ Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the + code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever, + care should be taken in interpreting the results: only sensible regular + expressions (rather than pathological cases) are given, and most are taken from the + Boost regex examples, or from the Library of + Regular Expressions. In addition, some variation in the relative + performance of these libraries can be expected on other machines - as memory + access and processor caching effects can be quite large for most finite state + machine algorithms.

+

Comparison 1: Long Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a long English language text was measured + (mtent12.txt + from Project Gutenberg, 19Mb). 

+

%long_twain_search%

+

Comparison 2: Medium Sized Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a medium sized English language text was + measured (the first 50K from mtent12.txt). 

+

%short_twain_search%

+

Comparison 3: C++ Code Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within the C++ source file + boost/crc.hpp was measured. 

+

%code_search%

+

+

Comparison 4: HTML Document Search

+ +

For each of the following regular expressions the time taken to find all + occurrences of the expression within the html file libs/libraries.htm + was measured. 

+

%html_search%

+

Comparison 5: Simple Matches

+

+ For each of the following regular expressions the time taken to match against + the text indicated was measured. 

+

%short_matches%

+
+

Copyright John Maddock April 2003, all rights reserved.

+ + diff --git a/performance/main.cpp b/performance/main.cpp new file mode 100644 index 00000000..0b6db1b7 --- /dev/null +++ b/performance/main.cpp @@ -0,0 +1,251 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include +#include +#include +#include +#include +#include "regex_comparison.hpp" + + +void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase) +{ + double time; + results r(re, description); + + std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl; + +#ifdef BOOST_HAS_GRETA + if(time_greta == true) + { + time = g::time_match(re, text, icase); + r.greta_time = time; + std::cout << "\tGRETA regex: " << time << "s\n"; + } + if(time_safe_greta == true) + { + time = gs::time_match(re, text, icase); + r.safe_greta_time = time; + std::cout << "\tSafe GRETA regex: " << time << "s\n"; + } +#endif + if(time_boost == true) + { + time = b::time_match(re, text, icase); + r.boost_time = time; + std::cout << "\tBoost regex: " << time << "s\n"; + } + if(time_localised_boost == true) + { + time = bl::time_match(re, text, icase); + r.localised_boost_time = time; + std::cout << "\tBoost regex (C++ locale): " << time << "s\n"; + } +#ifdef BOOST_HAS_POSIX + if(time_posix == true) + { + time = posix::time_match(re, text, icase); + r.posix_time = time; + std::cout << "\tPOSIX regex: " << time << "s\n"; + } +#endif +#ifdef BOOST_HAS_PCRE + if(time_pcre == true) + { + time = pcr::time_match(re, text, 
icase); + r.pcre_time = time; + std::cout << "\tPCRE regex: " << time << "s\n"; + } +#endif + r.finalise(); + result_list.push_back(r); +} + +void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase) +{ + std::cout << "Testing: " << re << std::endl; + + double time; + results r(re, description); + +#ifdef BOOST_HAS_GRETA + if(time_greta == true) + { + time = g::time_find_all(re, text, icase); + r.greta_time = time; + std::cout << "\tGRETA regex: " << time << "s\n"; + } + if(time_safe_greta == true) + { + time = gs::time_find_all(re, text, icase); + r.safe_greta_time = time; + std::cout << "\tSafe GRETA regex: " << time << "s\n"; + } +#endif + if(time_boost == true) + { + time = b::time_find_all(re, text, icase); + r.boost_time = time; + std::cout << "\tBoost regex: " << time << "s\n"; + } + if(time_localised_boost == true) + { + time = bl::time_find_all(re, text, icase); + r.localised_boost_time = time; + std::cout << "\tBoost regex (C++ locale): " << time << "s\n"; + } +#ifdef BOOST_HAS_POSIX + if(time_posix == true) + { + time = posix::time_find_all(re, text, icase); + r.posix_time = time; + std::cout << "\tPOSIX regex: " << time << "s\n"; + } +#endif +#ifdef BOOST_HAS_PCRE + if(time_pcre == true) + { + time = pcr::time_find_all(re, text, icase); + r.pcre_time = time; + std::cout << "\tPCRE regex: " << time << "s\n"; + } +#endif + r.finalise(); + result_list.push_back(r); +} + +int cpp_main(int argc, char * argv[]) +{ + // start by processing the command line args: + if(argc < 2) + return show_usage(); + int result = 0; + for(int c = 1; c < argc; ++c) + { + result += handle_argument(argv[c]); + } + if(result) + return result; + + if(test_matches) + { + // start with a simple test, this is basically a measure of the minimal overhead + // involved in calling a regex matcher: + test_match("abc", "abc"); + // these are from the regex docs: + test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response 
which contains a message string"); + test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456"); + // these are from http://www.regxlib.com/ + test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john_maddock@compuserve.com"); + test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu"); + test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv"); + test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ"); + test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA"); + test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ"); + test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001"); + test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001"); + test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123"); + test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159"); + test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159"); + } + output_html_results(true, "%short_matches%"); + + std::string file_contents; + + if(test_code) + { + load_file(file_contents, "../../../boost/crc.hpp"); + + const char* highlight_expression = // preprocessor directives: index 1 + "(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|" + // comment: index 2 + "(//[^\\n]*|/\\*.*?\\*/)|" + // literals: index 3 + "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" + // string literals: index 4 + "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" + // keywords: index 5 + "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" + 
"|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" + "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" + "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" + "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" + "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" + "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" + "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" + "|using|virtual|void|volatile|wchar_t|while)\\>" + ; + + const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" + "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?" + "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" + "(\\{|:[^;\\{()]*\\{)"; + + const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)"; + const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|]+>)"; + + + test_find_all(class_expression, file_contents); + test_find_all(highlight_expression, file_contents); + test_find_all(include_expression, file_contents); + test_find_all(boost_include_expression, file_contents); + } + output_html_results(false, "%code_search%"); + + if(test_html) + { + load_file(file_contents, "../../../libs/libraries.htm"); + test_find_all("beman|john|dave", file_contents, true); + test_find_all("

.*?

", file_contents, true); + test_find_all("]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); + test_find_all("]*>.*?", file_contents, true); + test_find_all("]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); + test_find_all("]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?", file_contents, true); + } + output_html_results(false, "%html_search%"); + + if(test_short_twain) + { + load_file(file_contents, "short_twain.txt"); + + test_find_all("Twain", file_contents); + test_find_all("Huck[[:alpha:]]+", file_contents); + test_find_all("[[:alpha:]]+ing", file_contents); + test_find_all("^[^\n]*?Twain", file_contents); + test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); + test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); + } + output_html_results(false, "%short_twain_search%"); + + if(test_long_twain) + { + load_file(file_contents, "mtent12.txt"); + + test_find_all("Twain", file_contents); + test_find_all("Huck[[:alpha:]]+", file_contents); + test_find_all("[[:alpha:]]+ing", file_contents); + test_find_all("^[^\n]*?Twain", file_contents); + test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); + time_posix = false; + test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); + time_posix = true; + } + output_html_results(false, "%long_twain_search%"); + + output_final_html(); + return 0; +} + diff --git a/performance/regex_comparison.hpp b/performance/regex_comparison.hpp new file mode 100644 index 00000000..9e7902cb --- /dev/null +++ b/performance/regex_comparison.hpp @@ -0,0 +1,135 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * All rights reserved. + * May not be transfered or disclosed to a third party without + * prior consent of the author. 
+ * + */ + + +#ifndef REGEX_COMPARISON_HPP +#define REGEX_COMPARISON_HPP + +#include +#include +#include + +// +// globals: +// +extern bool time_boost; +extern bool time_localised_boost; +extern bool time_greta; +extern bool time_safe_greta; +extern bool time_posix; +extern bool time_pcre; + +extern bool test_matches; +extern bool test_short_twain; +extern bool test_long_twain; +extern bool test_code; +extern bool test_html; + +extern std::string html_template_file; +extern std::string html_out_file; +extern std::string html_contents; + + +int handle_argument(const std::string& what); +int show_usage(); +void load_file(std::string& text, const char* file); +void output_html_results(bool show_description, const std::string& tagname); +void output_final_html(); + + +struct results +{ + double boost_time; + double localised_boost_time; + double greta_time; + double safe_greta_time; + double posix_time; + double pcre_time; + double factor; + std::string expression; + std::string description; + results(const std::string& ex, const std::string& desc) + : boost_time(-1), + greta_time(-1), + safe_greta_time(-1), + posix_time(-1), + pcre_time(-1), + factor(std::numeric_limits::max()), + expression(ex), + description(desc) + {} + void finalise() + { + if((boost_time >= 0) && (boost_time < factor)) + factor = boost_time; + if((greta_time >= 0) && (greta_time < factor)) + factor = greta_time; + if((safe_greta_time >= 0) && (safe_greta_time < factor)) + factor = safe_greta_time; + if((posix_time >= 0) && (posix_time < factor)) + factor = posix_time; + if((pcre_time >= 0) && (pcre_time < factor)) + factor = pcre_time; + if((factor >= 0) && (factor < factor)) + factor = factor; + } +}; + +extern std::list result_list; + + +namespace b { +// boost tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + +} +namespace bl { +// localised boost tests: +double 
time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + +} +namespace pcr { +// pcre tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + +} +namespace g { +// greta tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + +} +namespace gs { +// safe greta tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + +} +namespace posix { +// safe greta tests: +double time_match(const std::string& re, const std::string& text, bool icase); +double time_find_all(const std::string& re, const std::string& text, bool icase); + +} +void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase = false); +void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase = false); +inline void test_match(const std::string& re, const std::string& text, bool icase = false) +{ test_match(re, text, text, icase); } +inline void test_find_all(const std::string& re, const std::string& text, bool icase = false) +{ test_find_all(re, text, "", icase); } + + +#define REPEAT_COUNT 10 + +#endif diff --git a/performance/time_boost.cpp b/performance/time_boost.cpp new file mode 100644 index 00000000..2bb0b58b --- /dev/null +++ b/performance/time_boost.cpp @@ -0,0 +1,98 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright 
notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include "regex_comparison.hpp" +#include +#include + +namespace b{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::smatch what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_match(text, what, e); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_match(text, what, e); + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +bool dummy_grep_proc(const boost::smatch&) +{ return true; } + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + boost::regex e(re, (icase ? 
boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::smatch what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_grep(&dummy_grep_proc, text, e); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result >10) + return result / iter; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_grep(&dummy_grep_proc, text, e); + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +} diff --git a/performance/time_greta.cpp b/performance/time_greta.cpp new file mode 100644 index 00000000..f6e4b309 --- /dev/null +++ b/performance/time_greta.cpp @@ -0,0 +1,125 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include "regex_comparison.hpp" +#if defined(BOOST_HAS_GRETA) +#include +#include +#include "regexpr2.h" + +namespace g{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + regex::rpattern e(re, (icase ? 
regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE)); + regex::match_results what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + assert(e.match(text, what)); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text, what); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text, what); + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE)); + regex::match_results what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text.begin(), text.end(), what); + while(what.backref(0).matched) + { + e.match(what.backref(0).end(), text.end(), what); + } + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result > 10) + return result / iter; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text.begin(), text.end(), what); + while(what.backref(0).matched) + { + e.match(what.backref(0).end(), text.end(), what); + } + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +} + +#else + +namespace g { + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} + +double time_find_all(const std::string& re, const 
std::string& text, bool icase) +{ + return -1; +} + +} + +#endif + diff --git a/performance/time_localised_boost.cpp b/performance/time_localised_boost.cpp new file mode 100644 index 00000000..34b67424 --- /dev/null +++ b/performance/time_localised_boost.cpp @@ -0,0 +1,98 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include "regex_comparison.hpp" +#include +#include + +namespace bl{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + boost::reg_expression > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::smatch what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_match(text, what, e); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_match(text, what, e); + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +bool dummy_grep_proc(const boost::smatch&) +{ return true; } + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + boost::reg_expression > e(re, (icase ? 
boost::regbase::perl | boost::regbase::icase : boost::regbase::perl)); + boost::smatch what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_grep(&dummy_grep_proc, text, e); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result >10) + return result / iter; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + boost::regex_grep(&dummy_grep_proc, text, e); + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +} diff --git a/performance/time_pcre.cpp b/performance/time_pcre.cpp new file mode 100644 index 00000000..dd73d553 --- /dev/null +++ b/performance/time_pcre.cpp @@ -0,0 +1,180 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include +#include +#include "regex_comparison.hpp" +#ifdef BOOST_HAS_PCRE +#include "pcre.h" +#include + +namespace pcr{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + pcre *ppcre; + const char *error; + int erroffset; + + int what[50]; + + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + + if(0 == (ppcre = pcre_compile(re.c_str(), (icase ? 
PCRE_CASELESS | PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE : PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE), + &error, &erroffset, NULL))) + { + free(ppcre); + return -1; + } + + pcre_extra *pe; + pe = pcre_study(ppcre, 0, &error); + if(error) + { + free(ppcre); + free(pe); + return -1; + } + + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)); + } + run = tim.elapsed(); + result = std::min(run, result); + } + free(ppcre); + free(pe); + return result / iter; +} + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + pcre *ppcre; + const char *error; + int erroffset; + + int what[50]; + + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + int exec_result; + int matches; + + if(0 == (ppcre = pcre_compile(re.c_str(), (icase ? 
PCRE_CASELESS | PCRE_DOTALL | PCRE_MULTILINE : PCRE_DOTALL | PCRE_MULTILINE), &error, &erroffset, NULL))) + { + free(ppcre); + return -1; + } + + pcre_extra *pe; + pe = pcre_study(ppcre, 0, &error); + if(error) + { + free(ppcre); + free(pe); + return -1; + } + + do + { + int startoff; + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + matches = 0; + startoff = 0; + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + while(exec_result >= 0) + { + ++matches; + startoff = what[1]; + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + } + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result >10) + return result / iter; + + result = DBL_MAX; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + int startoff; + matches = 0; + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + matches = 0; + startoff = 0; + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + while(exec_result >= 0) + { + ++matches; + startoff = what[1]; + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + } + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +} +#else + +namespace pcr{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} + +} + +#endif \ No newline at end of file diff --git a/performance/time_posix.cpp b/performance/time_posix.cpp new file mode 100644 index 00000000..cd2cec68 --- /dev/null +++ b/performance/time_posix.cpp @@ -0,0 +1,143 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its 
documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include +#include +#include "regex_comparison.hpp" +#ifdef BOOST_HAS_POSIX +#include +#include "regex.h" + +namespace posix{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + regex_t e; + regmatch_t what[20]; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED))) + return -1; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + regexec(&e, text.c_str(), e.re_nsub, what, 0); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + regexec(&e, text.c_str(), e.re_nsub, what, 0); + } + run = tim.elapsed(); + result = std::min(run, result); + } + regfree(&e); + return result / iter; +} + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + regex_t e; + regmatch_t what[20]; + memset(what, 0, sizeof(what)); + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + int exec_result; + int matches; + if(0 != regcomp(&e, re.c_str(), (icase ? 
REG_ICASE | REG_EXTENDED : REG_EXTENDED))) + return -1; + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + what[0].rm_so = 0; + what[0].rm_eo = text.size(); + matches = 0; + exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND); + while(exec_result == 0) + { + ++matches; + what[0].rm_so = what[0].rm_eo; + what[0].rm_eo = text.size(); + exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND); + } + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result >10) + return result / iter; + + result = DBL_MAX; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + what[0].rm_so = 0; + what[0].rm_eo = text.size(); + matches = 0; + exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND); + while(exec_result == 0) + { + ++matches; + what[0].rm_so = what[0].rm_eo; + what[0].rm_eo = text.size(); + exec_result = regexec(&e, text.c_str(), 20, what, REG_STARTEND); + } + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +} +#else + +namespace posix{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} + +} +#endif \ No newline at end of file diff --git a/performance/time_safe_greta.cpp b/performance/time_safe_greta.cpp new file mode 100644 index 00000000..6c600bda --- /dev/null +++ b/performance/time_safe_greta.cpp @@ -0,0 +1,127 @@ +/* + * + * Copyright (c) 2002 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in 
supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + +#include "regex_comparison.hpp" +#if defined(BOOST_HAS_GRETA) + +#include +#include +#include "regexpr2.h" + +namespace gs{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE), regex::MODE_SAFE); + regex::match_results what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + assert(e.match(text, what)); + do + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text, what); + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text, what); + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + regex::rpattern e(re, (icase ? 
regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE), regex::MODE_SAFE); + regex::match_results what; + boost::timer tim; + int iter = 1; + int counter, repeats; + double result = 0; + double run; + do + { + bool r; + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text.begin(), text.end(), what); + while(what.backref(0).matched) + { + e.match(what.backref(0).end(), text.end(), what); + } + } + result = tim.elapsed(); + iter *= 2; + }while(result < 0.5); + iter /= 2; + + if(result > 10) + return result / iter; + + // repeat test and report least value for consistency: + for(repeats = 0; repeats < REPEAT_COUNT; ++repeats) + { + tim.restart(); + for(counter = 0; counter < iter; ++counter) + { + e.match(text.begin(), text.end(), what); + while(what.backref(0).matched) + { + e.match(what.backref(0).end(), text.end(), what); + } + } + run = tim.elapsed(); + result = std::min(run, result); + } + return result / iter; +} + +} + +#else + +namespace gs{ + +double time_match(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} + +double time_find_all(const std::string& re, const std::string& text, bool icase) +{ + return -1; +} + +} + +#endif + diff --git a/performance/vc71-results.htm b/performance/vc71-results.htm new file mode 100644 index 00000000..7a8ff40b --- /dev/null +++ b/performance/vc71-results.htm @@ -0,0 +1,671 @@ + + + Regular Expression Performance Comparison + + + + + + +

Regular Expression Performance Comparison

+

The Boost and GRETA regular expression libraries have slightly different + interfaces, and it has been suggested that GRETA's interface allows for a more + efficient implementation. The following tables provide comparisons between:

+

GRETA.

+

The Boost regex library.

+

Henry Spencer's regular expression library + - this is provided for comparison as a typical non-backtracking implementation.

+

+ Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the + code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever, + care should be taken in interpreting the results: only sensible regular + expressions (rather than pathological cases) are given, and most are taken from the + Boost regex examples, or from the Library of + Regular Expressions. In addition, some variation in the relative + performance of these libraries can be expected on other machines, as memory + access and processor caching effects can be quite large for most finite state + machine algorithms.

+

Comparison 1: Long Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a long English language text was measured + (mtent12.txt + from Project Gutenberg, 19Mb). 

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionGRETAGRETA
+ (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
Twain9.29
+ (0.00309s)
32.9
+ (0.011s)
1.34
+ (0.000445s)
1.37
+ (0.000455s)
6.23
+ (0.00207s)
1
+ (0.000333s)
Huck[[:alpha:]]+12.9
+ (0.00309s)
44.4
+ (0.0106s)
1.79
+ (0.00043s)
1.82
+ (0.000436s)
1
+ (0.00024s)
1.06
+ (0.000254s)
[[:alpha:]]+ing7.6
+ (0.0178s)
15.2
+ (0.0357s)
1
+ (0.00235s)
0.867
+ (0.00204s)
4.26
+ (0.01s)
6
+ (0.0141s)
^[^ ]*?Twain5.92
+ (0.00626s)
16.3
+ (0.0172s)
1
+ (0.00106s)
0.666
+ (0.000704s)
NA2.04
+ (0.00215s)
Tom|Sawyer|Huckleberry|Finn8
+ (0.00828s)
20
+ (0.0207s)
1
+ (0.00104s)
0.585
+ (0.000605s)
42.3
+ (0.0438s)
1.12
+ (0.00115s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)6.42
+ (0.012s)
16.3
+ (0.0307s)
1
+ (0.00188s)
0.719
+ (0.00135s)
NA1.21
+ (0.00227s)
+

+

Comparison 2: Medium Sized Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within a medium sized English language text was + measured (the first 50K from mtent12.txt). 

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionGRETAGRETA
+ (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
Twain9.29
+ (0.00309s)
32.5
+ (0.0108s)
1.34
+ (0.000445s)
1.37
+ (0.000455s)
6.24
+ (0.00207s)
1
+ (0.000333s)
Huck[[:alpha:]]+12.9
+ (0.00309s)
47
+ (0.0113s)
1.77
+ (0.000425s)
1.84
+ (0.00044s)
1
+ (0.00024s)
1.04
+ (0.00025s)
[[:alpha:]]+ing7.61
+ (0.0178s)
15.2
+ (0.0356s)
1
+ (0.00234s)
0.867
+ (0.00203s)
4.27
+ (0.01s)
5.94
+ (0.0139s)
^[^ ]*?Twain5.72
+ (0.00626s)
15.5
+ (0.0169s)
1
+ (0.00109s)
0.644
+ (0.000704s)
NA1.93
+ (0.00211s)
Tom|Sawyer|Huckleberry|Finn7.85
+ (0.00828s)
19.9
+ (0.021s)
1
+ (0.00105s)
0.575
+ (0.000606s)
41.5
+ (0.0438s)
1.09
+ (0.00115s)
(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)6.49
+ (0.012s)
16.5
+ (0.0307s)
1
+ (0.00186s)
0.737
+ (0.00137s)
216
+ (0.401s)
1.24
+ (0.00231s)
+

+

Comparison 3: C++ Code Search

+

For each of the following regular expressions the time taken to find all + occurrences of the expression within the C++ source file + boost/crc.hpp was measured. 

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionGRETAGRETA
+ (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([ + ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{)9.58
+ (0.0019s)
40.3
+ (0.00798s)
1
+ (0.000198s)
0.901
+ (0.000178s)
607
+ (0.12s)
3.16
+ (0.000626s)
(^[ + ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\>1
+ (0.0116s)
2.7
+ (0.0313s)
1.22
+ (0.0141s)
0.946
+ (0.011s)
NA1.41
+ (0.0163s)
^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>)6.05
+ (0.00195s)
25.7
+ (0.0083s)
1
+ (0.000323s)
0.939
+ (0.000303s)
107
+ (0.0344s)
1.69
+ (0.000547s)
^[ ]*#[ ]*include[ ]+("boost/[^"]+"|]+>)5.8
+ (0.00196s)
24.6
+ (0.0083s)
1
+ (0.000337s)
1.07
+ (0.000362s)
122
+ (0.0413s)
1.59
+ (0.000538s)
+

+

+

Comparison 4: HTML Document Search

+ +

For each of the following regular expressions the time taken to find all + occurrences of the expression within the html file libs/libraries.htm + was measured. 

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionGRETAGRETA
+ (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
beman|john|dave6.69
+ (0.00321s)
18.9
+ (0.00908s)
1
+ (0.000479s)
0.561
+ (0.000269s)
23.8
+ (0.0114s)
1
+ (0.000479s)

.*?

+
+
5.89
+ (0.00164s)
19.6
+ (0.00548s)
1
+ (0.000279s)
1.05
+ (0.000293s)
NA1.11
+ (0.000308s)
]+href=("[^"]*"|[^[:space:]]+)[^>]*>3.94
+ (0.00219s)
10.4
+ (0.00579s)
1.09
+ (0.000606s)
0.825
+ (0.000459s)
221
+ (0.123s)
1
+ (0.000557s)
]*>.*?6.07
+ (0.0016s)
19.8
+ (0.00524s)
1.37
+ (0.000362s)
0.722
+ (0.000191s)
NA1
+ (0.000264s)
]+src=("[^"]*"|[^[:space:]]+)[^>]*>6.77
+ (0.00162s)
22.5
+ (0.0054s)
1.1
+ (0.000264s)
1.2
+ (0.000289s)
120
+ (0.0288s)
1
+ (0.00024s)
]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*?6.77
+ (0.00162s)
22.5
+ (0.0054s)
1.12
+ (0.000269s)
1.2
+ (0.000289s)
NA1
+ (0.00024s)
+

+

Comparison 5: Simple Matches

+

+ For each of the following regular expressions the time taken to match against + the text indicated was measured. 

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExpressionTextGRETAGRETA
+ (non-recursive mode)
BoostBoost + C++ localePOSIXPCRE
abcabc1.43
+ (2.25e-007s)
1.85
+ (2.91e-007s)
1.27
+ (2.01e-007s)
1.29
+ (2.03e-007s)
1.94
+ (3.06e-007s)
1
+ (1.58e-007s)
^([0-9]+)(\-| |$)(.*)$100- this is a line of ftp response which contains a message string1
+ (6.97e-007s)
2.69
+ (1.87e-006s)
1.89
+ (1.32e-006s)
1.86
+ (1.3e-006s)
298
+ (0.000208s)
1.29
+ (8.98e-007s)
([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}1234-5678-1234-4562.41
+ (2.14e-006s)
2.97
+ (2.64e-006s)
2.37
+ (2.1e-006s)
2.24
+ (1.99e-006s)
29.6
+ (2.63e-005s)
1
+ (8.88e-007s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$john_maddock@compuserve.com1.83
+ (3.7e-006s)
2.34
+ (4.74e-006s)
1.59
+ (3.21e-006s)
1.55
+ (3.13e-006s)
172
+ (0.000347s)
1
+ (2.02e-006s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$foo12@foo.edu1.71
+ (3.09e-006s)
2.19
+ (3.97e-006s)
1.71
+ (3.09e-006s)
1.64
+ (2.98e-006s)
123
+ (0.000222s)
1
+ (1.81e-006s)
^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$bob.smith@foo.tv1.72
+ (3.09e-006s)
2.21
+ (3.97e-006s)
1.72
+ (3.09e-006s)
1.7
+ (3.06e-006s)
133
+ (0.00024s)
1
+ (1.79e-006s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$EH10 2QQ1.29
+ (9.37e-007s)
1.71
+ (1.24e-006s)
1.29
+ (9.35e-007s)
1.18
+ (8.59e-007s)
7.79
+ (5.65e-006s)
1
+ (7.26e-007s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$G1 1AA1.41
+ (9.26e-007s)
2
+ (1.32e-006s)
1.38
+ (9.07e-007s)
1.31
+ (8.6e-007s)
7.41
+ (4.88e-006s)
1
+ (6.59e-007s)
^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$SW1 1ZZ1.45
+ (9.54e-007s)
1.88
+ (1.24e-006s)
1.42
+ (9.36e-007s)
1.32
+ (8.69e-007s)
7.77
+ (5.12e-006s)
1
+ (6.59e-007s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$4/1/20011.74
+ (1.01e-006s)
2.3
+ (1.34e-006s)
1.33
+ (7.73e-007s)
1.3
+ (7.54e-007s)
9.85
+ (5.73e-006s)
1
+ (5.82e-007s)
^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$12/12/20011.77
+ (1.01e-006s)
2.23
+ (1.28e-006s)
1.45
+ (8.31e-007s)
1.38
+ (7.93e-007s)
11.1
+ (6.34e-006s)
1
+ (5.73e-007s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$1231.23
+ (7.65e-007s)
1.66
+ (1.03e-006s)
1.4
+ (8.69e-007s)
1.31
+ (8.12e-007s)
4.86
+ (3.02e-006s)
1
+ (6.21e-007s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$+3.141591.59
+ (1.05e-006s)
1.97
+ (1.3e-006s)
1.45
+ (9.54e-007s)
1.32
+ (8.69e-007s)
9.51
+ (6.26e-006s)
1
+ (6.59e-007s)
^[-+]?[[:digit:]]*\.?[[:digit:]]*$-3.141591.64
+ (1.07e-006s)
2
+ (1.3e-006s)
1.44
+ (9.35e-007s)
1.35
+ (8.78e-007s)
9.53
+ (6.19e-006s)
1
+ (6.49e-007s)
+

+
+

Copyright John Maddock September 2002, all rights reserved.

+ +