diff --git a/performance/command_line.cpp b/performance/command_line.cpp
new file mode 100644
index 00000000..6500a895
--- /dev/null
+++ b/performance/command_line.cpp
@@ -0,0 +1,285 @@
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "regex_comparison.hpp"
+
+//
+// globals:
+//
+// Library selection flags. Each is switched on by handle_argument() when the
+// matching option ("-b", "-bl", "-g", "-gs", "-posix", "-pcre") or "-all" is seen.
+bool time_boost = false;
+bool time_localised_boost = false;
+bool time_greta = false;
+bool time_safe_greta = false;
+bool time_posix = false;
+bool time_pcre = false;
+
+// Test selection flags, switched on by the "-test-*" options or "-test-all".
+bool test_matches = false;
+bool test_code = false;
+bool test_html = false;
+bool test_short_twain = false;
+bool test_long_twain = false;
+
+
+// First positional argument: name of the HTML template file.
+std::string html_template_file;
+// Second positional argument: output file name (empty means write to std::cout).
+std::string html_out_file;
+// Contents of the template; tags in it are replaced by output_html_results().
+std::string html_contents;
+// Results gathered since the last call to output_html_results(), which clears it.
+// NOTE(review): the element type appears to have been lost from this patch text;
+// usage elsewhere pushes `results` objects into it.
+std::list result_list;
+
+//
+// Interprets one command line argument and updates the global option state.
+// Returns 0 when the argument was accepted, non-zero otherwise (unknown
+// option, surplus positional argument, or a request for help).
+//
+int handle_argument(const std::string& what)
+{
+   // library selection:
+   if(what == "-b")
+   {
+      time_boost = true;
+      return 0;
+   }
+   if(what == "-bl")
+   {
+      time_localised_boost = true;
+      return 0;
+   }
+#ifdef BOOST_HAS_GRETA
+   if(what == "-g")
+   {
+      time_greta = true;
+      return 0;
+   }
+   if(what == "-gs")
+   {
+      time_safe_greta = true;
+      return 0;
+   }
+#endif
+#ifdef BOOST_HAS_POSIX
+   if(what == "-posix")
+   {
+      time_posix = true;
+      return 0;
+   }
+#endif
+#ifdef BOOST_HAS_PCRE
+   if(what == "-pcre")
+   {
+      time_pcre = true;
+      return 0;
+   }
+#endif
+   if(what == "-all")
+   {
+      // every library that this build was configured with:
+      time_boost = true;
+      time_localised_boost = true;
+#ifdef BOOST_HAS_GRETA
+      time_greta = true;
+      time_safe_greta = true;
+#endif
+#ifdef BOOST_HAS_POSIX
+      time_posix = true;
+#endif
+#ifdef BOOST_HAS_PCRE
+      time_pcre = true;
+#endif
+      return 0;
+   }
+
+   // test selection:
+   if(what == "-test-matches")
+   {
+      test_matches = true;
+      return 0;
+   }
+   if(what == "-test-code")
+   {
+      test_code = true;
+      return 0;
+   }
+   if(what == "-test-html")
+   {
+      test_html = true;
+      return 0;
+   }
+   if(what == "-test-short-twain")
+   {
+      test_short_twain = true;
+      return 0;
+   }
+   if(what == "-test-long-twain")
+   {
+      test_long_twain = true;
+      return 0;
+   }
+   if(what == "-test-all")
+   {
+      test_matches = true;
+      test_code = true;
+      test_html = true;
+      test_short_twain = true;
+      test_long_twain = true;
+      return 0;
+   }
+
+   // help:
+   if((what == "-h") || (what == "--help"))
+      return show_usage();
+
+   // anything else that looks like an option is an error:
+   const char lead = what[0];
+   if((lead == '-') || (lead == '/'))
+   {
+      std::cerr << "Unknown argument: \"" << what << "\"" << std::endl;
+      return 1;
+   }
+
+   // positional arguments: template file first, then output file:
+   if(html_template_file.empty())
+   {
+      html_template_file = what;
+      load_file(html_contents, what.c_str());
+      return 0;
+   }
+   if(html_out_file.empty())
+   {
+      html_out_file = what;
+      return 0;
+   }
+
+   std::cerr << "Unexpected argument: \"" << what << "\"" << std::endl;
+   return 1;
+}
+
+//
+// Writes the command line synopsis to std::cout.
+// Always returns 1, so callers can hand the value straight back as a
+// non-zero result (handle_argument and cpp_main both do this).
+//
+int show_usage()
+{
+   std::cout <<
+      "Usage\n"
+      "regex_comparison [-h] [library options] [test options] [html_template html_output_file]\n"
+      " -h Show help\n\n"
+      " library options:\n"
+      " -b Apply tests to boost library\n"
+      " -bl Apply tests to boost library with C++ locale\n"
+#ifdef BOOST_HAS_GRETA
+      " -g Apply tests to GRETA library\n"
+      " -gs Apply tests to GRETA library (in non-recursive mode)\n"
+#endif
+#ifdef BOOST_HAS_POSIX
+      " -posix Apply tests to POSIX library\n"
+#endif
+#ifdef BOOST_HAS_PCRE
+      " -pcre Apply tests to PCRE library\n"
+#endif
+      " -all Apply tests to all libraries\n\n"
+      " test options:\n"
+      " -test-matches Test short matches\n"
+      " -test-code Test c++ code examples\n"
+      // -test-html drives the HTML document searches, not the C++ code ones:
+      " -test-html Test html document searches\n"
+      " -test-short-twain Test short searches\n"
+      " -test-long-twain Test long searches\n"
+      " -test-all Test everything\n";
+   return 1;
+}
+
+//
+// Replaces the contents of text with the whole of the named file.
+//   text: receives the file contents (any previous contents are discarded).
+//   file: path of the file to read.
+// Throws std::runtime_error if the file can not be opened.
+//
+void load_file(std::string& text, const char* file)
+{
+   std::ifstream is(file);
+   if(!is.good())
+   {
+      std::string msg("Unable to open file: \"");
+      msg.append(file);
+      msg.append("\"");
+      throw std::runtime_error(msg);
+   }
+   //
+   // Use the file length purely as a capacity hint. tellg() reports -1 on
+   // failure, which must never reach reserve() as a (huge) unsigned value:
+   //
+   std::string::size_type hint = 0;
+   if(is.seekg(0, std::ios_base::end))
+   {
+      const std::streamoff len = is.tellg();
+      if(len > 0)
+         hint = static_cast<std::string::size_type>(len);
+   }
+   // a failed seek leaves failbit set; clear it so the read below still works:
+   is.clear();
+   is.seekg(0, std::ios_base::beg);
+   text.erase();
+   text.reserve(hint);
+   text.append(std::istreambuf_iterator<char>(is), std::istreambuf_iterator<char>());
+}
+
+//
+// Writes one table cell for a timing result.
+//   os:   stream receiving the cell.
+//   time: measured time in seconds; a negative value means "not measured"
+//         and produces an "NA" cell.
+//   best: the time every result in the row is expressed relative to.
+// The cell holds the relative time followed by the absolute time in brackets.
+// The absolute time is scaled by factors of 1000 into the largest unit that
+// gives a value of at least 1 (s, ms, us, ns, ps).
+//
+void print_result(std::ostream& os, double time, double best)
+{
+   static const char* suffixes[] = {"s", "ms", "us", "ns", "ps", };
+   static const unsigned last_suffix = sizeof(suffixes) / sizeof(suffixes[0]) - 1;
+
+   if(time < 0)
+   {
+      os << "NA | ";
+      return;
+   }
+   double rel = time / best;
+   bool highlight = ((rel > 0) && (rel < 1.1));
+   unsigned suffix = 0;
+   //
+   // Scale into a readable unit. The original test was (time < 0), which can
+   // never hold here, so nothing was ever scaled. A zero time is left alone
+   // (it would never reach 1) and the index is kept inside the suffix table:
+   //
+   while((time > 0) && (time < 1) && (suffix < last_suffix))
+   {
+      time *= 1000;
+      ++suffix;
+   }
+   os << "";
+   if(highlight)
+      os << "";
+   // large values are printed as whole numbers rather than in exponent form:
+   if(rel <= 1000)
+      os << std::setprecision(3) << rel;
+   else
+      os << static_cast<int>(rel);
+   os << " (";
+   if(time <= 1000)
+      os << std::setprecision(3) << time;
+   else
+      os << static_cast<int>(time);
+   os << suffixes[suffix] << ")";
+   if(highlight)
+      os << "";
+   os << " | ";
+}
+
+//
+// Formats everything in result_list as a table and substitutes it for the
+// first occurrence of tagname in html_contents; result_list is then emptied.
+//   show_description: when true a "Text" column holding each result's
+//                     description is included.
+//   tagname:          placeholder text to replace (for example "%code_search%").
+// If result_list is empty a "Results not available" notice is substituted
+// instead. If tagname does not occur in html_contents nothing is replaced.
+//
+// NOTE(review): several string literals below are split across two lines in
+// this patch text (markup appears to have been lost); they are reproduced
+// unchanged.
+//
+void output_html_results(bool show_description, const std::string& tagname)
+{
+   std::stringstream os;
+   if(!result_list.empty())
+   {
+      //
+      // start by outputting the table header:
+      //
+      os << "\n";
+      os << "Expression | ";
+      if(show_description)
+         os << "Text | ";
+#if defined(BOOST_HAS_GRETA)
+      if(time_greta == true)
+         os << "GRETA | ";
+      if(time_safe_greta == true)
+         os << "GRETA (non-recursive mode) | ";
+#endif
+      if(time_boost == true)
+         os << "Boost | ";
+      if(time_localised_boost == true)
+         os << "Boost + C++ locale | ";
+#if defined(BOOST_HAS_POSIX)
+      if(time_posix == true)
+         os << "POSIX | ";
+#endif
+#ifdef BOOST_HAS_PCRE
+      if(time_pcre == true)
+         os << "PCRE | ";
+#endif
+      os << "
\n";
+
+      //
+      // Now enumerate through all the test results. The columns and their
+      // preprocessor guards must mirror the header above exactly, otherwise
+      // the cells end up under the wrong headings:
+      //
+      std::list<results>::const_iterator first, last;
+      first = result_list.begin();
+      last = result_list.end();
+      while(first != last)
+      {
+         os << "" << first->expression << " | ";
+         if(show_description)
+            os << "" << first->description << " | ";
+#if defined(BOOST_HAS_GRETA)
+         if(time_greta == true)
+            print_result(os, first->greta_time, first->factor);
+         if(time_safe_greta == true)
+            print_result(os, first->safe_greta_time, first->factor);
+#endif
+         if(time_boost == true)
+            print_result(os, first->boost_time, first->factor);
+         if(time_localised_boost == true)
+            print_result(os, first->localised_boost_time, first->factor);
+#if defined(BOOST_HAS_POSIX)
+         if(time_posix == true)
+            print_result(os, first->posix_time, first->factor);
+#endif
+#if defined(BOOST_HAS_PCRE)
+         if(time_pcre == true)
+            print_result(os, first->pcre_time, first->factor);
+#endif
+         os << "
\n";
+         ++first;
+      }
+      os << "
\n";
+      result_list.clear();
+   }
+   else
+   {
+      os << "Results not available...
\n";
+   }
+
+   std::string result = os.str();
+
+   // size_type, not unsigned int: a narrower type truncates npos and the
+   // "not found" test below would then never succeed on 64-bit platforms:
+   std::string::size_type pos = html_contents.find(tagname);
+   if(pos != std::string::npos)
+   {
+      html_contents.replace(pos, tagname.size(), result);
+   }
+}
+
+//
+// Emits the finished document held in html_contents: to the file named by
+// html_out_file when one was given, otherwise to std::cout.
+//
+void output_final_html()
+{
+   if(html_out_file.empty())
+   {
+      std::cout << html_contents;
+      return;
+   }
+   std::ofstream target(html_out_file.c_str());
+   target << html_contents;
+}
\ No newline at end of file
diff --git a/performance/input.html b/performance/input.html
new file mode 100644
index 00000000..b218c211
--- /dev/null
+++ b/performance/input.html
@@ -0,0 +1,59 @@
+
+
+ Regular Expression Performance Comparison
+
+
+
+
+
+
+ Regular Expression Performance Comparison
+ The Boost and GRETA regular expression libraries have slightly different
+ interfaces, and it has been suggested that GRETA's interface allows for a more
+ efficient implementation. The following tables provide comparisons between:
+ GRETA.
+ The Boost regex library.
+ Henry Spencer's regular expression library
+ - this is provided for comparison as a typical non-backtracking implementation.
+
+ Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the
+ code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever
+ care should be taken in interpreting the results, only sensible regular
+ expressions (rather than pathological cases) are given, most are taken from the
+ Boost regex examples, or from the Library of
+ Regular Expressions. In addition, some variation in the relative
+ performance of these libraries can be expected on other machines - as memory
+ access and processor caching effects can be quite large for most finite state
+ machine algorithms.
+ Comparison 1: Long Search
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within a long English language text was measured
+ (mtent12.txt
+ from Project Gutenberg, 19Mb).
+ %long_twain_search%
+ Comparison 2: Medium Sized Search
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within a medium sized English language text was
+ measured (the first 50K from mtent12.txt).
+ %short_twain_search%
+ Comparison 3: C++ Code Search
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within the C++ source file
+ boost/crc.hpp was measured.
+ %code_search%
+
+ Comparison 4: HTML Document Search
+
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within the html file libs/libraries.htm
+ was measured.
+ %html_search%
+ Comparison 5: Simple Matches
+
+ For each of the following regular expressions the time taken to match against
+ the text indicated was measured.
+ %short_matches%
+
+ Copyright John Maddock April 2003, all rights reserved.
+
+
diff --git a/performance/main.cpp b/performance/main.cpp
new file mode 100644
index 00000000..0b6db1b7
--- /dev/null
+++ b/performance/main.cpp
@@ -0,0 +1,251 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "regex_comparison.hpp"
+
+
+//
+// Times a whole-string match of text against re with every selected library,
+// echoes each time to std::cout, and appends the collected figures to
+// result_list.
+//   re:          the regular expression.
+//   text:        the text to match.
+//   description: label recorded with the result and shown in the progress line.
+//   icase:       passed through to each library's time_match.
+//
+void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase)
+{
+   results r(re, description);
+
+   std::cout << "Testing: \"" << re << "\" against \"" << description << "\"" << std::endl;
+
+#ifdef BOOST_HAS_GRETA
+   if(time_greta)
+   {
+      r.greta_time = g::time_match(re, text, icase);
+      std::cout << "\tGRETA regex: " << r.greta_time << "s\n";
+   }
+   if(time_safe_greta)
+   {
+      r.safe_greta_time = gs::time_match(re, text, icase);
+      std::cout << "\tSafe GRETA regex: " << r.safe_greta_time << "s\n";
+   }
+#endif
+   if(time_boost)
+   {
+      r.boost_time = b::time_match(re, text, icase);
+      std::cout << "\tBoost regex: " << r.boost_time << "s\n";
+   }
+   if(time_localised_boost)
+   {
+      r.localised_boost_time = bl::time_match(re, text, icase);
+      std::cout << "\tBoost regex (C++ locale): " << r.localised_boost_time << "s\n";
+   }
+#ifdef BOOST_HAS_POSIX
+   if(time_posix)
+   {
+      r.posix_time = posix::time_match(re, text, icase);
+      std::cout << "\tPOSIX regex: " << r.posix_time << "s\n";
+   }
+#endif
+#ifdef BOOST_HAS_PCRE
+   if(time_pcre)
+   {
+      r.pcre_time = pcr::time_match(re, text, icase);
+      std::cout << "\tPCRE regex: " << r.pcre_time << "s\n";
+   }
+#endif
+   // work out the reference time for the row, then store it:
+   r.finalise();
+   result_list.push_back(r);
+}
+
+//
+// Times a search for every occurrence of re within text with every selected
+// library, echoes each time to std::cout, and appends the collected figures
+// to result_list.
+//   re:          the regular expression.
+//   text:        the text to search.
+//   description: label recorded with the result.
+//   icase:       passed through to each library's time_find_all.
+//
+void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase)
+{
+   std::cout << "Testing: " << re << std::endl;
+
+   results r(re, description);
+
+#ifdef BOOST_HAS_GRETA
+   if(time_greta)
+   {
+      r.greta_time = g::time_find_all(re, text, icase);
+      std::cout << "\tGRETA regex: " << r.greta_time << "s\n";
+   }
+   if(time_safe_greta)
+   {
+      r.safe_greta_time = gs::time_find_all(re, text, icase);
+      std::cout << "\tSafe GRETA regex: " << r.safe_greta_time << "s\n";
+   }
+#endif
+   if(time_boost)
+   {
+      r.boost_time = b::time_find_all(re, text, icase);
+      std::cout << "\tBoost regex: " << r.boost_time << "s\n";
+   }
+   if(time_localised_boost)
+   {
+      r.localised_boost_time = bl::time_find_all(re, text, icase);
+      std::cout << "\tBoost regex (C++ locale): " << r.localised_boost_time << "s\n";
+   }
+#ifdef BOOST_HAS_POSIX
+   if(time_posix)
+   {
+      r.posix_time = posix::time_find_all(re, text, icase);
+      std::cout << "\tPOSIX regex: " << r.posix_time << "s\n";
+   }
+#endif
+#ifdef BOOST_HAS_PCRE
+   if(time_pcre)
+   {
+      r.pcre_time = pcr::time_find_all(re, text, icase);
+      std::cout << "\tPCRE regex: " << r.pcre_time << "s\n";
+   }
+#endif
+   // work out the reference time for the row, then store it:
+   r.finalise();
+   result_list.push_back(r);
+}
+
+//
+// Program entry point: parses the command line, runs each selected group of
+// tests, substitutes each group's results into the HTML template, and emits
+// the finished document.
+// Returns 0 on success, or a non-zero value if the command line was empty
+// or contained an argument that handle_argument() rejected.
+//
+// NOTE(review): a few of the expression literals below are split across two
+// lines in this patch text (markup appears to have been lost); they are
+// reproduced unchanged.
+//
+int cpp_main(int argc, char * argv[])
+{
+   // start by processing the command line args:
+   if(argc < 2)
+      return show_usage();
+   int result = 0;
+   for(int c = 1; c < argc; ++c)
+   {
+      result += handle_argument(argv[c]);
+   }
+   if(result)
+      return result;
+
+   if(test_matches)
+   {
+      // start with a simple test, this is basically a measure of the minimal overhead
+      // involved in calling a regex matcher:
+      test_match("abc", "abc");
+      // these are from the regex docs:
+      test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string");
+      test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456");
+      // these are from http://www.regxlib.com/
+      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "john_maddock@compuserve.com");
+      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "foo12@foo.edu");
+      test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "bob.smith@foo.tv");
+      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ");
+      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA");
+      test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ");
+      test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001");
+      test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001");
+      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123");
+      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159");
+      test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159");
+   }
+   output_html_results(true, "%short_matches%");
+
+   std::string file_contents;
+
+   if(test_code)
+   {
+      load_file(file_contents, "../../../boost/crc.hpp");
+
+      const char* highlight_expression = // preprocessor directives: index 1
+         "(^[ \t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|"
+         // comment: index 2
+         "(//[^\\n]*|/\\*.*?\\*/)|"
+         // literals: index 3
+         "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
+         // string literals: index 4
+         "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
+         // keywords: index 5
+         "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
+         "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
+         "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
+         "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
+         "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
+         "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
+         "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
+         "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
+         "|using|virtual|void|volatile|wchar_t|while)\\>"
+         ;
+
+      const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+         "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?"
+         "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?"
+         "(\\{|:[^;\\{()]*\\{)";
+
+      const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)";
+      const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|]+>)";
+
+
+      test_find_all(class_expression, file_contents);
+      test_find_all(highlight_expression, file_contents);
+      test_find_all(include_expression, file_contents);
+      test_find_all(boost_include_expression, file_contents);
+   }
+   output_html_results(false, "%code_search%");
+
+   if(test_html)
+   {
+      load_file(file_contents, "../../../libs/libraries.htm");
+      test_find_all("beman|john|dave", file_contents, true);
+      test_find_all(".*?
", file_contents, true);
+      test_find_all("]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
+      test_find_all("]*>.*?", file_contents, true);
+      test_find_all("
]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true);
+      test_find_all("]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?", file_contents, true);
+   }
+   output_html_results(false, "%html_search%");
+
+   if(test_short_twain)
+   {
+      load_file(file_contents, "short_twain.txt");
+
+      test_find_all("Twain", file_contents);
+      test_find_all("Huck[[:alpha:]]+", file_contents);
+      test_find_all("[[:alpha:]]+ing", file_contents);
+      test_find_all("^[^\n]*?Twain", file_contents);
+      test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
+      test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
+   }
+   output_html_results(false, "%short_twain_search%");
+
+   if(test_long_twain)
+   {
+      load_file(file_contents, "mtent12.txt");
+
+      test_find_all("Twain", file_contents);
+      test_find_all("Huck[[:alpha:]]+", file_contents);
+      test_find_all("[[:alpha:]]+ing", file_contents);
+      test_find_all("^[^\n]*?Twain", file_contents);
+      test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents);
+      //
+      // POSIX timing is switched off for the last expression only. Restore
+      // whatever the user selected afterwards, rather than forcing it on:
+      // otherwise a POSIX column appears in the table even when POSIX
+      // timing was never requested.
+      //
+      const bool saved_time_posix = time_posix;
+      time_posix = false;
+      test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents);
+      time_posix = saved_time_posix;
+   }
+   output_html_results(false, "%long_twain_search%");
+
+   output_final_html();
+   return 0;
+}
+
diff --git a/performance/regex_comparison.hpp b/performance/regex_comparison.hpp
new file mode 100644
index 00000000..9e7902cb
--- /dev/null
+++ b/performance/regex_comparison.hpp
@@ -0,0 +1,135 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * All rights reserved.
+ * May not be transfered or disclosed to a third party without
+ * prior consent of the author.
+ *
+ */
+
+
+#ifndef REGEX_COMPARISON_HPP
+#define REGEX_COMPARISON_HPP
+
+#include
+#include
+#include
+
+//
+// globals:
+//
+// which libraries to time (set by handle_argument from the command line):
+extern bool time_boost;
+extern bool time_localised_boost;
+extern bool time_greta;
+extern bool time_safe_greta;
+extern bool time_posix;
+extern bool time_pcre;
+
+// which groups of tests to run (set by handle_argument from the command line):
+extern bool test_matches;
+extern bool test_short_twain;
+extern bool test_long_twain;
+extern bool test_code;
+extern bool test_html;
+
+// HTML template file name, output file name, and the template's contents:
+extern std::string html_template_file;
+extern std::string html_out_file;
+extern std::string html_contents;
+
+
+// processes one command line argument; returns non-zero if it was rejected:
+int handle_argument(const std::string& what);
+// prints the usage message; returns 1:
+int show_usage();
+// reads the whole of file into text; throws std::runtime_error if it can not be opened:
+void load_file(std::string& text, const char* file);
+// replaces tagname in html_contents with a table of the pending results:
+void output_html_results(bool show_description, const std::string& tagname);
+// writes html_contents to html_out_file, or to std::cout if no file was named:
+void output_final_html();
+
+
+//
+// The timings collected for one regular expression.
+// Each *_time member holds a time in seconds, or -1 when that library was
+// not timed. factor is the smallest non-negative time among them once
+// finalise() has been called; until then (and if nothing was timed) it is
+// the largest representable double.
+//
+struct results
+{
+   double boost_time;
+   double localised_boost_time;
+   double greta_time;
+   double safe_greta_time;
+   double posix_time;
+   double pcre_time;
+   double factor;
+   std::string expression;
+   std::string description;
+   // ex: the expression tested; desc: descriptive text shown alongside it.
+   results(const std::string& ex, const std::string& desc)
+      : boost_time(-1),
+      localised_boost_time(-1),   // was left uninitialised
+      greta_time(-1),
+      safe_greta_time(-1),
+      posix_time(-1),
+      pcre_time(-1),
+      factor(std::numeric_limits<double>::max()),
+      expression(ex),
+      description(desc)
+   {}
+   // Sets factor to the best (smallest) time that was actually measured.
+   void finalise()
+   {
+      if((boost_time >= 0) && (boost_time < factor))
+         factor = boost_time;
+      // this test used to compare factor with itself, so the localised
+      // boost time could never become the reference time:
+      if((localised_boost_time >= 0) && (localised_boost_time < factor))
+         factor = localised_boost_time;
+      if((greta_time >= 0) && (greta_time < factor))
+         factor = greta_time;
+      if((safe_greta_time >= 0) && (safe_greta_time < factor))
+         factor = safe_greta_time;
+      if((posix_time >= 0) && (posix_time < factor))
+         factor = posix_time;
+      if((pcre_time >= 0) && (pcre_time < factor))
+         factor = pcre_time;
+   }
+};
+
+// results gathered by test_match / test_find_all, pending output as HTML:
+// NOTE(review): the element type appears to have been lost from this patch
+// text; usage elsewhere stores `results` objects.
+extern std::list result_list;
+
+
+//
+// One namespace per library. In each, time_match times a match of the whole
+// of text against re, and time_find_all times a search for every occurrence
+// of re in text. The visible implementations return the time in seconds
+// per call.
+//
+namespace b {
+// boost tests:
+double time_match(const std::string& re, const std::string& text, bool icase);
+double time_find_all(const std::string& re, const std::string& text, bool icase);
+
+}
+namespace bl {
+// localised boost tests:
+double time_match(const std::string& re, const std::string& text, bool icase);
+double time_find_all(const std::string& re, const std::string& text, bool icase);
+
+}
+namespace pcr {
+// pcre tests:
+double time_match(const std::string& re, const std::string& text, bool icase);
+double time_find_all(const std::string& re, const std::string& text, bool icase);
+
+}
+namespace g {
+// greta tests:
+double time_match(const std::string& re, const std::string& text, bool icase);
+double time_find_all(const std::string& re, const std::string& text, bool icase);
+
+}
+namespace gs {
+// safe greta tests:
+double time_match(const std::string& re, const std::string& text, bool icase);
+double time_find_all(const std::string& re, const std::string& text, bool icase);
+
+}
+namespace posix {
+// posix tests:
+double time_match(const std::string& re, const std::string& text, bool icase);
+double time_find_all(const std::string& re, const std::string& text, bool icase);
+
+}
+// Full forms: run one expression through every selected library and record
+// the timings under the given description.
+void test_match(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
+void test_find_all(const std::string& re, const std::string& text, const std::string& description, bool icase = false);
+
+// Short form: the text being matched doubles as its own description.
+inline void test_match(const std::string& re, const std::string& text, bool icase = false)
+{
+   test_match(re, text, text, icase);
+}
+
+// Short form: the result is recorded with an empty description.
+inline void test_find_all(const std::string& re, const std::string& text, bool icase = false)
+{
+   test_find_all(re, text, std::string(), icase);
+}
+
+
+#define REPEAT_COUNT 10
+
+#endif
diff --git a/performance/time_boost.cpp b/performance/time_boost.cpp
new file mode 100644
index 00000000..2bb0b58b
--- /dev/null
+++ b/performance/time_boost.cpp
@@ -0,0 +1,98 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include "regex_comparison.hpp"
+#include
+#include
+
+namespace b{
+
+//
+// Times boost::regex_match of the whole of text against re.
+// Returns the time per call in seconds: the shortest batch time seen,
+// divided by the number of calls in a batch.
+//
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
+   boost::smatch what;
+   boost::timer tim;
+   int iter = 1;
+   double result = 0;
+
+   // calibrate: double the batch size until one batch takes at least 0.5s:
+   for(;;)
+   {
+      tim.restart();
+      for(int i = 0; i < iter; ++i)
+         boost::regex_match(text, what, e);
+      result = tim.elapsed();
+      if(!(result < 0.5))
+         break;
+      iter *= 2;
+   }
+
+   // re-run the batch and keep the smallest time, for consistency:
+   for(int pass = 0; pass < REPEAT_COUNT; ++pass)
+   {
+      tim.restart();
+      for(int i = 0; i < iter; ++i)
+         boost::regex_match(text, what, e);
+      const double run = tim.elapsed();
+      if(!(result < run))
+         result = run;
+   }
+   return result / iter;
+}
+
+// Callback for regex_grep: returning true asks for the search to continue.
+bool dummy_grep_proc(const boost::smatch&)
+{
+   return true;
+}
+
+//
+// Times boost::regex_grep finding every occurrence of re within text.
+// Returns the time per complete search in seconds. When the calibration
+// batch alone took more than 10s the repeat runs are skipped.
+//
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   boost::regex e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
+   boost::smatch what;
+   boost::timer tim;
+   int iter = 1;
+   double result = 0;
+
+   // calibrate: double the batch size until one batch takes at least 0.5s:
+   for(;;)
+   {
+      tim.restart();
+      for(int i = 0; i < iter; ++i)
+         boost::regex_grep(&dummy_grep_proc, text, e);
+      result = tim.elapsed();
+      if(!(result < 0.5))
+         break;
+      iter *= 2;
+   }
+
+   // already slow enough that repeating it is not worthwhile:
+   if(result > 10)
+      return result / iter;
+
+   // re-run the batch and keep the smallest time, for consistency:
+   for(int pass = 0; pass < REPEAT_COUNT; ++pass)
+   {
+      tim.restart();
+      for(int i = 0; i < iter; ++i)
+         boost::regex_grep(&dummy_grep_proc, text, e);
+      const double run = tim.elapsed();
+      if(!(result < run))
+         result = run;
+   }
+   return result / iter;
+}
+
+}
diff --git a/performance/time_greta.cpp b/performance/time_greta.cpp
new file mode 100644
index 00000000..f6e4b309
--- /dev/null
+++ b/performance/time_greta.cpp
@@ -0,0 +1,125 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include "regex_comparison.hpp"
+#if defined(BOOST_HAS_GRETA)
+#include
+#include
+#include "regexpr2.h"
+
+namespace g{
+
+//
+// Times GRETA's rpattern::match on text.
+// Returns the shortest batch time seen divided by the number of calls per
+// batch (callers print the value as seconds).
+//
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+ regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE));
+ regex::match_results what;
+ boost::timer tim;
+ int iter = 1;
+ int counter, repeats;
+ double result = 0;
+ double run;
+ // sanity check that the expression matches at all; the call sits inside
+ // assert() and so is compiled out when NDEBUG is defined:
+ assert(e.match(text, what));
+ // calibrate: double the batch size until one batch takes at least 0.5:
+ do
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ e.match(text, what);
+ }
+ result = tim.elapsed();
+ iter *= 2;
+ }while(result < 0.5);
+ // undo the final doubling so iter is the size of the batch just timed:
+ iter /= 2;
+
+ // repeat test and report least value for consistency:
+ for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ e.match(text, what);
+ }
+ run = tim.elapsed();
+ result = std::min(run, result);
+ }
+ return result / iter;
+}
+
+//
+// Times finding successive matches of re in text with GRETA: each search
+// restarts from the end of the previous match until no match is reported.
+// Returns the shortest batch time seen divided by the number of complete
+// searches per batch.
+//
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+ regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE));
+ regex::match_results what;
+ boost::timer tim;
+ int iter = 1;
+ int counter, repeats;
+ double result = 0;
+ double run;
+ // calibrate: double the batch size until one batch takes at least 0.5:
+ do
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ e.match(text.begin(), text.end(), what);
+ // NOTE(review): presumably relies on a match never being empty, or
+ // this loop would not advance - confirm against the GRETA docs.
+ while(what.backref(0).matched)
+ {
+ e.match(what.backref(0).end(), text.end(), what);
+ }
+ }
+ result = tim.elapsed();
+ iter *= 2;
+ }while(result < 0.5);
+ // undo the final doubling so iter is the size of the batch just timed:
+ iter /= 2;
+
+ // the calibration batch was already very slow: skip the repeat runs.
+ if(result > 10)
+ return result / iter;
+
+ // repeat test and report least value for consistency:
+ for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ e.match(text.begin(), text.end(), what);
+ while(what.backref(0).matched)
+ {
+ e.match(what.backref(0).end(), text.end(), what);
+ }
+ }
+ run = tim.elapsed();
+ result = std::min(run, result);
+ }
+ return result / iter;
+}
+
+}
+
+#else
+
+namespace g {
+
+//
+// Stand-ins used when the build has no GRETA support: nothing is timed and
+// -1 ("not measured") is reported.
+//
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   return -1.0;
+}
+
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   return -1.0;
+}
+
+}
+
+#endif
+
diff --git a/performance/time_localised_boost.cpp b/performance/time_localised_boost.cpp
new file mode 100644
index 00000000..34b67424
--- /dev/null
+++ b/performance/time_localised_boost.cpp
@@ -0,0 +1,98 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include "regex_comparison.hpp"
+#include
+#include
+
+namespace bl{
+
+//
+// Times boost::regex_match of text against re, using the expression type
+// declared below.
+// Returns the shortest batch time seen divided by the number of calls per
+// batch (callers print the value as seconds).
+//
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+ // NOTE(review): the template arguments of reg_expression appear to have
+ // been lost from this patch text; presumably they select the C++ locale
+ // traits class - confirm against the original source.
+ boost::reg_expression > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
+ boost::smatch what;
+ boost::timer tim;
+ int iter = 1;
+ int counter, repeats;
+ double result = 0;
+ double run;
+ // calibrate: double the batch size until one batch takes at least 0.5:
+ do
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ boost::regex_match(text, what, e);
+ }
+ result = tim.elapsed();
+ iter *= 2;
+ }while(result < 0.5);
+ // undo the final doubling so iter is the size of the batch just timed:
+ iter /= 2;
+
+ // repeat test and report least value for consistency:
+ for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ boost::regex_match(text, what, e);
+ }
+ run = tim.elapsed();
+ result = std::min(run, result);
+ }
+ return result / iter;
+}
+
+// callback handed to regex_grep; always returns true:
+bool dummy_grep_proc(const boost::smatch&)
+{ return true; }
+
+//
+// Times boost::regex_grep over text for re, using the expression type
+// declared below.
+// Returns the shortest batch time seen divided by the number of complete
+// searches per batch.
+//
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+ // NOTE(review): template arguments missing here as well - see time_match.
+ boost::reg_expression > e(re, (icase ? boost::regbase::perl | boost::regbase::icase : boost::regbase::perl));
+ boost::smatch what;
+ boost::timer tim;
+ int iter = 1;
+ int counter, repeats;
+ double result = 0;
+ double run;
+ // calibrate: double the batch size until one batch takes at least 0.5:
+ do
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ boost::regex_grep(&dummy_grep_proc, text, e);
+ }
+ result = tim.elapsed();
+ iter *= 2;
+ }while(result < 0.5);
+ // undo the final doubling so iter is the size of the batch just timed:
+ iter /= 2;
+
+ // the calibration batch was already very slow: skip the repeat runs.
+ if(result >10)
+ return result / iter;
+
+ // repeat test and report least value for consistency:
+ for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+ {
+ tim.restart();
+ for(counter = 0; counter < iter; ++counter)
+ {
+ boost::regex_grep(&dummy_grep_proc, text, e);
+ }
+ run = tim.elapsed();
+ result = std::min(run, result);
+ }
+ return result / iter;
+}
+
+}
diff --git a/performance/time_pcre.cpp b/performance/time_pcre.cpp
new file mode 100644
index 00000000..dd73d553
--- /dev/null
+++ b/performance/time_pcre.cpp
@@ -0,0 +1,180 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include
+#include
+#include "regex_comparison.hpp"
+#ifdef BOOST_HAS_PCRE
+#include "pcre.h"
+#include
+
+namespace pcr{
+
+// Measures the cost of one anchored pcre_exec() call of `re` against `text`.
+//
+// re    - the regular expression; compiled with PCRE_ANCHORED | PCRE_DOTALL |
+//         PCRE_MULTILINE and then passed through pcre_study().
+// text  - the subject string.
+// icase - when true PCRE_CASELESS is added to the compile options.
+//
+// The batch size is doubled until a single batch takes at least 0.5 on the
+// boost::timer; that batch is then repeated REPEAT_COUNT times and the
+// smallest batch time seen is divided by the batch size and returned.
+// Returns -1 when the expression cannot be compiled or studied.
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   const char *error = 0;
+   int erroffset = 0;
+
+   // pcre_exec() wants the number of ints in the output vector, not its size
+   // in bytes: passing sizeof(what) would allow PCRE to write past the array.
+   int what[50];
+   const int what_size = static_cast<int>(sizeof(what) / sizeof(what[0]));
+
+   boost::timer tim;
+   int iter = 1;
+   int counter, repeats;
+   double result = 0;
+   double run;
+   int exec_result = 0;
+
+   int options = PCRE_ANCHORED | PCRE_DOTALL | PCRE_MULTILINE;
+   if(icase)
+      options |= PCRE_CASELESS;
+
+   pcre *ppcre = pcre_compile(re.c_str(), options, &error, &erroffset, NULL);
+   if(0 == ppcre)
+      return -1;   // nothing was allocated, so there is nothing to release
+
+   pcre_extra *pe = pcre_study(ppcre, 0, &error);
+   if(error)
+   {
+      // memory handed out by PCRE is released through pcre_free
+      if(pe)
+         pcre_free(pe);
+      pcre_free(ppcre);
+      return -1;
+   }
+
+   // calibration: grow the batch until it is long enough to time reliably
+   do
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         exec_result = pcre_exec(ppcre, pe, text.c_str(), static_cast<int>(text.size()), 0, 0, what, what_size);
+      }
+      result = tim.elapsed();
+      iter *= 2;
+   }while(result < 0.5);
+   iter /= 2;   // undo the doubling that followed the last timed batch
+
+   // repeat test and report least value for consistency:
+   for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         exec_result = pcre_exec(ppcre, pe, text.c_str(), static_cast<int>(text.size()), 0, 0, what, what_size);
+      }
+      run = tim.elapsed();
+      result = std::min(run, result);
+   }
+   (void)exec_result;   // the match outcome itself is not part of the measurement
+
+   if(pe)
+      pcre_free(pe);
+   pcre_free(ppcre);
+   return result / iter;
+}
+
+// Measures the cost of finding every match of `re` in `text` with PCRE.
+//
+// re    - the regular expression; compiled with PCRE_DOTALL | PCRE_MULTILINE
+//         and then passed through pcre_study().
+// text  - the subject string; each search resumes at the end offset of the
+//         previous match (what[1]) until pcre_exec() reports a negative result.
+// icase - when true PCRE_CASELESS is added to the compile options.
+//
+// The batch size is doubled until a single batch takes at least 0.5 on the
+// boost::timer.  If that batch already took more than 10 the calibration
+// figure is used as is; otherwise the batch is repeated REPEAT_COUNT times
+// and the smallest batch time is used.  The result is the chosen batch time
+// divided by the batch size, or -1 when the expression cannot be compiled or
+// studied.
+//
+// NOTE(review): an expression that can match the empty string would leave the
+// start offset unchanged and loop forever - confirm none of the benchmark
+// expressions can do that.
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   const char *error = 0;
+   int erroffset = 0;
+
+   // pcre_exec() wants the number of ints in the output vector, not its size
+   // in bytes: passing sizeof(what) would allow PCRE to write past the array.
+   int what[50];
+   const int what_size = static_cast<int>(sizeof(what) / sizeof(what[0]));
+
+   boost::timer tim;
+   int iter = 1;
+   int counter, repeats;
+   double result = 0;
+   double run;
+   int exec_result;
+   int matches = 0;
+   int startoff;
+
+   int options = PCRE_DOTALL | PCRE_MULTILINE;
+   if(icase)
+      options |= PCRE_CASELESS;
+
+   pcre *ppcre = pcre_compile(re.c_str(), options, &error, &erroffset, NULL);
+   if(0 == ppcre)
+      return -1;   // nothing was allocated, so there is nothing to release
+
+   pcre_extra *pe = pcre_study(ppcre, 0, &error);
+   if(error)
+   {
+      // memory handed out by PCRE is released through pcre_free
+      if(pe)
+         pcre_free(pe);
+      pcre_free(ppcre);
+      return -1;
+   }
+
+   // calibration: grow the batch until it is long enough to time reliably
+   do
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         matches = 0;
+         startoff = 0;
+         exec_result = pcre_exec(ppcre, pe, text.c_str(), static_cast<int>(text.size()), startoff, 0, what, what_size);
+         while(exec_result >= 0)
+         {
+            ++matches;
+            startoff = what[1];
+            exec_result = pcre_exec(ppcre, pe, text.c_str(), static_cast<int>(text.size()), startoff, 0, what, what_size);
+         }
+      }
+      result = tim.elapsed();
+      iter *= 2;
+   }while(result < 0.5);
+   iter /= 2;   // undo the doubling that followed the last timed batch
+
+   // very slow expressions keep their calibration figure and are not re-run
+   if(!(result > 10))
+   {
+      result = DBL_MAX;
+
+      // repeat test and report least value for consistency:
+      for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+      {
+         tim.restart();
+         for(counter = 0; counter < iter; ++counter)
+         {
+            matches = 0;
+            startoff = 0;
+            exec_result = pcre_exec(ppcre, pe, text.c_str(), static_cast<int>(text.size()), startoff, 0, what, what_size);
+            while(exec_result >= 0)
+            {
+               ++matches;
+               startoff = what[1];
+               exec_result = pcre_exec(ppcre, pe, text.c_str(), static_cast<int>(text.size()), startoff, 0, what, what_size);
+            }
+         }
+         run = tim.elapsed();
+         result = std::min(run, result);
+      }
+   }
+   (void)matches;   // counted only to mirror a real find-all loop
+
+   // single exit: release the study data and the compiled pattern on every
+   // path that got this far (both were leaked previously)
+   if(pe)
+      pcre_free(pe);
+   pcre_free(ppcre);
+   return result / iter;
+}
+
+}
+#else
+
+namespace pcr{
+
+// Fallback used when BOOST_HAS_PCRE is not defined: nothing is timed and the
+// arguments are ignored.  Always returns -1.
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   const double not_available = -1;
+   return not_available;
+}
+
+// Fallback used when BOOST_HAS_PCRE is not defined: nothing is timed and the
+// arguments are ignored.  Always returns -1.
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   const double not_available = -1;
+   return not_available;
+}
+
+}
+
+#endif
\ No newline at end of file
diff --git a/performance/time_posix.cpp b/performance/time_posix.cpp
new file mode 100644
index 00000000..cd2cec68
--- /dev/null
+++ b/performance/time_posix.cpp
@@ -0,0 +1,143 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include
+#include
+#include "regex_comparison.hpp"
+#ifdef BOOST_HAS_POSIX
+#include
+#include "regex.h"
+
+namespace posix{
+
+// Measures the cost of one regexec() call of `re` against `text`.
+//
+// re    - the regular expression, compiled with REG_EXTENDED.
+// text  - the string handed to regexec().
+// icase - when true REG_ICASE is added to the compile flags.
+//
+// The batch size is doubled until a single batch takes at least 0.5 on the
+// boost::timer; that batch is then repeated REPEAT_COUNT times and the
+// smallest batch time seen is divided by the batch size and returned.
+// Returns -1 when regcomp() rejects the expression.
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   regex_t e;
+   regmatch_t what[20];
+   const size_t what_size = sizeof(what) / sizeof(what[0]);
+   boost::timer tim;
+   int iter = 1;
+   int counter, repeats;
+   double result = 0;
+   double run;
+   if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
+      return -1;
+
+   // regexec() may fill in up to nmatch entries of `what`; never ask for more
+   // than the array holds, even if the expression has more sub-expressions.
+   const size_t nmatch = (e.re_nsub < what_size) ? e.re_nsub : what_size;
+
+   // calibration: grow the batch until it is long enough to time reliably
+   do
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         regexec(&e, text.c_str(), nmatch, what, 0);
+      }
+      result = tim.elapsed();
+      iter *= 2;
+   }while(result < 0.5);
+   iter /= 2;   // undo the doubling that followed the last timed batch
+
+   // repeat test and report least value for consistency:
+   for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         regexec(&e, text.c_str(), nmatch, what, 0);
+      }
+      run = tim.elapsed();
+      result = std::min(run, result);
+   }
+   regfree(&e);
+   return result / iter;
+}
+
+// Measures the cost of finding every match of `re` in `text` with regexec().
+//
+// re    - the regular expression, compiled with REG_EXTENDED.
+// text  - the string that is searched.  Each regexec() call is made with
+//         REG_STARTEND, with what[0].rm_so / rm_eo set beforehand to the end
+//         of the previous match and the end of the text respectively.
+// icase - when true REG_ICASE is added to the compile flags.
+//
+// The batch size is doubled until a single batch takes at least 0.5 on the
+// boost::timer.  If that batch already took more than 10 the calibration
+// figure is used as is; otherwise the batch is repeated REPEAT_COUNT times
+// and the smallest batch time is used.  The result is the chosen batch time
+// divided by the batch size, or -1 when regcomp() rejects the expression.
+//
+// NOTE(review): an expression that can match the empty string would leave the
+// start offset unchanged and loop forever - confirm none of the benchmark
+// expressions can do that.
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   regex_t e;
+   regmatch_t what[20];
+   const size_t what_size = sizeof(what) / sizeof(what[0]);
+   memset(what, 0, sizeof(what));
+   boost::timer tim;
+   int iter = 1;
+   int counter, repeats;
+   double result = 0;
+   double run;
+   int exec_result;
+   int matches = 0;
+   if(0 != regcomp(&e, re.c_str(), (icase ? REG_ICASE | REG_EXTENDED : REG_EXTENDED)))
+      return -1;
+
+   // calibration: grow the batch until it is long enough to time reliably
+   do
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         what[0].rm_so = 0;
+         what[0].rm_eo = text.size();
+         matches = 0;
+         exec_result = regexec(&e, text.c_str(), what_size, what, REG_STARTEND);
+         while(exec_result == 0)
+         {
+            ++matches;
+            what[0].rm_so = what[0].rm_eo;
+            what[0].rm_eo = text.size();
+            exec_result = regexec(&e, text.c_str(), what_size, what, REG_STARTEND);
+         }
+      }
+      result = tim.elapsed();
+      iter *= 2;
+   }while(result < 0.5);
+   iter /= 2;   // undo the doubling that followed the last timed batch
+
+   // very slow expressions keep their calibration figure and are not re-run
+   if(!(result > 10))
+   {
+      result = DBL_MAX;
+
+      // repeat test and report least value for consistency:
+      for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+      {
+         tim.restart();
+         for(counter = 0; counter < iter; ++counter)
+         {
+            what[0].rm_so = 0;
+            what[0].rm_eo = text.size();
+            matches = 0;
+            exec_result = regexec(&e, text.c_str(), what_size, what, REG_STARTEND);
+            while(exec_result == 0)
+            {
+               ++matches;
+               what[0].rm_so = what[0].rm_eo;
+               what[0].rm_eo = text.size();
+               exec_result = regexec(&e, text.c_str(), what_size, what, REG_STARTEND);
+            }
+         }
+         run = tim.elapsed();
+         result = std::min(run, result);
+      }
+   }
+   (void)matches;   // counted only to mirror a real find-all loop
+
+   // single exit: the compiled expression was never released before
+   regfree(&e);
+   return result / iter;
+}
+
+}
+#else
+
+namespace posix{
+
+// Fallback used when BOOST_HAS_POSIX is not defined: nothing is timed and the
+// arguments are ignored.  Always returns -1.
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   const double not_available = -1;
+   return not_available;
+}
+
+// Fallback used when BOOST_HAS_POSIX is not defined: nothing is timed and the
+// arguments are ignored.  Always returns -1.
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   const double not_available = -1;
+   return not_available;
+}
+
+}
+#endif
\ No newline at end of file
diff --git a/performance/time_safe_greta.cpp b/performance/time_safe_greta.cpp
new file mode 100644
index 00000000..6c600bda
--- /dev/null
+++ b/performance/time_safe_greta.cpp
@@ -0,0 +1,127 @@
+/*
+ *
+ * Copyright (c) 2002
+ * Dr John Maddock
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Dr John Maddock makes no representations
+ * about the suitability of this software for any purpose.
+ * It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#include "regex_comparison.hpp"
+#if defined(BOOST_HAS_GRETA)
+
+#include
+#include
+#include "regexpr2.h"
+
+namespace gs{
+
+// Measures the cost of one e.match(text, what) call on a regex::rpattern that
+// is constructed with regex::MODE_SAFE.
+//
+// re    - the regular expression, compiled with MULTILINE | NORMALIZE.
+// text  - the string handed to match().
+// icase - when true NOCASE is added to the compile flags.
+//
+// The batch size is doubled until a single batch takes at least 0.5 on the
+// boost::timer; that batch is then repeated REPEAT_COUNT times and the
+// smallest batch time seen is divided by the batch size and returned.
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE), regex::MODE_SAFE);
+   regex::match_results what;
+   boost::timer tim;
+   double result = 0;
+   int iter = 1;
+
+   // sanity check only: this call is compiled out when NDEBUG is defined
+   assert(e.match(text, what));
+
+   // calibration: grow the batch until it is long enough to time reliably
+   for(;;)
+   {
+      tim.restart();
+      for(int i = 0; i < iter; ++i)
+         e.match(text, what);
+      result = tim.elapsed();
+      iter *= 2;
+      if(!(result < 0.5))
+         break;
+   }
+   // the loop doubled once more after the batch that was actually timed
+   iter /= 2;
+
+   // measurement: keep the fastest of REPEAT_COUNT further batches
+   for(int pass = 0; pass < REPEAT_COUNT; ++pass)
+   {
+      tim.restart();
+      for(int i = 0; i < iter; ++i)
+         e.match(text, what);
+      const double run = tim.elapsed();
+      if(run < result)
+         result = run;
+   }
+   return result / iter;
+}
+
+// Measures the cost of finding every match of `re` in `text` with a
+// regex::rpattern that is constructed with regex::MODE_SAFE.
+//
+// re    - the regular expression, compiled with MULTILINE | NORMALIZE.
+// text  - the string that is searched; each search resumes at the end of
+//         the previous match (what.backref(0).end()) and the loop stops once
+//         backref(0) is no longer flagged as matched.
+// icase - when true NOCASE is added to the compile flags.
+//
+// The batch size is doubled until a single batch takes at least 0.5 on the
+// boost::timer.  If that batch already took more than 10 the calibration
+// figure is returned as is; otherwise the batch is repeated REPEAT_COUNT
+// times and the smallest batch time is used.  The result is the chosen batch
+// time divided by the batch size.
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   regex::rpattern e(re, (icase ? regex::MULTILINE | regex::NORMALIZE | regex::NOCASE : regex::MULTILINE | regex::NORMALIZE), regex::MODE_SAFE);
+   regex::match_results what;
+   boost::timer tim;
+   int iter = 1;
+   int counter, repeats;
+   double result = 0;
+   double run;
+
+   // calibration: grow the batch until it is long enough to time reliably
+   do
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         e.match(text.begin(), text.end(), what);
+         while(what.backref(0).matched)
+         {
+            e.match(what.backref(0).end(), text.end(), what);
+         }
+      }
+      result = tim.elapsed();
+      iter *= 2;
+   }while(result < 0.5);
+   iter /= 2;   // undo the doubling that followed the last timed batch
+
+   // very slow expressions keep their calibration figure and are not re-run
+   if(result > 10)
+      return result / iter;
+
+   // repeat test and report least value for consistency:
+   for(repeats = 0; repeats < REPEAT_COUNT; ++repeats)
+   {
+      tim.restart();
+      for(counter = 0; counter < iter; ++counter)
+      {
+         e.match(text.begin(), text.end(), what);
+         while(what.backref(0).matched)
+         {
+            e.match(what.backref(0).end(), text.end(), what);
+         }
+      }
+      run = tim.elapsed();
+      result = std::min(run, result);
+   }
+   return result / iter;
+}
+
+}
+
+#else
+
+namespace gs{
+
+// Fallback used when BOOST_HAS_GRETA is not defined: nothing is timed and the
+// arguments are ignored.  Always returns -1.
+double time_match(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   const double not_available = -1;
+   return not_available;
+}
+
+// Fallback used when BOOST_HAS_GRETA is not defined: nothing is timed and the
+// arguments are ignored.  Always returns -1.
+double time_find_all(const std::string& re, const std::string& text, bool icase)
+{
+   (void)re;
+   (void)text;
+   (void)icase;
+   const double not_available = -1;
+   return not_available;
+}
+
+}
+
+#endif
+
diff --git a/performance/vc71-results.htm b/performance/vc71-results.htm
new file mode 100644
index 00000000..7a8ff40b
--- /dev/null
+++ b/performance/vc71-results.htm
@@ -0,0 +1,671 @@
+
+
+ Regular Expression Performance Comparison
+
+
+
+
+
+
+ Regular Expression Performance Comparison
+ The Boost and GRETA regular expression libraries have slightly different
+ interfaces, and it has been suggested that GRETA's interface allows for a more
+ efficient implementation. The following tables provide comparisons between:
+ GRETA.
+ The Boost regex library.
+ Henry Spencer's regular expression library
+ - this is provided for comparison as a typical non-backtracking implementation.
+
+ Times were obtained on a 2.8GHz Pentium 4 PC running Windows XP, and the
+ code was compiled with Visual C++ 7.1 with all optimisations turned on. As ever,
+ care should be taken in interpreting the results: only sensible regular
+ expressions (rather than pathological cases) are given, and most are taken from the
+ Boost regex examples, or from the Library of
+ Regular Expressions. In addition, some variation in the relative
+ performance of these libraries can be expected on other machines - as memory
+ access and processor caching effects can be quite large for most finite state
+ machine algorithms.
+ Comparison 1: Long Search
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within a long English language text was measured
+ (mtent12.txt
+ from Project Gutenberg, 19Mb).
+
+
+ Expression |
+ GRETA |
+ GRETA
+ (non-recursive mode) |
+ Boost |
+ Boost + C++ locale |
+ POSIX |
+ PCRE |
+
+
+ Twain |
+ 9.29
+ (0.00309s) |
+ 32.9
+ (0.011s) |
+ 1.34
+ (0.000445s) |
+ 1.37
+ (0.000455s) |
+ 6.23
+ (0.00207s) |
+ 1
+ (0.000333s) |
+
+
+ Huck[[:alpha:]]+ |
+ 12.9
+ (0.00309s) |
+ 44.4
+ (0.0106s) |
+ 1.79
+ (0.00043s) |
+ 1.82
+ (0.000436s) |
+ 1
+ (0.00024s) |
+ 1.06
+ (0.000254s) |
+
+
+ [[:alpha:]]+ing |
+ 7.6
+ (0.0178s) |
+ 15.2
+ (0.0357s) |
+ 1
+ (0.00235s) |
+ 0.867
+ (0.00204s) |
+ 4.26
+ (0.01s) |
+ 6
+ (0.0141s) |
+
+
+ ^[^ ]*?Twain |
+ 5.92
+ (0.00626s) |
+ 16.3
+ (0.0172s) |
+ 1
+ (0.00106s) |
+ 0.666
+ (0.000704s) |
+ NA |
+ 2.04
+ (0.00215s) |
+
+
+ Tom|Sawyer|Huckleberry|Finn |
+ 8
+ (0.00828s) |
+ 20
+ (0.0207s) |
+ 1
+ (0.00104s) |
+ 0.585
+ (0.000605s) |
+ 42.3
+ (0.0438s) |
+ 1.12
+ (0.00115s) |
+
+
+ (Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn) |
+ 6.42
+ (0.012s) |
+ 16.3
+ (0.0307s) |
+ 1
+ (0.00188s) |
+ 0.719
+ (0.00135s) |
+ NA |
+ 1.21
+ (0.00227s) |
+
+
+
+ Comparison 2: Medium Sized Search
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within a medium sized English language text was
+ measured (the first 50K from mtent12.txt).
+
+
+ Expression |
+ GRETA |
+ GRETA
+ (non-recursive mode) |
+ Boost |
+ Boost + C++ locale |
+ POSIX |
+ PCRE |
+
+
+ Twain |
+ 9.29
+ (0.00309s) |
+ 32.5
+ (0.0108s) |
+ 1.34
+ (0.000445s) |
+ 1.37
+ (0.000455s) |
+ 6.24
+ (0.00207s) |
+ 1
+ (0.000333s) |
+
+
+ Huck[[:alpha:]]+ |
+ 12.9
+ (0.00309s) |
+ 47
+ (0.0113s) |
+ 1.77
+ (0.000425s) |
+ 1.84
+ (0.00044s) |
+ 1
+ (0.00024s) |
+ 1.04
+ (0.00025s) |
+
+
+ [[:alpha:]]+ing |
+ 7.61
+ (0.0178s) |
+ 15.2
+ (0.0356s) |
+ 1
+ (0.00234s) |
+ 0.867
+ (0.00203s) |
+ 4.27
+ (0.01s) |
+ 5.94
+ (0.0139s) |
+
+
+ ^[^ ]*?Twain |
+ 5.72
+ (0.00626s) |
+ 15.5
+ (0.0169s) |
+ 1
+ (0.00109s) |
+ 0.644
+ (0.000704s) |
+ NA |
+ 1.93
+ (0.00211s) |
+
+
+ Tom|Sawyer|Huckleberry|Finn |
+ 7.85
+ (0.00828s) |
+ 19.9
+ (0.021s) |
+ 1
+ (0.00105s) |
+ 0.575
+ (0.000606s) |
+ 41.5
+ (0.0438s) |
+ 1.09
+ (0.00115s) |
+
+
+ (Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn) |
+ 6.49
+ (0.012s) |
+ 16.5
+ (0.0307s) |
+ 1
+ (0.00186s) |
+ 0.737
+ (0.00137s) |
+ 216
+ (0.401s) |
+ 1.24
+ (0.00231s) |
+
+
+
+ Comparison 3: C++ Code Search
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within the C++ source file
+ boost/crc.hpp was measured.
+
+
+ Expression |
+ GRETA |
+ GRETA
+ (non-recursive mode) |
+ Boost |
+ Boost + C++ locale |
+ POSIX |
+ PCRE |
+
+
+ ^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?(class|struct)[[:space:]]*(\<\w+\>([
+ ]*\([^)]*\))?[[:space:]]*)*(\<\w*\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?(\{|:[^;\{()]*\{) |
+ 9.58
+ (0.0019s) |
+ 40.3
+ (0.00798s) |
+ 1
+ (0.000198s) |
+ 0.901
+ (0.000178s) |
+ 607
+ (0.12s) |
+ 3.16
+ (0.000626s) |
+
+
+ (^[
+ ]*#(?:[^\\\n]|\\[^\n_[:punct:][:alnum:]]*[\n[:punct:][:word:]])*)|(//[^\n]*|/\*.*?\*/)|\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\>|('(?:[^\\']|\\.)*'|"(?:[^\\"]|\\.)*")|\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while)\> |
+ 1
+ (0.0116s) |
+ 2.7
+ (0.0313s) |
+ 1.22
+ (0.0141s) |
+ 0.946
+ (0.011s) |
+ NA |
+ 1.41
+ (0.0163s) |
+
+
+ ^[ ]*#[ ]*include[ ]+("[^"]+"|<[^>]+>) |
+ 6.05
+ (0.00195s) |
+ 25.7
+ (0.0083s) |
+ 1
+ (0.000323s) |
+ 0.939
+ (0.000303s) |
+ 107
+ (0.0344s) |
+ 1.69
+ (0.000547s) |
+
+
+ ^[ ]*#[ ]*include[ ]+("boost/[^"]+"|]+>) |
+ 5.8
+ (0.00196s) |
+ 24.6
+ (0.0083s) |
+ 1
+ (0.000337s) |
+ 1.07
+ (0.000362s) |
+ 122
+ (0.0413s) |
+ 1.59
+ (0.000538s) |
+
+
+
+
+ Comparison 4: HTML Document Search
+
+ For each of the following regular expressions the time taken to find all
+ occurrences of the expression within the html file libs/libraries.htm
+ was measured.
+
+
+ Expression |
+ GRETA |
+ GRETA
+ (non-recursive mode) |
+ Boost |
+ Boost + C++ locale |
+ POSIX |
+ PCRE |
+
+
+ beman|john|dave |
+ 6.69
+ (0.00321s) |
+ 18.9
+ (0.00908s) |
+ 1
+ (0.000479s) |
+ 0.561
+ (0.000269s) |
+ 23.8
+ (0.0114s) |
+ 1
+ (0.000479s) |
+
+
+ .*?
+
+ |
+ 5.89
+ (0.00164s) |
+ 19.6
+ (0.00548s) |
+ 1
+ (0.000279s) |
+ 1.05
+ (0.000293s) |
+ NA |
+ 1.11
+ (0.000308s) |
+
+
+ ]+href=("[^"]*"|[^[:space:]]+)[^>]*> |
+ 3.94
+ (0.00219s) |
+ 10.4
+ (0.00579s) |
+ 1.09
+ (0.000606s) |
+ 0.825
+ (0.000459s) |
+ 221
+ (0.123s) |
+ 1
+ (0.000557s) |
+
+
+ ]*>.*? |
+ 6.07
+ (0.0016s) |
+ 19.8
+ (0.00524s) |
+ 1.37
+ (0.000362s) |
+ 0.722
+ (0.000191s) |
+ NA |
+ 1
+ (0.000264s) |
+
+
+ ]+src=("[^"]*"|[^[:space:]]+)[^>]*>
|
+ 6.77
+ (0.00162s) |
+ 22.5
+ (0.0054s) |
+ 1.1
+ (0.000264s) |
+ 1.2
+ (0.000289s) |
+ 120
+ (0.0288s) |
+ 1
+ (0.00024s) |
+
+
+ ]+face=("[^"]*"|[^[:space:]]+)[^>]*>.*? |
+ 6.77
+ (0.00162s) |
+ 22.5
+ (0.0054s) |
+ 1.12
+ (0.000269s) |
+ 1.2
+ (0.000289s) |
+ NA |
+ 1
+ (0.00024s) |
+
+
+
+ Comparison 5: Simple Matches
+
+ For each of the following regular expressions the time taken to match against
+ the text indicated was measured.
+
+
+ Expression |
+ Text |
+ GRETA |
+ GRETA
+ (non-recursive mode) |
+ Boost |
+ Boost + C++ locale |
+ POSIX |
+ PCRE |
+
+
+ abc |
+ abc |
+ 1.43
+ (2.25e-007s) |
+ 1.85
+ (2.91e-007s) |
+ 1.27
+ (2.01e-007s) |
+ 1.29
+ (2.03e-007s) |
+ 1.94
+ (3.06e-007s) |
+ 1
+ (1.58e-007s) |
+
+
+ ^([0-9]+)(\-| |$)(.*)$ |
+ 100- this is a line of ftp response which contains a message string |
+ 1
+ (6.97e-007s) |
+ 2.69
+ (1.87e-006s) |
+ 1.89
+ (1.32e-006s) |
+ 1.86
+ (1.3e-006s) |
+ 298
+ (0.000208s) |
+ 1.29
+ (8.98e-007s) |
+
+
+ ([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4} |
+ 1234-5678-1234-456 |
+ 2.41
+ (2.14e-006s) |
+ 2.97
+ (2.64e-006s) |
+ 2.37
+ (2.1e-006s) |
+ 2.24
+ (1.99e-006s) |
+ 29.6
+ (2.63e-005s) |
+ 1
+ (8.88e-007s) |
+
+
+ ^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$ |
+ john_maddock@compuserve.com |
+ 1.83
+ (3.7e-006s) |
+ 2.34
+ (4.74e-006s) |
+ 1.59
+ (3.21e-006s) |
+ 1.55
+ (3.13e-006s) |
+ 172
+ (0.000347s) |
+ 1
+ (2.02e-006s) |
+
+
+ ^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$ |
+ foo12@foo.edu |
+ 1.71
+ (3.09e-006s) |
+ 2.19
+ (3.97e-006s) |
+ 1.71
+ (3.09e-006s) |
+ 1.64
+ (2.98e-006s) |
+ 123
+ (0.000222s) |
+ 1
+ (1.81e-006s) |
+
+
+ ^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$ |
+ bob.smith@foo.tv |
+ 1.72
+ (3.09e-006s) |
+ 2.21
+ (3.97e-006s) |
+ 1.72
+ (3.09e-006s) |
+ 1.7
+ (3.06e-006s) |
+ 133
+ (0.00024s) |
+ 1
+ (1.79e-006s) |
+
+
+ ^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$ |
+ EH10 2QQ |
+ 1.29
+ (9.37e-007s) |
+ 1.71
+ (1.24e-006s) |
+ 1.29
+ (9.35e-007s) |
+ 1.18
+ (8.59e-007s) |
+ 7.79
+ (5.65e-006s) |
+ 1
+ (7.26e-007s) |
+
+
+ ^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$ |
+ G1 1AA |
+ 1.41
+ (9.26e-007s) |
+ 2
+ (1.32e-006s) |
+ 1.38
+ (9.07e-007s) |
+ 1.31
+ (8.6e-007s) |
+ 7.41
+ (4.88e-006s) |
+ 1
+ (6.59e-007s) |
+
+
+ ^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$ |
+ SW1 1ZZ |
+ 1.45
+ (9.54e-007s) |
+ 1.88
+ (1.24e-006s) |
+ 1.42
+ (9.36e-007s) |
+ 1.32
+ (8.69e-007s) |
+ 7.77
+ (5.12e-006s) |
+ 1
+ (6.59e-007s) |
+
+
+ ^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$ |
+ 4/1/2001 |
+ 1.74
+ (1.01e-006s) |
+ 2.3
+ (1.34e-006s) |
+ 1.33
+ (7.73e-007s) |
+ 1.3
+ (7.54e-007s) |
+ 9.85
+ (5.73e-006s) |
+ 1
+ (5.82e-007s) |
+
+
+ ^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$ |
+ 12/12/2001 |
+ 1.77
+ (1.01e-006s) |
+ 2.23
+ (1.28e-006s) |
+ 1.45
+ (8.31e-007s) |
+ 1.38
+ (7.93e-007s) |
+ 11.1
+ (6.34e-006s) |
+ 1
+ (5.73e-007s) |
+
+
+ ^[-+]?[[:digit:]]*\.?[[:digit:]]*$ |
+ 123 |
+ 1.23
+ (7.65e-007s) |
+ 1.66
+ (1.03e-006s) |
+ 1.4
+ (8.69e-007s) |
+ 1.31
+ (8.12e-007s) |
+ 4.86
+ (3.02e-006s) |
+ 1
+ (6.21e-007s) |
+
+
+ ^[-+]?[[:digit:]]*\.?[[:digit:]]*$ |
+ +3.14159 |
+ 1.59
+ (1.05e-006s) |
+ 1.97
+ (1.3e-006s) |
+ 1.45
+ (9.54e-007s) |
+ 1.32
+ (8.69e-007s) |
+ 9.51
+ (6.26e-006s) |
+ 1
+ (6.59e-007s) |
+
+
+ ^[-+]?[[:digit:]]*\.?[[:digit:]]*$ |
+ -3.14159 |
+ 1.64
+ (1.07e-006s) |
+ 2
+ (1.3e-006s) |
+ 1.44
+ (9.35e-007s) |
+ 1.35
+ (8.78e-007s) |
+ 9.53
+ (6.19e-006s) |
+ 1
+ (6.49e-007s) |
+
+
+
+
+ Copyright John Maddock September 2002, all rights reserved.
+
+