diff --git a/build/Jamfile b/build/Jamfile index ca4e7cbf..81199be6 100644 --- a/build/Jamfile +++ b/build/Jamfile @@ -9,6 +9,8 @@ lib boost_regex : ../src/$(SOURCES).cpp $(BOOST_ROOT) BOOST_REGEX_NO_LIB=1 BOOST_REGEX_STATIC_LINK=1 + multi + multi : debug release ; @@ -23,12 +25,77 @@ dll boost_regex : ../src/$(SOURCES).cpp ; +rule boost-regex-stage-tag ( toolset variant : properties * ) +{ + local lib-thread-opt = s ; + if multi in $(properties) + { + lib-thread-opt = m ; + } + + local lib-rt-opt = s ; + if dynamic in $(properties) + { + lib-rt-opt = d ; + } + + local lib-link-opt = s ; + if DLL in $(properties) + { + lib-link-opt = i ; + } + + local lib-debug-opt = "" ; + if [ MATCH .*(debug).* : $(variant) ] + { + lib-debug-opt = d ; + } + + local lib-toolset = $(toolset) ; + + local warning-var = regex.$(toolset)-warning-issued ; + local warning ; + + switch $(toolset) + { + + case borland : + if ! ( BORLAND_VERSION) in 4 5 6 ) + { + BORLAND_VERSION = 5 ; # chose default version + warning = "BORLAND_VERSION not set to 4, 5, or 6 + staged Boost.Regex library will be named appropriately for version" $(BORLAND_VERSION) ; + } + lib-toolset = bcb$(BORLAND_VERSION) ; + + case msvc : + warning = "msvc toolset builds Boost.Regex library for vc6; use vc7 or vc7.1 toolsets for other versions" ; + lib-toolset = vc6 ; + + case msvc-stlport : + warning = "msvc-stlport toolset only builds Boost.Regex library for use with vc6" ; + lib-toolset = vc6-stlport ; + if debug in $(properties) + { + lib-debug-opt = dd ; + } + } + + if $(warning) && ! 
$($(warning-var)) + { + ECHO Warning: $(warning) ; + $(warning-var) = issued ; + } + return $(properties) <$(variant)>_$(lib-toolset)_$(lib-thread-opt)$(lib-rt-opt)$(lib-link-opt)$(lib-debug-opt) ; +} + stage bin-stage : boost_regex boost_regex : - "_debug" + boost-regex-stage-tag : debug release ; + diff --git a/doc/Attic/configuration.html b/doc/Attic/configuration.html new file mode 100644 index 00000000..7cb8dfa8 --- /dev/null +++ b/doc/Attic/configuration.html @@ -0,0 +1,163 @@ + + + + Boost.Regex: Configuration and setup + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Configuration and setup

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Compiler setup
+
Locale and traits class selection
+
Linkage Options
+
Algorithm Selection
+
Algorithm Tuning
+
+

Compiler setup.

+

You shouldn't need to do anything special to configure boost.regex for use with + your compiler - the boost.config subsystem + should already take care of it, if you do have problems (or you are using a + particularly obscure compiler or platform) then boost.config has + a configure script.

+

Locale and traits class selection.

+

The following macros (see user.hpp) + control how boost.regex interacts with the user's locale:

+

+ + + + + + + + + + + + + +
BOOST_REGEX_USE_C_LOCALEForces boost.regex to use the global C locale in its traits class support: + this is the default behaviour on non-windows platforms, but MS Windows + platforms normally use the Win32 API for locale support.
BOOST_REGEX_USE_CPP_LOCALEForces boost.regex to use std::locale in its default traits class, regular + expressions can then be imbued with an instance specific locale.
BOOST_REGEX_NO_W32Tells boost.regex not to use any Win32 API's even when available (implies + BOOST_REGEX_USE_C_LOCALE unless BOOST_REGEX_USE_CPP_LOCALE is set).
+

+

Linkage Options

+

+ + + + + + + + + +
BOOST_REGEX_STATIC_LINKFor Microsoft and Borland C++ builds, this tells boost.regex that it is going + to be linked to a static library even when using a dynamic C runtime.
BOOST_REGEX_NO_LIBFor Microsoft and Borland C++ builds, this tells boost.regex that it should + not automatically select the library to link to.
+

+

Algorithm Selection

+

+ + + + + + + + + + + + + +
BOOST_REGEX_V3Tells boost.regex to use the boost-1.30.0 matching algorithm, define only if + you need maximum compatibility with previous behaviour.
BOOST_REGEX_RECURSIVETells boost.regex to use a stack-recursive matching algorithm.  This is + generally the fastest option (although there is very little in it), but can + cause stack overflow in extreme cases, on Win32 this can be handled safely, but + this is not the case on other platforms.
BOOST_REGEX_NON_RECURSIVETells boost.regex to use a non-stack recursive matching algorithm, this can be + slightly slower than the alternative, but is always safe no matter how + pathological the regular expression.  This is the default on non-Win32 + platforms.
+

+

Algorithm Tuning

+

The following option applies only if BOOST_REGEX_RECURSIVE is set.

+

+ + + + + +
BOOST_REGEX_HAS_MS_STACK_GUARDTells boost.regex that Microsoft style __try - __except blocks are supported, + and can be used to safely trap stack overflow.
+

+

The following options apply only if BOOST_REGEX_NON_RECURSIVE is set.

+

+ + + + + + + + + + + + + +
BOOST_REGEX_BLOCKSIZEIn non-recursive mode, boost.regex uses largish blocks of memory to act as a + stack for the state machine, the larger the block size then the fewer + allocations that will take place.  This defaults to 4096 bytes, which is + large enough to match the vast majority of regular expressions without + further allocations, however, you can choose smaller or larger values depending + upon your platform's characteristics.
BOOST_REGEX_MAX_BLOCKSTells boost.regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is + permitted to use.  If this value is exceeded then boost.regex will stop + trying to find a match and throw a std::runtime_error.  Defaults to 1024, + don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.
BOOST_REGEX_MAX_CACHE_BLOCKSTells boost.regex how many memory blocks to store in its internal cache - + memory blocks are taken from this cache rather than by calling ::operator + new.  Generally speeking this can be an order of magnitude faster than + calling ::operator new each time a memory block is required, but has the + downside that boost.regex can end up caching a large chunk of memory (by + default up to 16 blocks each of BOOST_REGEX_BLOCKSIZE size).  If memory is + tight then try defining this to 0 (disables all caching), or if that is too + slow, then a value of 1 or 2, may be sufficient.  On the other hand, on + large multi-processor, multi-threaded systems, you may find that a higher value + is in order.
+

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_iterator.html b/doc/Attic/regex_iterator.html new file mode 100644 index 00000000..95376f4a --- /dev/null +++ b/doc/Attic/regex_iterator.html @@ -0,0 +1,370 @@ + + + + Boost.Regex: regex_iterator + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

regex_iterator

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+

The iterator type regex_iterator will enumerate all of the regular expression + matches found in some sequence: dereferencing a regex_iterator yields a + reference to a match_results object.

+
+template <class BidirectionalIterator, 
+          class charT = iterator_traits<BidirectionalIterator>::value_type,
+          class traits = regex_traits<charT>,
+          class Allocator = allocator<charT> >
+class regex_iterator 
+{
+public:
+   typedef          basic_regex<charT, traits, Allocator>                   regex_type;
+   typedef          match_results<BidirectionalIterator>                    value_type;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
+   typedef          const value_type*                                       pointer;
+   typedef          const value_type&                                       reference;
+   typedef          std::forward_iterator_tag                               iterator_category;
+   
+   regex_iterator();
+   regex_iterator(BidirectionalIterator a, BidirectionalIterator b, 
+                  const regex_type& re, 
+                  match_flag_type m = match_default);
+   regex_iterator(const regex_iterator&);
+   regex_iterator& operator=(const regex_iterator&);
+   bool operator==(const regex_iterator&);
+   bool operator!=(const regex_iterator&);
+   const value_type& operator*();
+   const value_type* operator->();
+   regex_iterator& operator++();
+   regex_iterator operator++(int);
+};
+
+
+

Description

+

A regex_iterator is constructed from a pair of iterators, and enumerates all + occurrences of a regular expression within that iterator range.

+
regex_iterator();
+ +

+ Effects: constructs an end of sequence regex_iterator.

regex_iterator(BidirectionalIterator a, BidirectionalIterator b, 
+               const regex_type& re, 
+               match_flag_type m = match_default);
+ +

+ Effects: constructs a regex_iterator that will enumerate all occurrences + of the expression re, within the sequence [a,b), and found + using match flags m.  The object re must exist for the + lifetime of the regex_iterator.

regex_iterator(const regex_iterator& that);
+ +

+ Effects: constructs a copy of that.

+

+ Postconditions: *this == that.

regex_iterator& operator=(const regex_iterator&);
+ +

+ Effects: sets *this equal to that.

+

+ Postconditions: *this == that.

bool operator==(const regex_iterator& that);
+ +

+ Effects: returns true if *this is equal to that.

bool operator!=(const regex_iterator&);
+ +

+ Effects: returns !(*this == that).

+
RE.8.1.1 regex_iterator dereference
+
const value_type& operator*();
+ +

+ Effects: dereferencing a regex_iterator object it yields a + const reference to a match_results object, + whose members are set as follows:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

(*it).size()

+
+

re.mark_count()

+
+

(*it).empty()

+
+

false

+
+

(*it).prefix().first

+
+

The end of the last match found, or the start of the underlying sequence if + this is the first match enumerated

+
+

(*it).prefix().second

+
+

(*it)[0].first

+
+

(*it).prefix().matched

+
+

(*it).prefix().first != (*it).prefix().second

+
+

(*it).suffix().first

+
+

(*it)[0].second

+
+

(*it).suffix().second

+
+

The end of the underlying sequence.

+
+

(*it).suffix().matched

+
+

(*it).suffix().first != (*it).suffix().second

+
+

(*it)[0].first

+
+

The start of the sequence of characters that matched the regular expression

+
+

(*it)[0].second

+
+

The end of the sequence of characters that matched the regular expression

+
+

(*it)[0].matched

+
+

+ true if a full match was found, and false if it was a + partial match (found as a result of the match_partial flag being + set).

+

(*it)[n].first

+
+

For all integers n < (*it).size(), the start of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

(*it)[n].second

+
+

For all integers n < (*it).size(), the end of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

(*it)[n].matched

+
+

For all integers n < (*it).size(), true if sub-expression n participated + in the match, false otherwise.

+

+
(*it).position(n)For all integers n < (*it).size(), then the + distance from the start of the underlying sequence to the start of + sub-expression match n.

const value_type* operator->();
+ +

+ Effects: returns &(*this).

regex_iterator& operator++();
+

Effects: moves the iterator to the next match in the + underlying sequence, or the end of sequence iterator if none is found. +  When the last match found matched a zero length string, then the + regex_iterator will find the next match as follows: if there exists a non-zero + length match that starts at the same location as the last one, then returns it, + otherwise starts looking for the next (possibly zero length) match from one + position to the right of the last match.

+ +

+ Returns: *this.

regex_iterator operator++(int);
+ +

+ Effects: constructs a copy result of *this, + then calls ++(*this).

+

+ Returns: result.

+

Examples

+

The following example + takes a C++ source file and builds up an index of class names, and the location + of that class in the file.

+
+#include <string>
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <boost/regex.hpp>
+
+using namespace std;
+
+// purpose:
+// takes the contents of a file in the form of a string
+// and searches for all the C++ class definitions, storing
+// their locations in a map of strings/int's
+
+typedef std::map<std::string, std::string::difference_type, std::less<std::string> > map_type;
+
+const char* re = 
+   // possibly leading whitespace:   
+   "^[[:space:]]*" 
+   // possible template declaration:
+   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+   // class or struct:
+   "(class|struct)[[:space:]]*" 
+   // leading declspec macros etc:
+   "("
+      "\\<\\w+\\>"
+      "("
+         "[[:blank:]]*\\([^)]*\\)"
+      ")?"
+      "[[:space:]]*"
+   ")*" 
+   // the class name
+   "(\\<\\w*\\>)[[:space:]]*" 
+   // template specialisation parameters
+   "(<[^;:{]+>)?[[:space:]]*"
+   // terminate in { or :
+   "(\\{|:[^;\\{()]*\\{)";
+
+
+boost::regex expression(re);
+map_type class_index;
+
+bool regex_callback(const boost::match_results<std::string::const_iterator>& what)
+{
+   // what[0] contains the whole string
+   // what[5] contains the class name.
+   // what[6] contains the template specialisation if any.
+   // add class name and position to map:
+   class_index[what[5].str() + what[6].str()] = what.position(5);
+   return true;
+}
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+int main(int argc, const char** argv)
+{
+   std::string text;
+   for(int i = 1; i < argc; ++i)
+   {
+      cout << "Processing file " << argv[i] << endl;
+      std::ifstream fs(argv[i]);
+      load_file(text, fs);
+      // construct our iterators:
+      boost::regex_iterator<std::string::const_iterator> m1(text.begin(), text.end(), expression);
+      boost::regex_iterator<std::string::const_iterator> m2;
+      std::for_each(m1, m2, &regex_callback);
+      // copy results:
+      cout << class_index.size() << " matches found" << endl;
+      map_type::iterator c, d;
+      c = class_index.begin();
+      d = class_index.end();
+      while(c != d)
+      {
+         cout << "class \"" << (*c).first << "\" found at index: " << (*c).second << endl;
+         ++c;
+      }
+      class_index.erase(class_index.begin(), class_index.end());
+   }
+   return 0;
+}
+
+
+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/regex_replace.html b/doc/Attic/regex_replace.html index aed7b8fa..ff45afa3 100644 --- a/doc/Attic/regex_replace.html +++ b/doc/Attic/regex_replace.html @@ -85,8 +85,8 @@ basic_string<charT> regex_replace(const basic_string<charT>& s, calls regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, flags), and then returns result.

Examples

-

The following example takes - C/C++ source code as input, and outputs syntax highlighted HTML code.

+

The following example + takes C/C++ source code as input, and outputs syntax highlighted HTML code.

#include <fstream>
 #include <sstream>
@@ -139,12 +139,14 @@ boost::regex e1, e2;
       // temporary string stream
       std::ostringstream t(std::ios::out | std::ios::binary);
       std::ostream_iterator<char, char> oi(t);
-      boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
+      boost::regex_replace(oi, in.begin(), in.end(),
+      e2, pre_format, boost::match_default | boost::format_all);
       // then output to final output stream
       // adding syntax highlighting:
       std::string s(t.str());
       std::ostream_iterator<char, char> out(os);
-      boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
+      boost::regex_replace(out, s.begin(), s.end(),
+      e1, format_string, boost::match_default | boost::format_all);
       os << footer_text;
    }
    }
diff --git a/doc/Attic/regex_token_iterator.html b/doc/Attic/regex_token_iterator.html
new file mode 100644
index 00000000..0a2796e9
--- /dev/null
+++ b/doc/Attic/regex_token_iterator.html
@@ -0,0 +1,279 @@
+
+
+   
+      Boost.Regex: regex_token_iterator
+      
+      
+   
+      

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

regex_token_iterator

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+

The template class regex_token_iterator is an iterator adapter; + that is to say it represents a new view of an existing iterator sequence, by + enumerating all the occurrences of a regular expression within that sequence, + and presenting one or more new strings for each match found. Each position + enumerated by the iterator is a string that represents what matched a + particular sub-expression within the regular expression. When class regex_token_iterator + is used to enumerate a single sub-expression with index -1, then the iterator + performs field splitting: that is to say it enumerates one string for each + section of the character container sequence that does not match the regular + expression specified.

+
+template <class BidirectionalIterator, 
+          class charT = iterator_traits<BidirectionalIterator>::value_type,
+          class traits = regex_traits<charT>,
+          class Allocator = allocator<charT> >
+class regex_token_iterator 
+{
+public:
+   typedef          basic_regex<charT, traits, Allocator>                   regex_type;
+   typedef          basic_string<charT>                                     value_type;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
+   typedef          const value_type*                                       pointer;
+   typedef          const value_type&                                       reference;
+   typedef          std::forward_iterator_tag                               iterator_category;
+   
+   regex_token_iterator();
+   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                        int submatch = 0, match_flag_type m = match_default);
+   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                        const std::vector<int>& submatches, match_flag_type m = match_default);
+   template <std::size_t N>
+   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                        const int (&submatches)[N], match_flag_type m = match_default);
+   regex_token_iterator(const regex_token_iterator&);
+   regex_token_iterator& operator=(const regex_token_iterator&);
+   bool operator==(const regex_token_iterator&);
+   bool operator!=(const regex_token_iterator&);
+   const value_type& operator*();
+   const value_type* operator->();
+   regex_token_iterator& operator++();
+   regex_token_iterator operator++(int);
+};
+
+

Description

+
regex_token_iterator();
+ +

+ Effects: constructs an end of sequence iterator.

regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                     int submatch = 0, match_flag_type m = match_default);
+ +

+ Preconditions: !re.empty().

+

+ Effects: constructs a regex_token_iterator that will enumerate one + string for each regular expression match of the expression re found + within the sequence [a,b), using match flags m.  The + string enumerated is the sub-expression submatch for each match + found; if submatch is -1, then enumerates all the text sequences that + did not match the expression re (that is to say it performs field + splitting).

regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                     const std::vector<int>& submatches, match_flag_type m = match_default);
+ +

+ Preconditions: submatches.size() && !re.empty().

+

+ Effects: constructs a regex_token_iterator that will enumerate submatches.size() + strings for each regular expression match of the expression re found + within the sequence [a,b), using match flags m.  For + each match found one string will be enumerated for each sub-expression + index contained within submatches vector; if submatches[0] + is -1, then the first string enumerated for each match will be all of the text + from end of the last match to the start of the current match, in addition there + will be one extra string enumerated when no more matches can be found: from the + end of the last match found, to the end of the underlying sequence.

template <std::size_t N>
+regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                     const int (&submatches)[N], match_flag_type m = match_default);
+ +

+ Preconditions: !re.empty().

+

Effects: constructs a regex_token_iterator that will + enumerate N strings for each regular expression match of the + expression re found within the sequence [a,b), using match + flags m.  For each match found one string will be + enumerated for each sub-expression index contained within the submatches + array; if submatches[0] is -1, then the first string enumerated + for each match will be all of the text from end of the last match to the start + of the current match, in addition there will be one extra string enumerated + when no more matches can be found: from the end of the last match found, to the + end of the underlying sequence.

+
regex_token_iterator(const regex_token_iterator& that);
+ +

+ Effects: constructs a copy of that.

+

+ Postconditions: *this == that.

regex_token_iterator& operator=(const regex_token_iterator& that);
+ +

+ Effects: sets *this to be equal to that.

+

+ Postconditions: *this == that.

bool operator==(const regex_token_iterator&);
+ +

+ Effects: returns true if *this is the same position as that.

bool operator!=(const regex_token_iterator&);
+ +

+ Effects: returns !(*this == that).

const value_type& operator*();
+ +

+ Effects: returns the current string being enumerated.

const value_type* operator->();
+ +

+ Effects: returns &(*this).

regex_token_iterator& operator++();
+ +

+ Effects: Moves on to the next string to be enumerated.

+

+ Returns: *this.

regex_token_iterator operator++(int);
+ +

+ Effects: constructs a copy result of *this, + then calls ++(*this).

+

+ Returns: result. +

Examples

+

The following example + takes a string and splits it into a series of tokens:

+
+#include <iostream>
+#include <boost/regex.hpp>
+
+using namespace std;
+
+int main(int argc)
+{
+   string s;
+   do{
+      if(argc == 1)
+      {
+         cout << "Enter text to split (or \"quit\" to exit): ";
+         getline(cin, s);
+         if(s == "quit") break;
+      }
+      else
+         s = "This is a string of tokens";
+
+      boost::regex re("\\s+");
+      boost::regex_token_iterator<std::string::const_iterator> i(s.begin(), s.end(), re, -1);
+      boost::regex_token_iterator<std::string::const_iterator> j;
+
+      unsigned count = 0;
+      while(i != j)
+      {
+         cout << *i++ << endl;
+         count++;
+      }
+      cout << "There were " << count << " tokens found." << endl;
+
+   }while(argc == 1);
+   return 0;
+}
+
+
+

The following example + takes a html file and outputs a list of all the linked files:

+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <boost/regex.hpp>
+
+boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
+               boost::regex::normal | boost::regbase::icase);
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   //
+   // attempt to grow string buffer to match file size,
+   // this doesn't always work...
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      // use logarithmic growth strategy, in case
+      // in_avail (above) returned zero:
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+int main(int argc, char** argv)
+{
+   std::string s;
+   int i;
+   for(i = 1; i < argc; ++i)
+   {
+      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
+      s.erase();
+      std::ifstream is(argv[i]);
+      load_file(s, is);
+      boost::regex_token_iterator<std::string::const_iterator>
+         i(s.begin(), s.end(), e, 1);
+      boost::regex_token_iterator<std::string::const_iterator> j;
+      while(i != j)
+      {
+         std::cout << *i++ << std::endl;
+      }
+   }
+   //
+   // alternative method:
+   // test the array-literal constructor, and split out the whole
+   // match as well as $1....
+   //
+   for(i = 1; i < argc; ++i)
+   {
+      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
+      s.erase();
+      std::ifstream is(argv[i]);
+      load_file(s, is);
+      const int subs[] = {1, 0,};
+      boost::regex_token_iterator<std::string::const_iterator>
+         i(s.begin(), s.end(), e, subs);
+      boost::regex_token_iterator<std::string::const_iterator> j;
+      while(i != j)
+      {
+         std::cout << *i++ << std::endl;
+      }
+   }
+
+   return 0;
+}
+
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/Attic/standards.html b/doc/Attic/standards.html new file mode 100644 index 00000000..5083732a --- /dev/null +++ b/doc/Attic/standards.html @@ -0,0 +1,80 @@ + + + + Boost.Regex: Standards Conformance + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Standards Conformance

+
+

Boost.Regex Index

+
+

+
+

+

C++

+

Boost.regex is intended to conform to the + regular expression standardisation proposal, which will appear in a + future C++ standard technical report (and hopefully in a future version of the + standard).  Currently there are some differences in how the regular + expression traits classes are defined, these will be fixed in a future release.

+

ECMAScript / JavaScript

+

All of the ECMAScript regular expression syntax features are supported, except + that:

+

Negated class escapes (\S, \D and \W) are not permitted inside character class + definitions ( [...] ).

+

The escape sequence \u matches any upper case character (the same as + [[:upper:]]) rather than a unicode escape sequence; use \x{DDDD} for + unicode escape sequences.

+

Perl

+

Almost all perl features are supported, except for:

+

\N{name}  Use [[:name:]] instead.

+

\pP and \PP

+

(?imsx-imsx)

+

(?<=pattern)

+

(?<!pattern)

+

(?{code})

+

(??{code})

+

(?(condition)yes-pattern) and (?(condition)yes-pattern|no-pattern)

+

These embarrassments / limitations will be removed in due course, mainly + dependent upon user demand.

+

POSIX

+

All the POSIX basic and extended regular expression features are supported, + except that:

+

No character collating names are recognised except those specified in the POSIX + standard for the C locale, unless they are explicitly registered with the + traits class.

+

Character equivalence classes ( [[=a=]] etc) are probably buggy except on + Win32.  Implementing this feature requires knowledge of the format of the + string sort keys produced by the system; if you need this, and the default + implementation doesn't work on your platform, then you will need to supply a + custom traits class.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/configuration.html b/doc/configuration.html new file mode 100644 index 00000000..7cb8dfa8 --- /dev/null +++ b/doc/configuration.html @@ -0,0 +1,163 @@ + + + + Boost.Regex: Configuration and setup + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Configuration and setup

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Compiler setup
+
Locale and traits class selection
+
Linkage Options
+
Algorithm Selection
+
Algorithm Tuning
+
+

Compiler setup.

+

You shouldn't need to do anything special to configure boost.regex for use with + your compiler - the boost.config subsystem + should already take care of it, if you do have problems (or you are using a + particularly obscure compiler or platform) then boost.config has + a configure script.

+

Locale and traits class selection.

+

The following macros (see user.hpp) + control how boost.regex interacts with the user's locale:

+

+ + + + + + + + + + + + + +
BOOST_REGEX_USE_C_LOCALEForces boost.regex to use the global C locale in its traits class support: + this is the default behaviour on non-windows platforms, but MS Windows + platforms normally use the Win32 API for locale support.
BOOST_REGEX_USE_CPP_LOCALEForces boost.regex to use std::locale in its default traits class, regular + expressions can then be imbued with an instance specific locale.
BOOST_REGEX_NO_W32Tells boost.regex not to use any Win32 API's even when available (implies + BOOST_REGEX_USE_C_LOCALE unless BOOST_REGEX_USE_CPP_LOCALE is set).
+

+

Linkage Options

+

+ + + + + + + + + +
BOOST_REGEX_STATIC_LINKFor Microsoft and Borland C++ builds, this tells boost.regex that it is going + to be linked to a static library even when using a dynamic C runtime.
BOOST_REGEX_NO_LIBFor Microsoft and Borland C++ builds, this tells boost.regex that it should + not automatically select the library to link to.
+

+

Algorithm Selection

+

+ + + + + + + + + + + + + +
BOOST_REGEX_V3Tells boost.regex to use the boost-1.30.0 matching algorithm, define only if + you need maximum compatibility with previous behaviour.
BOOST_REGEX_RECURSIVETells boost.regex to use a stack-recursive matching algorithm.  This is + generally the fastest option (although there is very little in it), but can + cause stack overflow in extreme cases, on Win32 this can be handled safely, but + this is not the case on other platforms.
BOOST_REGEX_NON_RECURSIVETells boost.regex to use a non-stack recursive matching algorithm, this can be + slightly slower than the alternative, but is always safe no matter how + pathological the regular expression.  This is the default on non-Win32 + platforms.
+

+

Algorithm Tuning

+

The following option applies only if BOOST_REGEX_RECURSIVE is set.

+

+ + + + + +
BOOST_REGEX_HAS_MS_STACK_GUARDTells boost.regex that Microsoft style __try - __except blocks are supported, + and can be used to safely trap stack overflow.
+

+

The following options apply only if BOOST_REGEX_NON_RECURSIVE is set.

+

+ + + + + + + + + + + + + +
BOOST_REGEX_BLOCKSIZEIn non-recursive mode, boost.regex uses largish blocks of memory to act as a + stack for the state machine, the larger the block size then the fewer + allocations that will take place.  This defaults to 4096 bytes, which is + large enough to match the vast majority of regular expressions without + further allocations, however, you can choose smaller or larger values depending + upon your platform's characteristics.
BOOST_REGEX_MAX_BLOCKSTells boost.regex how many blocks of size BOOST_REGEX_BLOCKSIZE it is + permitted to use.  If this value is exceeded then boost.regex will stop + trying to find a match and throw a std::runtime_error.  Defaults to 1024, + don't forget to tweak this value if you alter BOOST_REGEX_BLOCKSIZE by much.
BOOST_REGEX_MAX_CACHE_BLOCKSTells boost.regex how many memory blocks to store in its internal cache - + memory blocks are taken from this cache rather than by calling ::operator + new.  Generally speeking this can be an order of magnitude faster than + calling ::operator new each time a memory block is required, but has the + downside that boost.regex can end up caching a large chunk of memory (by + default up to 16 blocks each of BOOST_REGEX_BLOCKSIZE size).  If memory is + tight then try defining this to 0 (disables all caching), or if that is too + slow, then a value of 1 or 2, may be sufficient.  On the other hand, on + large multi-processor, multi-threaded systems, you may find that a higher value + is in order.
+

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/index.html b/doc/index.html index 504d335d..d6a883a3 100644 --- a/doc/index.html +++ b/doc/index.html @@ -27,7 +27,9 @@

Contents

-
Overview
Installation
+
Overview
+
Configuration and setup
+
Installation
Borland C++ Builder
@@ -61,6 +63,7 @@
regex_iterator
+
regex_token_iterator
Misc.
@@ -68,7 +71,7 @@
POSIX API Compatibility Functions
Partial matches
-
Regular Expression Syntax
+
Regular Expression Syntax
Format String Syntax
@@ -95,6 +98,7 @@
Examples
Headers
Redistributables and Library Names
+
Standards Conformance
History
Contacts and Acknowledgements
@@ -117,3 +121,4 @@ for any purpose. It is provided "as is" without express or implied warranty.

+ diff --git a/doc/regex_iterator.html b/doc/regex_iterator.html new file mode 100644 index 00000000..95376f4a --- /dev/null +++ b/doc/regex_iterator.html @@ -0,0 +1,370 @@ + + + + Boost.Regex: regex_iterator + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

regex_iterator

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+

The iterator type regex_iterator will enumerate all of the regular expression + matches found in some sequence: dereferencing a regex_iterator yields a + reference to a match_results object.

+
+template <class BidirectionalIterator, 
+          class charT = iterator_traits<BidirectionalIterator>::value_type,
+          class traits = regex_traits<charT>,
+          class Allocator = allocator<charT> >
+class regex_iterator 
+{
+public:
+   typedef          basic_regex<charT, traits, Allocator>                   regex_type;
+   typedef          match_results<BidirectionalIterator>                    value_type;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
+   typedef          const value_type*                                       pointer;
+   typedef          const value_type&                                       reference;
+   typedef          std::forward_iterator_tag                               iterator_category;
+   
+   regex_iterator();
+   regex_iterator(BidirectionalIterator a, BidirectionalIterator b, 
+                  const regex_type& re, 
+                  match_flag_type m = match_default);
+   regex_iterator(const regex_iterator&);
+   regex_iterator& operator=(const regex_iterator&);
+   bool operator==(const regex_iterator&);
+   bool operator!=(const regex_iterator&);
+   const value_type& operator*();
+   const value_type* operator->();
+   regex_iterator& operator++();
+   regex_iterator operator++(int);
+};
+
+
+

Description

+

A regex_iterator is constructed from a pair of iterators, and enumerates all + occurrences of a regular expression within that iterator range.

+
regex_iterator();
+ +

+ Effects: constructs an end of sequence regex_iterator.

regex_iterator(BidirectionalIterator a, BidirectionalIterator b, 
+               const regex_type& re, 
+               match_flag_type m = match_default);
+ +

+ Effects: constructs a regex_iterator that will enumerate all occurrences + of the expression re, within the sequence [a,b), and found + using match flags m.  The object re must exist for the + lifetime of the regex_iterator.

regex_iterator(const regex_iterator& that);
+ +

+ Effects: constructs a copy of that.

+

+ Postconditions: *this == that.

regex_iterator& operator=(const regex_iterator& that);
+ +

+ Effects: sets *this to be equal to that.

+

+ Postconditions: *this == that.

bool operator==(const regex_iterator& that);
+ +

+ Effects: returns true if *this is equal to that.

bool operator!=(const regex_iterator& that);
+ +

+ Effects: returns !(*this == that).

+
RE.8.1.1 regex_iterator dereference
+
const value_type& operator*();
+ +

+ Effects: dereferencing a regex_iterator object it yields a + const reference to a match_results object, + whose members are set as follows:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Element +

+
+

+ Value

+

(*it).size()

+
+

re.mark_count()

+
+

(*it).empty()

+
+

false

+
+

(*it).prefix().first

+
+

The end of the last match found, or the start of the underlying sequence if + this is the first match enumerated

+
+

(*it).prefix().second

+
+

(*it)[0].first

+
+

(*it).prefix().matched

+
+

(*it).prefix().first != (*it).prefix().second

+
+

(*it).suffix().first

+
+

(*it)[0].second

+
+

(*it).suffix().second

+
+

The end of the underlying sequence.

+
+

(*it).suffix().matched

+
+

(*it).suffix().first != (*it).suffix().second

+
+

(*it)[0].first

+
+

The start of the sequence of characters that matched the regular expression

+
+

(*it)[0].second

+
+

The end of the sequence of characters that matched the regular expression

+
+

(*it)[0].matched

+
+

+ true if a full match was found, and false if it was a + partial match (found as a result of the match_partial flag being + set).

+

(*it)[n].first

+
+

For all integers n < (*it).size(), the start of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

(*it)[n].second

+
+

For all integers n < (*it).size(), the end of the sequence that matched + sub-expression n. Alternatively, if sub-expression n did not participate + in the match, then last.

+
+

(*it)[n].matched

+
+

For all integers n < (*it).size(), true if sub-expression n participated + in the match, false otherwise.

+

+
(*it).position(n)For all integers n < (*it).size(), then the + distance from the start of the underlying sequence to the start of + sub-expression match n.

const value_type* operator->();
+ +

+ Effects: returns &(*this).

regex_iterator& operator++();
+

Effects: moves the iterator to the next match in the + underlying sequence, or the end of sequence iterator if none is found. +  When the last match found matched a zero length string, then the + regex_iterator will find the next match as follows: if there exists a non-zero + length match that starts at the same location as the last one, then returns it, + otherwise starts looking for the next (possibly zero length) match from one + position to the right of the last match.

+ +

+ Returns: *this.

regex_iterator operator++(int);
+ +

+ Effects: constructs a copy result of *this, + then calls ++(*this).

+

+ Returns: result.

+

Examples

+

The following example + takes a C++ source file and builds up an index of class names, and the location + of that class in the file.

+
+#include <string>
+#include <map>
+#include <fstream>
+#include <iostream>
+#include <boost/regex.hpp>
+
+using namespace std;
+
+// purpose:
+// takes the contents of a file in the form of a string
+// and searches for all the C++ class definitions, storing
+// their locations in a map of strings/int's
+
+typedef std::map<std::string, std::string::difference_type, std::less<std::string> > map_type;
+
+const char* re = 
+   // possibly leading whitespace:   
+   "^[[:space:]]*" 
+   // possible template declaration:
+   "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
+   // class or struct:
+   "(class|struct)[[:space:]]*" 
+   // leading declspec macros etc:
+   "("
+      "\\<\\w+\\>"
+      "("
+         "[[:blank:]]*\\([^)]*\\)"
+      ")?"
+      "[[:space:]]*"
+   ")*" 
+   // the class name
+   "(\\<\\w*\\>)[[:space:]]*" 
+   // template specialisation parameters
+   "(<[^;:{]+>)?[[:space:]]*"
+   // terminate in { or :
+   "(\\{|:[^;\\{()]*\\{)";
+
+
+boost::regex expression(re);
+map_type class_index;
+
+bool regex_callback(const boost::match_results<std::string::const_iterator>& what)
+{
+   // what[0] contains the whole string
+   // what[5] contains the class name.
+   // what[6] contains the template specialisation if any.
+   // add class name and position to map:
+   class_index[what[5].str() + what[6].str()] = what.position(5);
+   return true;
+}
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+int main(int argc, const char** argv)
+{
+   std::string text;
+   for(int i = 1; i < argc; ++i)
+   {
+      cout << "Processing file " << argv[i] << endl;
+      std::ifstream fs(argv[i]);
+      load_file(text, fs);
+      // construct our iterators:
+      boost::regex_iterator<std::string::const_iterator> m1(text.begin(), text.end(), expression);
+      boost::regex_iterator<std::string::const_iterator> m2;
+      std::for_each(m1, m2, &regex_callback);
+      // copy results:
+      cout << class_index.size() << " matches found" << endl;
+      map_type::iterator c, d;
+      c = class_index.begin();
+      d = class_index.end();
+      while(c != d)
+      {
+         cout << "class \"" << (*c).first << "\" found at index: " << (*c).second << endl;
+         ++c;
+      }
+      class_index.erase(class_index.begin(), class_index.end());
+   }
+   return 0;
+}
+
+
+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/regex_replace.html b/doc/regex_replace.html index aed7b8fa..ff45afa3 100644 --- a/doc/regex_replace.html +++ b/doc/regex_replace.html @@ -85,8 +85,8 @@ basic_string<charT> regex_replace(const basic_string<charT>& s, calls regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, flags), and then returns result.

Examples

-

The following example takes - C/C++ source code as input, and outputs syntax highlighted HTML code.

+

The following example + takes C/C++ source code as input, and outputs syntax highlighted HTML code.

#include <fstream>
 #include <sstream>
@@ -139,12 +139,14 @@ boost::regex e1, e2;
       // temporary string stream
       std::ostringstream t(std::ios::out | std::ios::binary);
       std::ostream_iterator<char, char> oi(t);
-      boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format);
+      boost::regex_replace(oi, in.begin(), in.end(),
+      e2, pre_format, boost::match_default | boost::format_all);
       // then output to final output stream
       // adding syntax highlighting:
       std::string s(t.str());
       std::ostream_iterator<char, char> out(os);
-      boost::regex_replace(out, s.begin(), s.end(), e1, format_string);
+      boost::regex_replace(out, s.begin(), s.end(),
+      e1, format_string, boost::match_default | boost::format_all);
       os << footer_text;
    }
    }
diff --git a/doc/regex_token_iterator.html b/doc/regex_token_iterator.html
new file mode 100644
index 00000000..0a2796e9
--- /dev/null
+++ b/doc/regex_token_iterator.html
@@ -0,0 +1,279 @@
+
+
+   
+      Boost.Regex: regex_token_iterator
+      
+      
+   
+      

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

regex_token_iterator

+
+

Boost.Regex Index

+
+

+
+

Contents

+
+
Synopsis
Description
+ Examples
+

Synopsis

+

The template class regex_token_iterator is an iterator adapter; + that is to say it represents a new view of an existing iterator sequence, by + enumerating all the occurrences of a regular expression within that sequence, + and presenting one or more new strings for each match found. Each position + enumerated by the iterator is a string that represents what matched a + particular sub-expression within the regular expression. When class regex_token_iterator + is used to enumerate a single sub-expression with index -1, then the iterator + performs field splitting: that is to say it enumerates one string for each + section of the character container sequence that does not match the regular + expression specified.

+
+template <class BidirectionalIterator, 
+          class charT = iterator_traits<BidirectionalIterator>::value_type,
+          class traits = regex_traits<charT>,
+          class Allocator = allocator<charT> >
+class regex_token_iterator 
+{
+public:
+   typedef          basic_regex<charT, traits, Allocator>                   regex_type;
+   typedef          basic_string<charT>                                     value_type;
+   typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type;
+   typedef          const value_type*                                       pointer;
+   typedef          const value_type&                                       reference;
+   typedef          std::forward_iterator_tag                               iterator_category;
+   
+   regex_token_iterator();
+   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                        int submatch = 0, match_flag_type m = match_default);
+   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                        const std::vector<int>& submatches, match_flag_type m = match_default);
+   template <std::size_t N>
+   regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                        const int (&submatches)[N], match_flag_type m = match_default);
+   regex_token_iterator(const regex_token_iterator&);
+   regex_token_iterator& operator=(const regex_token_iterator&);
+   bool operator==(const regex_token_iterator&);
+   bool operator!=(const regex_token_iterator&);
+   const value_type& operator*();
+   const value_type* operator->();
+   regex_token_iterator& operator++();
+   regex_token_iterator operator++(int);
+};
+
+

Description

+
regex_token_iterator();
+ +

+ Effects: constructs an end of sequence iterator.

regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                     int submatch = 0, match_flag_type m = match_default);
+ +

+ Preconditions: !re.empty().

+

+ Effects: constructs a regex_token_iterator that will enumerate one + string for each regular expression match of the expression re found + within the sequence [a,b), using match flags m.  The + string enumerated is the sub-expression submatch for each match + found; if submatch is -1, then enumerates all the text sequences that + did not match the expression re (that is to say, it performs field + splitting).

regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                     const std::vector<int>& submatches, match_flag_type m = match_default);
+ +

+ Preconditions: submatches.size() && !re.empty().

+

+ Effects: constructs a regex_token_iterator that will enumerate submatches.size() + strings for each regular expression match of the expression re found + within the sequence [a,b), using match flags m.  For + each match found one string will be enumerated for each sub-expression + index contained within submatches vector; if submatches[0] + is -1, then the first string enumerated for each match will be all of the text + from end of the last match to the start of the current match, in addition there + will be one extra string enumerated when no more matches can be found: from the + end of the last match found, to the end of the underlying sequence.

template <std::size_t N>
+regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, 
+                     const int (&submatches)[N], match_flag_type m = match_default);
+ +

+ Preconditions: !re.empty().

+

Effects: constructs a regex_token_iterator that will + enumerate N strings for each regular expression match of the + expression re found within the sequence [a,b), using match + flags m.  For each match found one string will be + enumerated for each sub-expression index contained within the submatches + array; if submatches[0] is -1, then the first string enumerated + for each match will be all of the text from end of the last match to the start + of the current match, in addition there will be one extra string enumerated + when no more matches can be found: from the end of the last match found, to the + end of the underlying sequence.

+
regex_token_iterator(const regex_token_iterator& that);
+ +

+ Effects: constructs a copy of that.

+

+ Postconditions: *this == that.

regex_token_iterator& operator=(const regex_token_iterator& that);
+ +

+ Effects: sets *this to be equal to that.

+

+ Postconditions: *this == that.

bool operator==(const regex_token_iterator& that);
+ +

+ Effects: returns true if *this is the same position as that.

bool operator!=(const regex_token_iterator& that);
+ +

+ Effects: returns !(*this == that).

const value_type& operator*();
+ +

+ Effects: returns the current string being enumerated.

const value_type* operator->();
+ +

+ Effects: returns &(*this).

regex_token_iterator& operator++();
+ +

+ Effects: Moves on to the next string to be enumerated.

+

+ Returns: *this.

regex_token_iterator operator++(int);
+ +

+ Effects: constructs a copy result of *this, + then calls ++(*this).

+

+ Returns: result. +

Examples

+

The following example + takes a string and splits it into a series of tokens:

+
+#include <iostream>
+#include <boost/regex.hpp>
+
+using namespace std;
+
+int main(int argc)
+{
+   string s;
+   do{
+      if(argc == 1)
+      {
+         cout << "Enter text to split (or \"quit\" to exit): ";
+         getline(cin, s);
+         if(s == "quit") break;
+      }
+      else
+         s = "This is a string of tokens";
+
+      boost::regex re("\\s+");
+      boost::regex_token_iterator<std::string::const_iterator> i(s.begin(), s.end(), re, -1);
+      boost::regex_token_iterator<std::string::const_iterator> j;
+
+      unsigned count = 0;
+      while(i != j)
+      {
+         cout << *i++ << endl;
+         count++;
+      }
+      cout << "There were " << count << " tokens found." << endl;
+
+   }while(argc == 1);
+   return 0;
+}
+
+
+

The following example + takes a html file and outputs a list of all the linked files:

+
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <boost/regex.hpp>
+
+boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
+               boost::regex::normal | boost::regbase::icase);
+
+void load_file(std::string& s, std::istream& is)
+{
+   s.erase();
+   //
+   // attempt to grow string buffer to match file size,
+   // this doesn't always work...
+   s.reserve(is.rdbuf()->in_avail());
+   char c;
+   while(is.get(c))
+   {
+      // use logarithmic growth strategy, in case
+      // in_avail (above) returned zero:
+      if(s.capacity() == s.size())
+         s.reserve(s.capacity() * 3);
+      s.append(1, c);
+   }
+}
+
+int main(int argc, char** argv)
+{
+   std::string s;
+   int i;
+   for(i = 1; i < argc; ++i)
+   {
+      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
+      s.erase();
+      std::ifstream is(argv[i]);
+      load_file(s, is);
+      boost::regex_token_iterator<std::string::const_iterator>
+         i(s.begin(), s.end(), e, 1);
+      boost::regex_token_iterator<std::string::const_iterator> j;
+      while(i != j)
+      {
+         std::cout << *i++ << std::endl;
+      }
+   }
+   //
+   // alternative method:
+   // test the array-literal constructor, and split out the whole
+   // match as well as $1....
+   //
+   for(i = 1; i < argc; ++i)
+   {
+      std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
+      s.erase();
+      std::ifstream is(argv[i]);
+      load_file(s, is);
+      const int subs[] = {1, 0,};
+      boost::regex_token_iterator<std::string::const_iterator>
+         i(s.begin(), s.end(), e, subs);
+      boost::regex_token_iterator<std::string::const_iterator> j;
+      while(i != j)
+      {
+         std::cout << *i++ << std::endl;
+      }
+   }
+
+   return 0;
+}
+
+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/doc/standards.html b/doc/standards.html new file mode 100644 index 00000000..5083732a --- /dev/null +++ b/doc/standards.html @@ -0,0 +1,80 @@ + + + + Boost.Regex: Standards Conformance + + + + +

+ + + + + + +
+

C++ Boost

+
+

Boost.Regex

+

Standards Conformance

+
+

Boost.Regex Index

+
+

+
+

+

C++

+

Boost.regex is intended to conform to the + regular expression standardisation proposal, which will appear in a + future C++ standard technical report (and hopefully in a future version of the + standard).  Currently there are some differences in how the regular + expression traits classes are defined, these will be fixed in a future release.

+

ECMAScript / JavaScript

+

All of the ECMAScript regular expression syntax features are supported, except + that:

+

Negated class escapes (\S, \D and \W) are not permitted inside character class + definitions ( [...] ).

+

The escape sequence \u matches any upper case character (the same as + [[:upper:]]) rather than a unicode escape sequence; use \x{DDDD} for + unicode escape sequences.

+

Perl

+

Almost all perl features are supported, except for:

+

\N{name}  Use [[:name:]] instead.

+

\pP and \PP

+

(?imsx-imsx)

+

(?<=pattern)

+

(?<!pattern)

+

(?{code})

+

(??{code})

+

(?(condition)yes-pattern) and (?(condition)yes-pattern|no-pattern)

+

These embarrassments / limitations will be removed in due course, mainly + dependent upon user demand.

+

POSIX

+

All the POSIX basic and extended regular expression features are supported, + except that:

+

No character collating names are recognised except those specified in the POSIX + standard for the C locale, unless they are explicitly registered with the + traits class.

+

Character equivalence classes ( [[=a=]] etc) are probably buggy except on + Win32.  Implementing this feature requires knowledge of the format of the + string sort keys produced by the system; if you need this, and the default + implementation doesn't work on your platform, then you will need to supply a + custom traits class.

+

+


+

+

Revised + + 11 April 2003 + +

+

© Copyright John Maddock 1998- 2003

+

Permission to use, copy, modify, distribute and sell this software + and its documentation for any purpose is hereby granted without fee, provided + that the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting documentation. + Dr John Maddock makes no representations about the suitability of this software + for any purpose. It is provided "as is" without express or implied warranty.

+ + diff --git a/example/snippets/regex_iterator_example.cpp b/example/snippets/regex_iterator_example.cpp new file mode 100644 index 00000000..6ec3d85e --- /dev/null +++ b/example/snippets/regex_iterator_example.cpp @@ -0,0 +1,115 @@ +/* + * + * Copyright (c) 2003 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_iterator_example_2.cpp + * VERSION see + * DESCRIPTION: regex_iterator example 2: searches a cpp file for class definitions, + * using global data. + */ + +#include +#include +#include +#include +#include + +using namespace std; + +// purpose: +// takes the contents of a file in the form of a string +// and searches for all the C++ class definitions, storing +// their locations in a map of strings/int's + +typedef std::map > map_type; + +const char* re = + // possibly leading whitespace: + "^[[:space:]]*" + // possible template declaration: + "(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" + // class or struct: + "(class|struct)[[:space:]]*" + // leading declspec macros etc: + "(" + "\\<\\w+\\>" + "(" + "[[:blank:]]*\\([^)]*\\)" + ")?" + "[[:space:]]*" + ")*" + // the class name + "(\\<\\w*\\>)[[:space:]]*" + // template specialisation parameters + "(<[^;:{]+>)?[[:space:]]*" + // terminate in { or : + "(\\{|:[^;\\{()]*\\{)"; + + +boost::regex expression(re); +map_type class_index; + +bool regex_callback(const boost::match_results& what) +{ + // what[0] contains the whole string + // what[5] contains the class name. 
+ // what[6] contains the template specialisation if any. + // add class name and position to map: + class_index[what[5].str() + what[6].str()] = what.position(5); + return true; +} + +void load_file(std::string& s, std::istream& is) +{ + s.erase(); + s.reserve(is.rdbuf()->in_avail()); + char c; + while(is.get(c)) + { + if(s.capacity() == s.size()) + s.reserve(s.capacity() * 3); + s.append(1, c); + } +} + +int main(int argc, const char** argv) +{ + std::string text; + for(int i = 1; i < argc; ++i) + { + cout << "Processing file " << argv[i] << endl; + std::ifstream fs(argv[i]); + load_file(text, fs); + // construct our iterators: + boost::regex_iterator m1(text.begin(), text.end(), expression); + boost::regex_iterator m2; + std::for_each(m1, m2, ®ex_callback); + // copy results: + cout << class_index.size() << " matches found" << endl; + map_type::iterator c, d; + c = class_index.begin(); + d = class_index.end(); + while(c != d) + { + cout << "class \"" << (*c).first << "\" found at index: " << (*c).second << endl; + ++c; + } + class_index.erase(class_index.begin(), class_index.end()); + } + return 0; +} + + diff --git a/example/snippets/regex_merge_example.cpp b/example/snippets/regex_merge_example.cpp index e6b49bdd..7741b444 100644 --- a/example/snippets/regex_merge_example.cpp +++ b/example/snippets/regex_merge_example.cpp @@ -73,12 +73,12 @@ int main(int argc, const char** argv) // temporary string stream std::ostringstream t(std::ios::out | std::ios::binary); std::ostream_iterator oi(t); - boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format); + boost::regex_merge(oi, in.begin(), in.end(), e2, pre_format, boost::match_default | boost::format_all); // then output to final output stream // adding syntax highlighting: std::string s(t.str()); std::ostream_iterator out(os); - boost::regex_merge(out, s.begin(), s.end(), e1, format_string); + boost::regex_merge(out, s.begin(), s.end(), e1, format_string, boost::match_default | boost::format_all); os << 
footer_text; } } diff --git a/example/snippets/regex_replace_example.cpp b/example/snippets/regex_replace_example.cpp index ec2c0626..b00345ff 100644 --- a/example/snippets/regex_replace_example.cpp +++ b/example/snippets/regex_replace_example.cpp @@ -73,12 +73,12 @@ int main(int argc, const char** argv) // temporary string stream std::ostringstream t(std::ios::out | std::ios::binary); std::ostream_iterator oi(t); - boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format); + boost::regex_replace(oi, in.begin(), in.end(), e2, pre_format, boost::match_default | boost::format_all); // then output to final output stream // adding syntax highlighting: std::string s(t.str()); std::ostream_iterator out(os); - boost::regex_replace(out, s.begin(), s.end(), e1, format_string); + boost::regex_replace(out, s.begin(), s.end(), e1, format_string, boost::match_default | boost::format_all); os << footer_text; } } @@ -135,3 +135,4 @@ const char* footer_text = "
\n\n\n"; + diff --git a/example/snippets/regex_token_iterator_example_1.cpp b/example/snippets/regex_token_iterator_example_1.cpp new file mode 100644 index 00000000..8ba8dcb5 --- /dev/null +++ b/example/snippets/regex_token_iterator_example_1.cpp @@ -0,0 +1,75 @@ +/* + * + * Copyright (c) 12003 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_token_iterator_example_1.cpp + * VERSION see + * DESCRIPTION: regex_token_iterator example: split a string into tokens. + */ + + +#include + +#include +using namespace std; + + +#if defined(BOOST_MSVC) || (defined(__BORLANDC__) && (__BORLANDC__ == 0x550)) +// +// problem with std::getline under MSVC6sp3 +istream& getline(istream& is, std::string& s) +{ + s.erase(); + char c = is.get(); + while(c != '\n') + { + s.append(1, c); + c = is.get(); + } + return is; +} +#endif + + +int main(int argc) +{ + string s; + do{ + if(argc == 1) + { + cout << "Enter text to split (or \"quit\" to exit): "; + getline(cin, s); + if(s == "quit") break; + } + else + s = "This is a string of tokens"; + + boost::regex re("\\s+"); + boost::regex_token_iterator i(s.begin(), s.end(), re, -1); + boost::regex_token_iterator j; + + unsigned count = 0; + while(i != j) + { + cout << *i++ << endl; + count++; + } + cout << "There were " << count << " tokens found." 
<< endl; + + }while(argc == 1); + return 0; +} + diff --git a/example/snippets/regex_token_iterator_example_2.cpp b/example/snippets/regex_token_iterator_example_2.cpp new file mode 100644 index 00000000..71b2188b --- /dev/null +++ b/example/snippets/regex_token_iterator_example_2.cpp @@ -0,0 +1,92 @@ +/* + * + * Copyright (c) 2003 + * Dr John Maddock + * + * Permission to use, copy, modify, distribute and sell this software + * and its documentation for any purpose is hereby granted without fee, + * provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear + * in supporting documentation. Dr John Maddock makes no representations + * about the suitability of this software for any purpose. + * It is provided "as is" without express or implied warranty. + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_token_iterator_example_2.cpp + * VERSION see + * DESCRIPTION: regex_token_iterator example: spit out linked URL's. + */ + + +#include +#include +#include +#include + +boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", + boost::regex::normal | boost::regbase::icase); + +void load_file(std::string& s, std::istream& is) +{ + s.erase(); + // + // attempt to grow string buffer to match file size, + // this doesn't always work... 
+ s.reserve(is.rdbuf()->in_avail()); + char c; + while(is.get(c)) + { + // use logarithmic growth stategy, in case + // in_avail (above) returned zero: + if(s.capacity() == s.size()) + s.reserve(s.capacity() * 3); + s.append(1, c); + } +} + +int main(int argc, char** argv) +{ + std::string s; + int i; + for(i = 1; i < argc; ++i) + { + std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; + s.erase(); + std::ifstream is(argv[i]); + load_file(s, is); + boost::regex_token_iterator + i(s.begin(), s.end(), e, 1); + boost::regex_token_iterator j; + while(i != j) + { + std::cout << *i++ << std::endl; + } + } + // + // alternative method: + // test the array-literal constructor, and split out the whole + // match as well as $1.... + // + for(i = 1; i < argc; ++i) + { + std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; + s.erase(); + std::ifstream is(argv[i]); + load_file(s, is); + const int subs[] = {1, 0,}; + boost::regex_token_iterator + i(s.begin(), s.end(), e, subs); + boost::regex_token_iterator j; + while(i != j) + { + std::cout << *i++ << std::endl; + } + } + + return 0; +} + + diff --git a/example/timer/regex_timer.cpp b/example/timer/regex_timer.cpp index e1533ed2..85199902 100644 --- a/example/timer/regex_timer.cpp +++ b/example/timer/regex_timer.cpp @@ -103,6 +103,15 @@ istream& getline(istream& is, std::string& s) return is; } #endif +#if defined(__GNUC__) && (__GNUC__ == 3) +istream& getline(istream& is, std::string& s) +{ + std::getline(is, s); + if(s.size() && (s[s.size() -1] == '\r')) + s.erase(s.size() - 1); + return is; +} +#endif int main(int argc, char**argv) diff --git a/performance/time_pcre.cpp b/performance/time_pcre.cpp index dd73d553..5956b521 100644 --- a/performance/time_pcre.cpp +++ b/performance/time_pcre.cpp @@ -57,7 +57,7 @@ double time_match(const std::string& re, const std::string& text, bool icase) tim.restart(); for(counter = 0; counter < iter; ++counter) { - erroffset = pcre_exec(ppcre, pe, text.c_str(), 
text.size(), 0, 0, what, sizeof(what)); + erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)/sizeof(int)); } result = tim.elapsed(); iter *= 2; @@ -70,7 +70,7 @@ double time_match(const std::string& re, const std::string& text, bool icase) tim.restart(); for(counter = 0; counter < iter; ++counter) { - erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)); + erroffset = pcre_exec(ppcre, pe, text.c_str(), text.size(), 0, 0, what, sizeof(what)/sizeof(int)); } run = tim.elapsed(); result = std::min(run, result); @@ -119,12 +119,12 @@ double time_find_all(const std::string& re, const std::string& text, bool icase) { matches = 0; startoff = 0; - exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int)); while(exec_result >= 0) { ++matches; startoff = what[1]; - exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int)); } } result = tim.elapsed(); @@ -147,12 +147,12 @@ double time_find_all(const std::string& re, const std::string& text, bool icase) { matches = 0; startoff = 0; - exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int)); while(exec_result >= 0) { ++matches; startoff = what[1]; - exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)); + exec_result = pcre_exec(ppcre, pe, text.c_str(), text.size(), startoff, 0, what, sizeof(what)/sizeof(int)); } } run = tim.elapsed();