Almost complete POSIX regex support now...

[SVN r22624]
2025-07-29 12:07:28 +02:00 · 2004-04-09 16:04:34 +00:00
parent 38b58f2007
commit 50b5391c8f
21 changed files with 1446 additions and 605 deletions
--- a/build/Jamfile
+++ b/build/Jamfile
@ -59,7 +59,7 @@ lib boost_regex : ../src/$(SOURCES) <template>regex-options
    ;


-dll boost_regex : ../src/$(SOURCES).cpp <template>regex-dll-options
+dll boost_regex : ../src/$(SOURCES) <template>regex-dll-options
    : 
        common-variant-tag
    : 
--- a/include/boost/regex/config.hpp
+++ b/include/boost/regex/config.hpp
@ -60,6 +60,7 @@
 #  include <boost/throw_exception.hpp>
 #  include <boost/scoped_ptr.hpp>
 #  include <boost/shared_ptr.hpp>
+#  include <boost/mpl/bool_fwd.hpp>
 #  ifndef BOOST_NO_STD_LOCALE
 #     include <locale>
 #  endif
--- a/include/boost/regex/v4/basic_regex.hpp
+++ b/include/boost/regex/v4/basic_regex.hpp
@ -154,6 +154,11 @@ public:
   {
      return this->m_can_be_null;
   }
+   const regex_data<charT, traits>& get_data()const   // read-only view of this object as a regex_data
+   {
+      basic_regex_implementation<charT, traits> const* p = this;
+      return *static_cast<const regex_data<charT, traits>*>(p);   // NOTE(review): presumably an upcast to a base class - the class header is not visible here, confirm
+   }
 };

 } // namespace re_detail
@ -470,6 +475,11 @@ public:
      assert(m_pimpl.get());
      return m_pimpl->can_be_null();
   }
+   const re_detail::regex_data<charT, traits>& get_data()const   // forwards to the implementation object held in m_pimpl
+   {
+      assert(m_pimpl.get());   // there must be an implementation to forward to
+      return m_pimpl->get_data();
+   }

 private:
   shared_ptr<re_detail::basic_regex_implementation<charT, traits> > m_pimpl;
--- a/include/boost/regex/v4/basic_regex_creator.hpp
+++ b/include/boost/regex/v4/basic_regex_creator.hpp
@ -28,6 +28,106 @@ namespace boost{

 namespace re_detail{

+template <class charT>
+struct digraph : public std::pair<charT, charT>   // first/second hold up to two characters; second == 0 when only one is present
+{
+   digraph(charT c1 = 0, charT c2 = 0) : std::pair<charT, charT>(c1, c2){}
+   digraph(const std::basic_string<charT>& s) : std::pair<charT, charT>()   // build from a string of one or two characters
+   {
+      BOOST_ASSERT(s.size() <= 2);   // longer strings cannot be represented
+      BOOST_ASSERT(s.size());        // and an empty string has no first character
+      this->first = s[0];
+      this->second = (s.size() > 1) ? s[1] : 0;   // 0 marks "no second character"
+   }
+};
+
+template <class charT, class traits>
+class basic_char_set
+{
+public:
+   typedef digraph<charT>                                      digraph_type;
+   typedef std::basic_string<charT>                            string_type;
+   typedef typename traits::char_class_type                    mask_type;
+   typedef typename std::vector<digraph_type>::const_iterator  list_iterator;
+
+   basic_char_set()
+      : m_negate(false),
+        m_has_digraphs(false),
+        m_classes(0),
+        m_empty(true)
+   {
+   }
+
+   void add_single(const digraph_type& s)
+   {
+      m_singles.push_back(s);
+      // a non-zero second character makes s a digraph:
+      m_has_digraphs = m_has_digraphs || s.second;
+      m_empty = false;
+   }
+   void add_range(const digraph_type& first, const digraph_type& end)
+   {
+      // the two end points are stored side by side in m_ranges:
+      m_ranges.push_back(first);
+      m_ranges.push_back(end);
+      m_has_digraphs = m_has_digraphs || first.second || end.second;
+      m_empty = false;
+   }
+   void add_class(mask_type m)
+   {
+      m_empty = false;
+      m_classes |= m;
+   }
+   void negate()
+   {
+      m_empty = false;
+      m_negate = true;
+   }
+
+   //
+   // read-only access to what has been collected so far:
+   //
+   bool has_digraphs()const
+   {
+      return m_has_digraphs;
+   }
+   bool is_negated()const
+   {
+      return m_negate;
+   }
+   list_iterator singles_begin()const
+   {
+      return m_singles.begin();
+   }
+   list_iterator singles_end()const
+   {
+      return m_singles.end();
+   }
+   list_iterator ranges_begin()const
+   {
+      return m_ranges.begin();
+   }
+   list_iterator ranges_end()const
+   {
+      return m_ranges.end();
+   }
+   mask_type classes()const
+   {
+      return m_classes;
+   }
+   bool empty()const
+   {
+      return m_empty;
+   }
+private:
+   std::vector<digraph_type> m_singles;        // individual characters (or digraphs) to match
+   std::vector<digraph_type> m_ranges;         // range end points, two consecutive entries per range
+   bool                      m_negate;         // set once negate() has been called
+   bool                      m_has_digraphs;   // set once any stored item has a non-zero second character
+   mask_type                 m_classes;        // union of all character class masks added
+   bool                      m_empty;          // true until the first add_xxx() or negate() call
+};
+   
 template <class charT, class traits>
 class basic_regex_creator
 {
@ -54,38 +154,63 @@ public:
      m_pdata->m_flags = flags;
      m_icase = flags & regex_constants::icase;
   }
+   regbase::flag_type flags()   // the option flags stored in the regex data being built
+   {
+      return m_pdata->m_flags;
+   }
   re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
   re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
   re_literal* append_literal(charT c);
+   re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
+   re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::false_*);
+   re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, mpl::true_*);
   void finalize(const charT* p1, const charT* p2);
 protected:
-   regex_data<charT, traits>*    m_pdata;     // pointer to the basic_regex_data struct we are filling in
-   const traits&                 m_traits;    // convenience reference to traits class
-   re_syntax_base*               m_last_state;// the last state we added
-   bool                          m_icase;     // true for case insensitive matches
-   typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character
-   typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character
+   regex_data<charT, traits>*    m_pdata;              // pointer to the basic_regex_data struct we are filling in
+   const traits&                 m_traits;             // convenience reference to traits class
+   re_syntax_base*               m_last_state;         // the last state we added
+   bool                          m_icase;              // true for case insensitive matches
+   unsigned                      m_repeater_id;        // the id of the next repeater
+   unsigned                      m_backrefs;           // bitmask of permitted backrefs
+   boost::uintmax_t              m_bad_repeats;        // bitmask of repeats we can't deduce a startmap for;
+   typename traits::char_class_type m_word_mask;       // mask used to determine if a character is a word character
+   typename traits::char_class_type m_mask_space;      // mask used to determine if a character is a space character
+   typename traits::char_class_type m_lower_mask;       // mask used to determine if a character is a lowercase character
+   typename traits::char_class_type m_upper_mask;      // mask used to determine if a character is an uppercase character
+   typename traits::char_class_type m_alpha_mask;      // mask used to determine if a character is an alphabetic character
 private:
   basic_regex_creator& operator=(const basic_regex_creator&);
   basic_regex_creator(const basic_regex_creator&);

   void fixup_pointers(re_syntax_base* state);
   void create_startmaps(re_syntax_base* state);
-   void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal);
+   void create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask);
   unsigned get_restart_type(re_syntax_base* state);
+   void set_all_masks(unsigned char* bits, unsigned char);
+   bool is_bad_repeat(re_syntax_base* pt);
+   void set_bad_repeat(re_syntax_base* pt);
 };

 template <class charT, class traits>
 basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
-   : m_pdata(data), m_traits(data->m_traits), m_last_state(0)
+   : m_pdata(data), m_traits(data->m_traits), m_last_state(0), m_repeater_id(0), m_backrefs(0)   // m_bad_repeats is not set here: finalize() and create_startmaps() zero it before calling create_startmap()
 {
    m_pdata->m_data.clear();
    static const charT w = 'w';
    static const charT s = 's';
+   static const charT l[] = { 'l', 'o', 'w', 'e', 'r', };   // class names are passed below as [begin, end) ranges, so no terminator is stored
+   static const charT u[] = { 'u', 'p', 'p', 'e', 'r', };
+   static const charT a[] = { 'a', 'l', 'p', 'h', 'a', };
    m_word_mask = m_traits.lookup_classname(&w, &w +1);
    m_mask_space = m_traits.lookup_classname(&s, &s +1);
+   m_lower_mask = m_traits.lookup_classname(l, l + 5);   // 5 == number of characters in each name above
+   m_upper_mask = m_traits.lookup_classname(u, u + 5);
+   m_alpha_mask = m_traits.lookup_classname(a, a + 5);
    BOOST_ASSERT(m_word_mask); 
    BOOST_ASSERT(m_mask_space); 
+   BOOST_ASSERT(m_lower_mask);    // append_set() tests against these three masks when icase is set, so each lookup is asserted to be non-zero
+   BOOST_ASSERT(m_upper_mask); 
+   BOOST_ASSERT(m_alpha_mask); 
 }

 template <class charT, class traits>
@ -148,6 +273,213 @@ re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
   return result;
 }

+template <class charT, class traits>
+inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
+   const basic_char_set<charT, traits>& char_set)
+{
+   typedef mpl::bool_<sizeof(charT) == 1> is_narrow;   // selects the bitmap overload for one byte character types
+   if(char_set.has_digraphs())
+      return append_set(char_set, static_cast<mpl::false_*>(0));   // digraphs always take the long set form
+   return append_set(char_set, static_cast<is_narrow*>(0));
+}
+
+template <class charT, class traits>
+re_syntax_base* basic_regex_creator<charT, traits>::append_set(
+   const basic_char_set<charT, traits>& char_set, mpl::false_*)
+{
+   typedef std::basic_string<charT> string_type;
+   typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
+   typedef typename traits::char_class_type mask_type;
+   // appends char_set as a syntax_element_long_set state followed by its data; returns 0 if a range start sorts after its end
+   re_set_long<mask_type>* result = static_cast<re_set_long<mask_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<mask_type>)));
+   //
+   // fill in the fixed size header fields of the state:
+   //
+   result->csingles = static_cast<unsigned int>(std::distance(char_set.singles_begin(), char_set.singles_end()));
+   result->cranges = static_cast<unsigned int>(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;   // two stored end points per range
+   result->cequivalents = 0;
+   result->cclasses = char_set.classes();
+   if(flags() & regbase::icase)
+   {
+      // case insensitive: a set holding the whole lower or upper mask is widened by the alpha mask:
+      if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
+         result->cclasses |= m_alpha_mask;
+   }
+
+   result->isnot = char_set.is_negated();
+   result->singleton = !char_set.has_digraphs();
+   //
+   // remember where the state is, it is looked up again by offset at the end:
+   //
+   std::ptrdiff_t offset = getoffset(result);
+   //
+   // now extend with all the singles, each one a null terminated string of one or two characters:
+   //
+   item_iterator first, last;
+   first = char_set.singles_begin();
+   last = char_set.singles_end();
+   while(first != last)
+   {
+      charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2)));   // characters plus terminator
+      p[0] = m_traits.translate(first->first, m_icase);
+      if(first->second)
+      {
+         p[1] = m_traits.translate(first->second, m_icase);
+         p[2] = 0;
+      }
+      else
+         p[1] = 0;
+      ++first;
+   }
+   //
+   // now extend with all the ranges, each one a pair of null terminated strings:
+   //
+   first = char_set.ranges_begin();
+   last = char_set.ranges_end();
+   while(first != last)
+   {
+      // first grab the (translated) endpoints of the range:
+      digraph<charT> c1 = *first;
+      c1.first = this->m_traits.translate(c1.first, this->m_icase);
+      c1.second = this->m_traits.translate(c1.second, this->m_icase);
+      ++first;
+      digraph<charT> c2 = *first;
+      c2.first = this->m_traits.translate(c2.first, this->m_icase);
+      c2.second = this->m_traits.translate(c2.second, this->m_icase);
+      ++first;
+      string_type s1, s2;
+      // different actions now depending upon whether collation is turned on:
+      if(flags() & regex_constants::collate)
+      {
+         // we need to transform our range into sort keys:
+         s1 = this->m_traits.transform(&c1.first, (c1.second ? &c1.second +1 : &c1.second));   // NOTE(review): treats first/second as adjacent in memory - confirm the pair has no padding between them
+         s2 = this->m_traits.transform(&c2.first, (c2.second ? &c2.second +1 : &c2.second));
+      }
+      else
+      {
+         if(c1.second)
+            s1 = string_type(&c1.first, &c1.second+1);   // same adjacency assumption as above
+         else
+            s1 = string_type(1, c1.first);
+         if(c2.second)
+            s2 = string_type(&c2.first, &c2.second+1);
+         else
+            s2 = string_type(1, c2.first);
+      }
+      if(s1 > s2)
+      {
+         // range start compares greater than range end, report failure to the caller:
+         return 0;
+      }
+      charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );   // both strings plus their two terminators
+      std::memcpy(p, s1.c_str(), sizeof(charT) * (s1.size() + 1));
+      p += s1.size() + 1;
+      std::memcpy(p, s2.c_str(), sizeof(charT) * (s2.size() + 1));
+   }
+   //
+   // finally reset the address of our last state (presumably extend() can move the storage - confirm):
+   //
+   m_last_state = result = static_cast<re_set_long<mask_type>*>(getaddress(offset));
+   return result;
+}
+
+template <class charT, class traits>
+re_syntax_base* basic_regex_creator<charT, traits>::append_set(
+   const basic_char_set<charT, traits>& char_set, mpl::true_*)
+{
+   typedef std::basic_string<charT> string_type;
+   typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
+   // appends char_set as a syntax_element_set state whose _map has one entry per character value; returns 0 if a range is the wrong way round
+   re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
+   bool negate = char_set.is_negated();
+   std::memset(result->_map, 0, sizeof(result->_map));
+   //
+   // handle singles first (only the first character is used, digraph sets are routed to the other overload):
+   //
+   item_iterator first, last;
+   first = char_set.singles_begin();
+   last = char_set.singles_end();
+   while(first != last)
+   {
+      const charT target = this->m_traits.translate(first->first, this->m_icase);   // loop invariant, so translated once
+      for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
+      {
+         if(this->m_traits.translate(static_cast<charT>(i), this->m_icase) == target)
+            result->_map[i] = true;
+      }
+      ++first;
+   }
+   //
+   // OK now handle ranges:
+   //
+   first = char_set.ranges_begin();
+   last = char_set.ranges_end();
+   while(first != last)
+   {
+      // first grab the endpoints of the range:
+      charT c1 = this->m_traits.translate(first->first, this->m_icase);
+      ++first;
+      charT c2 = this->m_traits.translate(first->first, this->m_icase);
+      ++first;
+      // different actions now depending upon whether collation is turned on:
+      if(flags() & regex_constants::collate)
+      {
+         // we need to transform our range into sort keys:
+         string_type s1 = this->m_traits.transform(&c1, &c1 +1);
+         string_type s2 = this->m_traits.transform(&c2, &c2 +1);
+         if(s1 > s2)
+         {
+            // range start sorts after range end, report failure to the caller:
+            return 0;
+         }
+         for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
+         {
+            charT c3 = static_cast<charT>(i);
+            string_type s3 = this->m_traits.transform(&c3, &c3 +1);
+            if((s1 <= s3) && (s3 <= s2))
+               result->_map[i] = true;
+         }
+      }
+      else
+      {
+         if(static_cast<unsigned char>(c1) > static_cast<unsigned char>(c2))
+         {
+            // reversed range: compared as unsigned char (the type used to index _map) so that the length below can never go negative when charT is signed:
+            return 0;
+         }
+         // everything in range matches:
+         std::memset(result->_map + static_cast<unsigned char>(c1), true, 1 + static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1));
+      }
+   }
+   //
+   // and now the classes:
+   //
+   typedef typename traits::char_class_type mask_type;
+   mask_type m = char_set.classes();
+   if(flags() & regbase::icase)
+   {
+      // case insensitive: a set holding the whole lower or upper mask is widened by the alpha mask:
+      if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
+         m |= m_alpha_mask;
+   }
+   if(m != 0)
+   {
+      for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
+      {
+         if(this->m_traits.is_class(static_cast<charT>(i), m))
+            result->_map[i] = true;
+      }
+   }
+   if(negate)
+   {
+      for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)   // a negated set simply inverts every entry
+      {
+         result->_map[i] = !(result->_map[i]);
+      }
+   }
+   return result;
+}
+
 template <class charT, class traits>
 void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
 {
@ -174,7 +506,8 @@ void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT*
   std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
   m_pdata->m_can_be_null = 0;

-   create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all, 0);
+   m_bad_repeats = 0;
+   create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
   // get the restart type:
   m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
 }
@ -186,12 +519,15 @@ void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
   {
      switch(state->type)
      {
-      case syntax_element_alt:
      case syntax_element_rep:
      case syntax_element_dot_rep:
      case syntax_element_char_rep:
      case syntax_element_short_set_rep:
      case syntax_element_long_set_rep:
+         // set the id of this repeat:
+         static_cast<re_repeat*>(state)->id = m_repeater_id++;
+         // fall through:
+      case syntax_element_alt:
         std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
         static_cast<re_alt*>(state)->can_be_null = 0;
         // fall through:
@ -227,8 +563,10 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
         // create other startmaps *first*, since we can use the
         // results from these when creating out own:
         create_startmaps(state->next.p);
-         create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take, state);
-         create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip, state);
+         m_bad_repeats = 0;
+         create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
+         m_bad_repeats = 0;
+         create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
         return;
      default:
         state = state->next.p;
@ -237,9 +575,10 @@ void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
 }

 template <class charT, class traits>
-void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask, re_syntax_base* terminal)
+void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* map, unsigned int* pnull, unsigned char mask)
 {
-   while(state && (state != terminal))
+   int not_last_jump = 1;
+   while(state)
   {
      switch(state->type)
      {
@ -270,34 +609,20 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
         }
         // now figure out if we can match a NULL string at this point:
         if(pnull)
-            create_startmap(state->next.p, 0, pnull, mask, terminal);
+            create_startmap(state->next.p, 0, pnull, mask);
         return;
      }
      case syntax_element_backref:
      case syntax_element_wild:
      {
         // can't be null, any character can match:
-         if(map)
-         {
-            map[0] |= mask_init;
-            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
-            {
-               map[i] |= mask;
-            }
-         }
+         set_all_masks(map, mask);
         return;
      }
      case syntax_element_match:
      {
         // must be null, any character can match:
-         if(map)
-         {
-            map[0] |= mask_init;
-            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
-            {
-               map[i] |= mask;
-            }
-         }
+         set_all_masks(map, mask);
         if(pnull)
            *pnull |= mask;
         return;
@ -305,7 +630,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
      case syntax_element_word_start:
      {
         // recurse, then AND with all the word characters:
-         create_startmap(state->next.p, map, pnull, mask, terminal);
+         create_startmap(state->next.p, map, pnull, mask);
         if(map)
         {
            map[0] |= mask_init;
@ -320,7 +645,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
      case syntax_element_word_end:
      {
         // recurse, then AND with all the word characters:
-         create_startmap(state->next.p, map, pnull, mask, terminal);
+         create_startmap(state->next.p, map, pnull, mask);
         if(map)
         {
            map[0] |= mask_init;
@ -340,13 +665,35 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
         return;
      }
      case syntax_element_long_set:
-         assert(0);
+         if(map)
+         {
+            typedef typename traits::char_class_type mask_type;
+            map[0] |= mask_init;
+            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
+            {
+               charT c = static_cast<charT>(i);
+               if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<mask_type>*>(state), *m_pdata))
+                  map[i] |= mask;
+            }
+         }
+         return;
      case syntax_element_set:
-         assert(0);
+         if(map)
+         {
+            map[0] |= mask_init;
+            for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
+            {
+               if(static_cast<re_set*>(state)->_map[
+                  static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), this->m_icase))])
+                  map[i] |= mask;
+            }
+         }
+         return;
      case syntax_element_jump:
         // take the jump:
         state = static_cast<re_alt*>(state)->alt.p;
-         break;;
+         not_last_jump = -1;
+         break;
      case syntax_element_alt:
      case syntax_element_rep:
      case syntax_element_dot_rep:
@ -360,6 +707,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
               if(map)
               {
                  // copy previous results:
+                  map[0] |= mask_init;
                  for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
                  {
                     if(rep->_map[i] & mask_any)
@ -376,8 +724,17 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
            {
               // we haven't created a startmap for this alternative yet
               // so take the union of the two options:
-               create_startmap(state->next.p, map, pnull, mask, state);
-               create_startmap(rep->alt.p, map, pnull, mask, state);
+               if(is_bad_repeat(state))
+               {
+                  set_all_masks(map, mask);
+                  return;
+               }
+               set_bad_repeat(state);
+               create_startmap(state->next.p, map, pnull, mask);
+               if((state->type == syntax_element_alt) 
+                  || (static_cast<re_repeat*>(state)->min == 0)
+                  || (not_last_jump == 0))
+                  create_startmap(rep->alt.p, map, pnull, mask);
            }
         }
         return;
@ -395,6 +752,7 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
      default:
         state = state->next.p;
      }
+      ++not_last_jump;
   }
 }

@ -416,8 +774,9 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
         return regbase::restart_line;
      case syntax_element_word_boundary:
      case syntax_element_word_start:
-         return regbase::restart_line;
+         return regbase::restart_word;
      case syntax_element_buffer_start:
+      case syntax_element_restart_continue:
         return regbase::restart_continue;
      default:
         state = 0;
@ -427,6 +786,68 @@ unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* st
   return regbase::restart_any;
 }

+template <class charT, class traits>
+void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
+{
+   //
+   // OR mask into every entry of the table at bits (a null table is
+   // ignored); a table whose first entry is still zero is filled with
+   // a single memset call instead of being OR'ed entry by entry:
+   //
+   if(bits == 0)
+      return;
+   const unsigned table_size = 1u << CHAR_BIT;
+   if(bits[0] != 0)
+   {
+      for(unsigned char* p = bits; p != bits + table_size; ++p)
+         *p |= mask;
+   }
+   else
+      (std::memset)(bits, mask, table_size);
+   bits[0] |= mask_init;
+}
+
+template <class charT, class traits>
+bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
+{  // true if repeat state pt is already flagged in m_bad_repeats, or if its id has no bit there; false for non-repeat states
+   switch(pt->type)
+   {
+   case syntax_element_rep:
+   case syntax_element_dot_rep:
+   case syntax_element_char_rep:
+   case syntax_element_short_set_rep:
+   case syntax_element_long_set_rep:
+      {
+         unsigned id = static_cast<re_repeat*>(pt)->id;
+         if(id >= sizeof(m_bad_repeats) * CHAR_BIT)   // valid bit positions are 0 .. width-1
+            return true;  // run out of bits, assume we can't traverse this one.
+         return (m_bad_repeats & (static_cast<boost::uintmax_t>(1u) << id)) != 0;   // shift in the mask's own type: 1u << id is undefined once id reaches the width of unsigned
+      }
+   default:
+      return false;
+   }
+}
+
+template <class charT, class traits>
+void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
+{  // flags repeat state pt in m_bad_repeats; non-repeat states and ids with no bit in the mask are left alone
+   switch(pt->type)
+   {
+   case syntax_element_rep:
+   case syntax_element_dot_rep:
+   case syntax_element_char_rep:
+   case syntax_element_short_set_rep:
+   case syntax_element_long_set_rep:
+      {
+         unsigned id = static_cast<re_repeat*>(pt)->id;
+         if(id < sizeof(m_bad_repeats) * CHAR_BIT)   // valid bit positions are 0 .. width-1
+            m_bad_repeats |= (static_cast<boost::uintmax_t>(1u) << id);   // shift in the mask's own type: 1u << id is undefined once id reaches the width of unsigned
+      }   // falls through to default, which does nothing
+   default:
+      break;
+   }
+}
+
 } // namespace re_detail

 } // namespace boost
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@ -45,6 +45,12 @@ public:
   bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
   bool parse_repeat_range(bool isbasic);
   bool parse_alt();
+   bool parse_set();
+   bool parse_backref();
+   void parse_set_literal(basic_char_set<charT, traits>& char_set);
+   bool parse_inner_set(basic_char_set<charT, traits>& char_set);
+   digraph<charT> get_next_set_literal();
+   charT unescape_character();

 private:
   typedef bool (basic_regex_parser::*parser_proc_type)();
@ -54,7 +60,6 @@ private:
   const charT*               m_position;       // our current parser position
   unsigned                   m_mark_count;     // how many sub-expressions we have
   std::ptrdiff_t             m_paren_start;    // where the last seen ')' began (where repeats are inserted).
-   unsigned                   m_repeater_id;    // the id of the next repeater
   std::ptrdiff_t             m_alt_insert_point; // where to insert the next alternative

   basic_regex_parser& operator=(const basic_regex_parser&);
@ -63,7 +68,7 @@ private:

 template <class charT, class traits>
 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
-   : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_repeater_id(0), m_alt_insert_point(0)
+   : basic_regex_creator<charT, traits>(data), m_mark_count(0), m_paren_start(0), m_alt_insert_point(0)
 {
 }

@ -151,6 +156,8 @@ bool basic_regex_parser<charT, traits>::parse_basic()
         ++m_position;
         return parse_repeat();
      }
+   case regex_constants::syntax_open_set:
+      return parse_set();
   default:
      return parse_literal();
   }
@ -160,7 +167,7 @@ bool basic_regex_parser<charT, traits>::parse_basic()
 template <class charT, class traits>
 bool basic_regex_parser<charT, traits>::parse_extended()
 {
-   bool result;
+   bool result = true;
   switch(this->m_traits.syntax_type(*m_position))
   {
   case regex_constants::syntax_open_mark:
@ -205,6 +212,8 @@ bool basic_regex_parser<charT, traits>::parse_extended()
      break;
   case regex_constants::syntax_or:
      return parse_alt();
+   case regex_constants::syntax_open_set:
+      return parse_set();
   default:
      result = parse_literal();
      break;
@ -260,6 +269,11 @@ bool basic_regex_parser<charT, traits>::parse_open_paren()
   // restore the alternate insertion point:
   //
   this->m_alt_insert_point = last_alt_point;
+   //
+   // allow backrefs to this mark:
+   //
+   if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT))
+      this->m_backrefs |= 1u << (markid - 1);

   return true;
 }
@ -276,7 +290,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
   case regex_constants::syntax_close_mark:
      return false;
   case regex_constants::syntax_plus:
-      if(this->m_pdata->m_flags & regex_constants::bk_plus_qm)
+      if(this->flags() & regex_constants::bk_plus_qm)
      {
         ++m_position;
         return parse_repeat(1);
@ -284,7 +298,7 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
      else
         return parse_literal();
   case regex_constants::syntax_question:
-      if(this->m_pdata->m_flags & regex_constants::bk_plus_qm)
+      if(this->flags() & regex_constants::bk_plus_qm)
      {
         ++m_position;
         return parse_repeat(0, 1);
@ -292,22 +306,24 @@ bool basic_regex_parser<charT, traits>::parse_basic_escape()
      else
         return parse_literal();
   case regex_constants::syntax_open_brace:
-      if(this->m_pdata->m_flags & regbase::no_intervals)
+      if(this->flags() & regbase::no_intervals)
         return parse_literal();
      ++m_position;
      return parse_repeat_range(true);
   case regex_constants::syntax_close_brace:
-      if(this->m_pdata->m_flags & regbase::no_intervals)
+      if(this->flags() & regbase::no_intervals)
         return parse_literal();
      fail(REG_EBRACE, this->m_position - this->m_base);
      result = false;
      break;
   case regex_constants::syntax_or:
-      if(this->m_pdata->m_flags & regbase::bk_vbar)
+      if(this->flags() & regbase::bk_vbar)
         return parse_alt();
      else
         result = parse_literal();
      break;
+   case regex_constants::syntax_digit:
+      return parse_backref();
   default:
      result = parse_literal();
      break;
@ -319,8 +335,35 @@ template <class charT, class traits>
 bool basic_regex_parser<charT, traits>::parse_extended_escape()
 {
   ++m_position;
+   bool negate = false; // in case this is a character class escape: \w \d etc
   switch(this->m_traits.escape_syntax_type(*m_position))
   {
+   case regex_constants::escape_type_not_class:
+      negate = true;
+      // fall through:
+   case regex_constants::escape_type_class:
+      {
+         typedef typename traits::char_class_type mask_type;
+         mask_type m = this->m_traits.lookup_classname(m_position, m_position+1);
+         if(m != 0)
+         {
+            basic_char_set<charT, traits> char_set;
+            if(negate)
+               char_set.negate();
+            char_set.add_class(m);
+            if(0 == this->append_set(char_set))
+               fail(REG_ERANGE, m_position - m_base);
+            ++m_position;
+            return true;
+         }
+         //
+         // not a class, just a regular unknown escape:
+         //
+         this->append_literal(unescape_character());
+         break;
+      }
+   case regex_constants::syntax_digit:
+      return parse_backref();
   case regex_constants::escape_type_left_word:
      ++m_position;
      this->append_state(syntax_element_word_start);
@ -329,8 +372,29 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
      ++m_position;
      this->append_state(syntax_element_word_end);
      break;
+   case regex_constants::escape_type_start_buffer:
+      ++m_position;
+      this->append_state(syntax_element_buffer_start);
+      break;
+   case regex_constants::escape_type_end_buffer:
+      ++m_position;
+      this->append_state(syntax_element_buffer_end);
+      break;
+   case regex_constants::escape_type_word_assert:
+      ++m_position;
+      this->append_state(syntax_element_word_boundary);
+      break;
+   case regex_constants::escape_type_not_word_assert:
+      ++m_position;
+      this->append_state(syntax_element_within_word);
+      break;
+   case regex_constants::escape_type_Z:
+      ++m_position;
+      this->append_state(syntax_element_soft_buffer_end);
+      break;
   default:
-      return parse_literal();
+      this->append_literal(unescape_character());
+      break;
   }
   return true;
 }
@ -355,7 +419,7 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
   // when we get to here we may have a non-greedy ? mark still to come:
   //
   if((m_position != m_end) 
-      && (0 == (this->m_pdata->m_flags & (regbase::main_option_type | regbase::no_perl_ex))))
+      && (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))))
   {
      // OK we have a perl regex, check for a '?':
      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
@ -417,7 +481,6 @@ bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_
   rep->max = high;
   rep->greedy = greedy;
   rep->leading = false;
-   rep->id = m_repeater_id++;
   // store our repeater position for later:
   std::ptrdiff_t rep_off = this->getoffset(rep);
   // and append a back jump to the repeat:
@ -535,7 +598,7 @@ bool basic_regex_parser<charT, traits>::parse_alt()
   //
   // if we didn't actually add any trailing states then that's an error:
   //
-   if(this->m_alt_insert_point == this->m_pdata->m_data.size())
+   if(this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
      fail(REG_EMPTY, this->m_position - this->m_base);
   //
   // fix up the jump we added to point to the end of the states
@ -548,6 +611,311 @@ bool basic_regex_parser<charT, traits>::parse_alt()
   return result;
 }

+//
+// Parses a bracket expression.  m_position is expected to address the
+// opening '[' on entry (it is skipped first).  Members are accumulated in
+// a local basic_char_set and handed to append_set() once the closing ']'
+// has been seen.
+// Returns true once the set (or a construct that parse_inner_set consumed
+// on its own) is complete, and false if the input ran out first.
+//
+template <class charT, class traits>
+bool basic_regex_parser<charT, traits>::parse_set()
+{
+   ++m_position;
+   if(m_position == m_end)
+      fail(REG_EBRACK, m_position - m_base);
+   basic_char_set<charT, traits> char_set;
+
+   const charT* base = m_position;  // first character after the '['
+   const charT* item_base = m_position;  // first position at which a set member may start (after '[' or '[^')
+
+   while(m_position != m_end)
+   {
+      switch(this->m_traits.syntax_type(*m_position))
+      {
+      case regex_constants::syntax_caret:
+         // a '^' directly after the '[' negates the set,
+         // anywhere else it is an ordinary member:
+         if(m_position == base)
+         {
+            char_set.negate();
+            ++m_position;
+            item_base = m_position;
+         }
+         else
+            parse_set_literal(char_set);
+         break;
+      case regex_constants::syntax_close_set:
+         // a ']' in the first member position is a literal,
+         // otherwise it terminates the set:
+         if(m_position == item_base)
+         {
+            parse_set_literal(char_set);
+            break;
+         }
+         else
+         {
+            ++m_position;
+            if(0 == this->append_set(char_set))
+               fail(REG_ERANGE, m_position - m_base);
+         }
+         return true;
+      case regex_constants::syntax_open_set:
+         // parse_inner_set returns true when it added something to char_set
+         // and we should carry on; false means it has already emitted
+         // states of its own and there is nothing left to do here:
+         if(parse_inner_set(char_set))
+            break;
+         return true;
+      default:
+         parse_set_literal(char_set);
+         break;
+      }
+   }
+   // the loop above only exits (rather than returns) when the input is
+   // exhausted, so this yields false for an unterminated set:
+   return m_position != m_end;
+}
+
+//
+// Called from within a bracket expression when a nested '[' is seen.
+// Returns true if parsing of the enclosing set should continue, or false
+// if this function has emitted a word start/end assertion itself, in which
+// case the enclosing set has been consumed completely.
+//
+template <class charT, class traits>
+bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
+{
+   //
+   // we have either a character class [:name:]
+   // a collating element [.name.]
+   // or an equivalence class [=name=]
+   //
+   // NB: only the [:name:] form is handled below, for anything else
+   // the '[' is re-read as an ordinary member of the set.
+   //
+   if(m_end == ++m_position)
+      fail(REG_EBRACK, m_position - m_base);
+   switch(this->m_traits.syntax_type(*m_position))
+   {
+   case regex_constants::syntax_colon:
+      {
+      // check that character classes are actually enabled:
+      if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) 
+         == (regbase::basic_syntax_group  | regbase::no_char_classes))
+      {
+         // classes are disabled: back up to the '[' and treat it as a literal
+         --m_position;
+         parse_set_literal(char_set);
+         return true;
+      }
+      // skip the ':'
+      if(m_end == ++m_position)
+         fail(REG_EBRACK, m_position - m_base);
+      const charT* name_first = m_position;
+      // skip at least one character, then find the matching ':]'
+      if(m_end == ++m_position)
+         fail(REG_EBRACK, m_position - m_base);
+      while((m_position != m_end) 
+         && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) 
+         ++m_position;
+      // [name_first, name_last) is the class name, name_last addresses the closing ':'
+      const charT* name_last = m_position;
+      if(m_end == m_position)
+         fail(REG_EBRACK, m_position - m_base);
+      // the closing ':' must be followed directly by ']':
+      if((m_end == ++m_position) 
+         || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
+         fail(REG_EBRACK, m_position - m_base);
+      typedef typename traits::char_class_type mask_type;
+      mask_type m = this->m_traits.lookup_classname(name_first, name_last);
+      if(0 == m)
+      {
+         // unknown class name: a one character name, in a set that has no
+         // other members so far, may still be a word start/end assertion
+         if(char_set.empty() && (name_last - name_first == 1))
+         {
+            // maybe a special case:
+            // the ":]" must be followed by the ']' that closes the outer set
+            ++m_position;
+            if( (m_position != m_end) 
+               && (this->m_traits.syntax_type(*m_position) 
+                  == regex_constants::syntax_close_set))
+            {
+               if(this->m_traits.escape_syntax_type(*name_first) 
+                  == regex_constants::escape_type_left_word)
+               {
+                  ++m_position;
+                  this->append_state(syntax_element_word_start);
+                  // false: the whole construct has been consumed, no set to append
+                  return false;
+               }
+               if(this->m_traits.escape_syntax_type(*name_first) 
+                  == regex_constants::escape_type_right_word)
+               {
+                  ++m_position;
+                  this->append_state(syntax_element_word_end);
+                  // false: the whole construct has been consumed, no set to append
+                  return false;
+               }
+            }
+         }
+         fail(REG_ECTYPE, name_first - m_base);
+      }
+      char_set.add_class(m);
+      // step over the ']' that ends the class name:
+      ++m_position;
+      break;
+   }
+   default:
+      // not a class: back up to the '[' and treat it as a literal
+      --m_position;
+      parse_set_literal(char_set);
+      break;
+   }
+   return true;
+}
+
+//
+// Parses one member of a bracket expression and adds it to char_set:
+// either a single (possibly escaped) character, or a range "a-b".
+// On error fail() is called and the item is abandoned, so that nothing
+// is ever read at or beyond m_end.
+//
+template <class charT, class traits>
+void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
+{
+   digraph<charT> start_range = get_next_set_literal();
+   if(m_end == m_position)
+   {
+      // the set was never closed:
+      fail(REG_EBRACK, m_position - m_base);
+      return;
+   }
+   if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
+   {
+      // we may have a range:
+      if(m_end == ++m_position)
+      {
+         fail(REG_EBRACK, m_position - m_base);
+         return;
+      }
+      if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
+      {
+         digraph<charT> end_range = get_next_set_literal();
+         char_set.add_range(start_range, end_range);
+         // a further '-' directly after a complete range ("a-b-") is an error,
+         // check for the end of the input first so m_end is never dereferenced:
+         if((m_position != m_end)
+            && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash))
+            fail(REG_ERANGE, m_position - m_base);
+         return;
+      }
+      // the '-' is directly followed by ']' so it is a literal: step back
+      // so that it is picked up as a single character on the next call.
+      --m_position;
+   }
+   char_set.add_single(start_range);
+}
+
+//
+// Reads the next character of a bracket expression and returns it as a
+// digraph.  An escape character introduces an escape sequence unless
+// no_escape_in_lists is set, in which case it stands for itself.
+//
+template <class charT, class traits>
+digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal()
+{
+   const bool at_escape = 
+      (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape);
+   if(at_escape && (0 == (this->flags() & regex_constants::no_escape_in_lists)))
+   {
+      // escapes are enabled: skip the escape character and decode what follows
+      ++m_position;
+      digraph<charT> unescaped;
+      unescaped = unescape_character();
+      return unescaped;
+   }
+   // anything else (including an escape character when escapes are
+   // disabled inside lists) is taken literally:
+   digraph<charT> literal;
+   literal = *m_position++;
+   return literal;
+}
+
+//
+// Converts the escape sequence at m_position into the single character it
+// denotes.  m_position must address the character that follows the escape
+// character; on success it is advanced past the whole sequence.
+// If the sequence is malformed fail() is called and charT(0) is returned
+// without reading at or beyond m_end.
+//
+template <class charT, class traits>
+charT basic_regex_parser<charT, traits>::unescape_character()
+{
+   charT result(0);
+   if(m_position == m_end)
+   {
+      fail(REG_EESCAPE, m_position - m_base);
+      return result;
+   }
+   // NB: the escape_type_* constants are the ones handed out by
+   // escape_syntax_type(), not by syntax_type():
+   switch(this->m_traits.escape_syntax_type(*m_position))
+   {
+   case regex_constants::escape_type_control_a:
+      result = charT('\a');
+      break;
+   case regex_constants::escape_type_e:
+      result = charT(27);
+      break;
+   case regex_constants::escape_type_control_f:
+      result = charT('\f');
+      break;
+   case regex_constants::escape_type_control_n:
+      result = charT('\n');
+      break;
+   case regex_constants::escape_type_control_r:
+      result = charT('\r');
+      break;
+   case regex_constants::escape_type_control_t:
+      result = charT('\t');
+      break;
+   case regex_constants::escape_type_control_v:
+      result = charT('\v');
+      break;
+   case regex_constants::escape_type_word_assert:
+      // when we get here this is a backspace rather than an assertion:
+      result = charT('\b');
+      break;
+   case regex_constants::escape_type_ascii_control:
+      // a control character, the next character selects which one:
+      ++m_position;
+      if(m_position == m_end)
+      {
+         fail(REG_EESCAPE, m_position - m_base);
+         return result;
+      }
+      if((*m_position < charT('@'))
+            || (*m_position > charT(125)) )
+      {
+         fail(REG_EESCAPE, m_position - m_base);
+         return result;
+      }
+      result = static_cast<charT>(*m_position - charT('@'));
+      break;
+   case regex_constants::escape_type_hex:
+      ++m_position;
+      if(m_position == m_end)
+      {
+         fail(REG_EESCAPE, m_position - m_base);
+         return result;
+      }
+      // maybe have \x{ddd}
+      if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
+      {
+         ++m_position;
+         if(m_position == m_end)
+         {
+            fail(REG_EESCAPE, m_position - m_base);
+            return result;
+         }
+         int i = this->m_traits.toi(m_position, m_end, 16);
+         if((m_position == m_end)
+            || (i < 0)
+            || (i > (std::numeric_limits<charT>::max)())
+            || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
+         {
+            fail(REG_BADBR, m_position - m_base);
+            return result;
+         }
+         // skip the closing brace:
+         ++m_position;
+         result = charT(i);
+      }
+      else
+      {
+         // at most two hex digits:
+         std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), m_end - m_position);
+         int i = this->m_traits.toi(m_position, m_position + len, 16);
+         // only test for overflow when charT is narrower than int,
+         // otherwise the shift count would be out of range:
+         if((i < 0)
+            || ((sizeof(charT) < sizeof(int)) && (i >> (sizeof(charT) * CHAR_BIT))))
+         {
+            fail(REG_EESCAPE, m_position - m_base);
+            return result;
+         }
+         result = charT(i);
+      }
+      return result;
+   case regex_constants::syntax_digit:
+      {
+      // an octal escape sequence, the first character must be a zero
+      // followed by up to 3 octal digits:
+      std::ptrdiff_t len = (std::min)(std::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
+      int val = this->m_traits.toi(m_position, m_position + len, 8);
+      if(val < 0) 
+      {
+         fail(REG_EESCAPE, m_position - m_base);
+         return result;
+      }
+      return static_cast<charT>(val);
+      }
+   default:
+      // not a recognised escape: the character stands for itself
+      result = *m_position;
+      break;
+   }
+   // single character escapes leave m_position on the character itself:
+   ++m_position;
+   return result;
+}
+
+//
+// Parses a single digit back-reference at m_position.
+// A digit of zero is not a back-reference at all but the start of an octal
+// escape sequence, which is handed on to unescape_character().
+// Returns false after an error has been reported via fail(), otherwise true.
+//
+template <class charT, class traits>
+bool basic_regex_parser<charT, traits>::parse_backref()
+{
+   if(m_position == m_end)
+   {
+      // error positions are offsets from the start of the expression:
+      fail(REG_EESCAPE, m_position - m_base);
+      return false;
+   }
+   // NB: toi() moves m_position past the digit that it consumes
+   int i = this->m_traits.toi(m_position, m_position + 1, 10);
+   if((i > 0) && (this->m_backrefs & (1u << (i-1))))
+   {
+      // refers to a sub-expression that has been recorded in m_backrefs:
+      re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
+      pb->index = i;
+   }
+   else if(i == 0)
+   {
+      // not a backref at all but an octal escape sequence:
+      // back up onto the zero so that the whole sequence gets re-read
+      --m_position;
+      charT c = unescape_character();
+      this->append_literal(c);
+   }
+   else
+   {
+      // either not a number, or no such sub-expression:
+      fail(REG_ESUBREG, m_position - m_base);
+      return false;
+   }
+   return true;
+}
+
 } // namespace re_detail
 } // namespace boost

--- a/include/boost/regex/v4/cpp_regex_traits.hpp
+++ b/include/boost/regex/v4/cpp_regex_traits.hpp
@ -304,6 +304,27 @@ template <class charT>
 class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer<charT>
 {
 public:
+   typedef typename cpp_regex_traits<charT>::char_class_type char_class_type;
+   BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
+   BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
+   BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
+   BOOST_STATIC_CONSTANT(char_class_type, 
+      mask_base = 
+         std::ctype<charT>::alnum 
+         | std::ctype<charT>::alpha
+         | std::ctype<charT>::cntrl
+         | std::ctype<charT>::digit
+         | std::ctype<charT>::graph
+         | std::ctype<charT>::lower
+         | std::ctype<charT>::print
+         | std::ctype<charT>::punct
+         | std::ctype<charT>::space
+         | std::ctype<charT>::upper
+         | std::ctype<charT>::xdigit);
+
+   //BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
+
+
   typedef std::basic_string<charT> string_type;
   //cpp_regex_traits_implementation();
   cpp_regex_traits_implementation(const std::locale& l);
@ -316,10 +337,25 @@ public:
      }
      return get_default_error_string(n);
   }
+   //
+   // Returns the character class mask for the name [p1, p2).
+   // If the name is not recognised as given then a lower case copy
+   // of it is tried as well.  The result is whatever lookup_classname_imp
+   // returns, callers treat zero as "no such class".
+   //
+   char_class_type lookup_classname(const charT* p1, const charT* p2) const
+   {
+      char_class_type result = lookup_classname_imp(p1, p2);
+      // an empty name has nothing to convert, so there is no point retrying it:
+      if((result == 0) && (p1 != p2))
+      {
+         string_type s(p1, p2);
+         // NB: form the end pointer by offset from the first character,
+         // dereferencing s.end() is undefined behaviour:
+         charT* first = &*s.begin();
+         charT* last = first + s.size();
+         this->m_pctype->tolower(first, last);
+         result = lookup_classname_imp(first, last);
+      }
+      return result;
+   }
   re_detail::parser_buf<charT>   m_sbuf;            // buffer for parsing numbers.
   std::basic_istream<charT>      m_is;              // stream for parsing numbers.
 private:
   std::map<int, std::string>     m_error_strings;   // error messages indexed by numberic ID
+   //
+   // helpers:
+   //
+   char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const;
 };

 template <class charT>
@ -349,7 +385,7 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
   //
   if((int)cat >= 0)
   {
-      for(int i = 0; i <= boost::regex_constants::error_unknown; ++i)
+      for(boost::regex_constants::error_type i = 0; i <= boost::regex_constants::error_unknown; ++i)
      {
         const char* p = get_default_error_string(i);
         string_type default_message;
@ -369,6 +405,39 @@ cpp_regex_traits_implementation<charT>::cpp_regex_traits_implementation(const st
   }
 }

+//
+// Translates the class name [p1, p2) into a character class mask by
+// indexing a fixed table with the value returned from
+// re_detail::get_default_class_id().  One is added to that value first,
+// so an id of -1 selects the zero entry at the start of the table.
+// NOTE(review): the order of the table has to agree with the ids handed
+// out by get_default_class_id(), which is defined elsewhere - confirm
+// against it before reordering or adding entries.
+//
+template <class charT>
+typename cpp_regex_traits_implementation<charT>::char_class_type 
+   cpp_regex_traits_implementation<charT>::lookup_classname_imp(const charT* p1, const charT* p2) const
+{
+   static const char_class_type masks[] = 
+   {
+      0,
+      std::ctype<char>::alnum, 
+      std::ctype<char>::alpha,
+      cpp_regex_traits_implementation<charT>::mask_blank,
+      std::ctype<char>::cntrl,
+      std::ctype<char>::digit,
+      std::ctype<char>::digit,
+      std::ctype<char>::graph,
+      std::ctype<char>::lower,
+      std::ctype<char>::lower,
+      std::ctype<char>::print,
+      std::ctype<char>::punct,
+      std::ctype<char>::space,
+      std::ctype<char>::space,
+      std::ctype<char>::upper,
+      cpp_regex_traits_implementation<charT>::mask_unicode,
+      std::ctype<char>::upper,
+      std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word, 
+      std::ctype<char>::alnum | cpp_regex_traits_implementation<charT>::mask_word, 
+      std::ctype<char>::xdigit,
+   };
+   // shift the id up by one so that it can be used as an array index:
+   std::size_t id = 1 + re_detail::get_default_class_id(p1, p2);
+   // guard against an id for which the table has no entry:
+   assert(id < sizeof(masks) / sizeof(masks[0]));
+   return masks[id];
+}
+
+
 template <class charT>
 boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT))
 {
@ -376,6 +445,15 @@ boost::shared_ptr<cpp_regex_traits_implementation<charT> > create_cpp_regex_trai
   return boost::shared_ptr<cpp_regex_traits_implementation<charT> >(new cpp_regex_traits_implementation<charT>(l));
 }

+//
+// helpers to suppress warnings:
+// is_extended(c) is true for character codes of 256 and above, the
+// non-template overload answers for narrow characters without having
+// to compare a char against 256.
+//
+template <class charT>
+inline bool is_extended(charT c)
+{ return c >= 256; }
+inline bool is_extended(char)
+{ return false; }
+
 } // re_detail

 template <class charT>
@ -390,25 +468,6 @@ public:
   typedef std::locale                  locale_type;
   typedef boost::uint_least32_t        char_class_type;

-   BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 16);
-   BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 17);
-   BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 18);
-   BOOST_STATIC_CONSTANT(char_class_type, 
-      mask_base = 
-         std::ctype<char>::alnum 
-         | std::ctype<char>::alpha
-         | std::ctype<char>::cntrl
-         | std::ctype<char>::digit
-         | std::ctype<char>::graph
-         | std::ctype<char>::lower
-         | std::ctype<char>::print
-         | std::ctype<char>::punct
-         | std::ctype<char>::space
-         | std::ctype<char>::upper
-         | std::ctype<char>::xdigit);
-
-   //BOOST_STATIC_ASSERT(0 == (mask_base & (mask_word | mask_unicode)));
-
   cpp_regex_traits()
      : m_pimpl(re_detail::create_cpp_regex_traits<charT>(std::locale()))
   { }
@ -438,33 +497,7 @@ public:
   }
   char_class_type lookup_classname(const charT* p1, const charT* p2) const
   {
-      static const char_class_type masks[] = 
-      {
-         0,
-         std::ctype<char>::alnum, 
-         std::ctype<char>::alpha,
-         cpp_regex_traits<charT>::mask_blank,
-         std::ctype<char>::cntrl,
-         std::ctype<char>::digit,
-         std::ctype<char>::digit,
-         std::ctype<char>::graph,
-         std::ctype<char>::lower,
-         std::ctype<char>::lower,
-         std::ctype<char>::print,
-         std::ctype<char>::punct,
-         std::ctype<char>::space,
-         std::ctype<char>::space,
-         cpp_regex_traits<charT>::mask_unicode,
-         std::ctype<char>::upper,
-         std::ctype<char>::upper,
-         std::ctype<char>::alnum | cpp_regex_traits<charT>::mask_word, 
-         std::ctype<char>::alnum | cpp_regex_traits<charT>::mask_word, 
-         std::ctype<char>::xdigit,
-      };
-      int id = re_detail::get_default_class_id(p1, p2);
-      assert(id >= -1);
-      assert(id < sizeof(masks) / sizeof(masks[0]));
-      return masks[1 + id];
+      return m_pimpl->lookup_classname(p1, p2);
   }
   string_type lookup_collatename(const charT* p1, const charT* p2) const
   {
@ -472,16 +505,17 @@ public:
   }
   bool is_class(charT c, char_class_type f) const
   {
-      if((f & cpp_regex_traits<charT>::mask_base) 
+      typedef typename std::ctype<charT>::mask ctype_mask;
+      if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_base) 
         && (m_pimpl->m_pctype->is(
-            static_cast<std::ctype<charT>::mask>(f & cpp_regex_traits<charT>::mask_base), c)))
+            static_cast<ctype_mask>(f & re_detail::cpp_regex_traits_implementation<charT>::mask_base), c)))
         return true;
-      else if((f & cpp_regex_traits<charT>::mask_unicode) && (c >= 256))
+      else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_unicode) && re_detail::is_extended(c))
         return true;
-      else if((f & cpp_regex_traits<charT>::mask_word) && (c == '_'))
+      else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_word) && (c == '_'))
         return true;
-      else if((f & cpp_regex_traits<charT>::mask_blank) 
-         && m_pimpl->m_pctype->is(static_cast<std::ctype<charT>::mask>(f & cpp_regex_traits<charT>::mask_base), c)
+      else if((f & re_detail::cpp_regex_traits_implementation<charT>::mask_blank) 
+         && m_pimpl->m_pctype->is(std::ctype<charT>::space, c)
         && !re_detail::is_separator(c))
         return true;
      return false;
@ -515,6 +549,7 @@ private:
   // catalog name handler:
   //
   static std::string& get_catalog_name_inst();
+
 #ifdef BOOST_HAS_THREADS
   static static_mutex& get_mutex_inst();
 #endif
--- a/include/boost/regex/v4/error_type.hpp
+++ b/include/boost/regex/v4/error_type.hpp
@ -23,12 +23,15 @@
 #ifndef BOOST_REGEX_ERROR_TYPE_HPP
 #define BOOST_REGEX_ERROR_TYPE_HPP

+#ifdef __cplusplus
 namespace boost{
+#endif

 //
 // start with the POSIX API versions of these:
 //
 typedef unsigned reg_error_t;
+typedef reg_error_t reg_errcode_t;  // backwards compatibility

 static const reg_error_t REG_NOERROR = 0;   /* Success.  */
 static const reg_error_t REG_NOMATCH = 1;   /* Didn't find a match (for regexec).  */
@ -57,6 +60,7 @@ static const reg_error_t REG_ESTACK = 19;   /* out of stack space */
 static const reg_error_t REG_E_UNKNOWN = 20; /* unknown error */
 static const reg_error_t REG_ENOSYS = REG_E_UNKNOWN; /* Reserved. */

+#ifdef __cplusplus
 namespace regex_constants{

 typedef ::boost::reg_error_t error_type;
@ -80,5 +84,6 @@ static const error_type error_unknown = REG_E_UNKNOWN;

 }
 }
+#endif // __cplusplus

 #endif
--- a/include/boost/regex/v4/perl_matcher.hpp
+++ b/include/boost/regex/v4/perl_matcher.hpp
@ -91,17 +91,17 @@ template <class iterator, class charT, class traits_type, class char_classT>
 iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
                          iterator last, 
                          const re_set_long<char_classT>* set_, 
-                          const basic_regex<charT, traits_type>& e)
+                          const regex_data<charT, traits_type>& e)
 {   
   const charT* p = reinterpret_cast<const charT*>(set_+1);
   iterator ptr;
   unsigned int i;
-   bool icase = e.flags() & regex_constants::icase;
+   bool icase = e.m_flags & regex_constants::icase;

   if(next == last) return next;

   typedef typename traits_type::string_type traits_string_type;
-   const traits_type& traits_inst = e.get_traits();
+   const traits_type& traits_inst = e.m_traits;
   
   // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
   // referenced
@ -149,17 +149,17 @@ iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
      // try and match a range, NB only a single character can match
      if(set_->cranges)
      {
-         if((e.flags() & regex_constants::collate) == 0)
+         if((e.m_flags & regex_constants::collate) == 0)
            s1.assign(1, col);
         else
            s1 = traits_inst.transform(&col, &col + 1);
         for(i = 0; i < set_->cranges; ++i)
         {
-            if(STR_COMP(s1, p) <= 0)
+            if(STR_COMP(s1, p) >= 0)
            {
               while(*p)++p;
               ++p;
-               if(STR_COMP(s1, p) >= 0)
+               if(STR_COMP(s1, p) <= 0)
                  return set_->isnot ? next : ++next;
            }
            else
@ -412,7 +412,7 @@ private:
   void push_assertion(const re_syntax_base* ps, bool positive);
   void push_alt(const re_syntax_base* ps);
   void push_repeater_count(int i, repeater_count<BidiIterator>** s);
-   void push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id);
+   void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id);
   void push_non_greedy_repeat(const re_syntax_base* ps);


--- a/include/boost/regex/v4/perl_matcher_common.hpp
+++ b/include/boost/regex/v4/perl_matcher_common.hpp
@ -208,10 +208,10 @@ bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
   else
   {
      // start again:
-      search_base = position = (*m_presult)[0].second;
+      search_base = position = m_result[0].second;
      // If last match was null and match_not_null was not set then increment
      // our start position, otherwise we go into an infinite loop:
-      if(((m_match_flags & match_not_null) == 0) && (m_presult->length() == 0))
+      if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
      {
         if(position == last)
            return false;
@ -590,7 +590,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
   // let the traits class do the work:
   if(position == last)
      return false;
-   BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re);
+   BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data());
   if(t != position)
   {
      pstate = pstate->next.p;
--- a/include/boost/regex/v4/perl_matcher_non_recursive.hpp
+++ b/include/boost/regex/v4/perl_matcher_non_recursive.hpp
@ -103,10 +103,10 @@ struct save_state_init
 template <class BidiIterator>
 struct saved_single_repeat : public saved_state
 {
-   unsigned count;
+   std::size_t count;
   const re_repeat* rep;
   BidiIterator last_position;
-   saved_single_repeat(unsigned c, const re_repeat* r, BidiIterator lp, int arg_id) 
+   saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id) 
      : saved_state(arg_id), count(c), rep(r), last_position(lp){}
 };

@ -275,7 +275,7 @@ inline void perl_matcher<BidiIterator, Allocator, traits>::push_repeater_count(i
 }

 template <class BidiIterator, class Allocator, class traits>
-inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(unsigned c, const re_repeat* r, BidiIterator last_position, int id)
+inline void perl_matcher<BidiIterator, Allocator, traits>::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id)
 {
   saved_single_repeat<BidiIterator>* pmp = static_cast<saved_single_repeat<BidiIterator>*>(m_backup_state);
   --pmp;
@ -585,11 +585,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_char_repeat()
   const re_repeat* rep = static_cast<const re_repeat*>(pstate);
   assert(1 == static_cast<const re_literal*>(rep->next.p)->length);
   const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(rep->next.p) + 1);
-   unsigned count = 0;
+   std::size_t count = 0;
   //
   // start by working out how much we can skip:
   //
-   unsigned desired = rep->greedy ? rep->max : rep->min;
+   std::size_t desired = rep->greedy ? rep->max : rep->min;
   if(::boost::is_random_access_iterator<BidiIterator>::value)
   {
      BidiIterator end = position;
@ -652,11 +652,11 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_set_repeat()
 #endif
   const re_repeat* rep = static_cast<const re_repeat*>(pstate);
   const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
-   unsigned count = 0;
+   std::size_t count = 0;
   //
   // start by working out how much we can skip:
   //
-   unsigned desired = rep->greedy ? rep->max : rep->min;
+   std::size_t desired = rep->greedy ? rep->max : rep->min;
   if(::boost::is_random_access_iterator<BidiIterator>::value)
   {
      BidiIterator end = position;
@ -719,17 +719,17 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
 #endif
   const re_repeat* rep = static_cast<const re_repeat*>(pstate);
   const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate->next.p);
-   unsigned count = 0;
+   std::size_t count = 0;
   //
   // start by working out how much we can skip:
   //
-   unsigned desired = rep->greedy ? rep->max : rep->min;
+   std::size_t desired = rep->greedy ? rep->max : rep->min;
   if(::boost::is_random_access_iterator<BidiIterator>::value)
   {
      BidiIterator end = position;
      std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
      BidiIterator origin(position);
-      while((position != end) && (position != re_is_set_member(position, last, set, re)))
+      while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
      {
         ++position;
      }
@ -737,7 +737,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
   }
   else
   {
-      while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re)))
+      while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
      {
         ++position;
         ++count;
@ -926,7 +926,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_greedy_single_repeat(
   }

   const re_repeat* rep = pmp->rep;
-   unsigned count = pmp->count;
+   std::size_t count = pmp->count;
   assert(rep->next.p);
   assert(rep->alt.p);

@ -975,7 +975,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_slow_dot_repeat(bool
   }

   const re_repeat* rep = pmp->rep;
-   unsigned count = pmp->count;
+   std::size_t count = pmp->count;
   assert(rep->type == syntax_element_dot_rep);
   assert(rep->next.p);
   assert(rep->alt.p);
@ -1037,7 +1037,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_fast_dot_repeat(bool
   }

   const re_repeat* rep = pmp->rep;
-   unsigned count = pmp->count;
+   std::size_t count = pmp->count;

   assert(count < rep->max);
   position = pmp->last_position;
@ -1089,7 +1089,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_char_repeat(bool r)
   }

   const re_repeat* rep = pmp->rep;
-   unsigned count = pmp->count;
+   std::size_t count = pmp->count;
   pstate = rep->next.p;
   const char_type what = *reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
   position = pmp->last_position;
@ -1153,7 +1153,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_short_set_repeat(bool
   }

   const re_repeat* rep = pmp->rep;
-   unsigned count = pmp->count;
+   std::size_t count = pmp->count;
   pstate = rep->next.p;
   const unsigned char* map = static_cast<const re_set*>(rep->next.p)->_map;
   position = pmp->last_position;
@ -1217,7 +1217,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
   }

   const re_repeat* rep = pmp->rep;
-   unsigned count = pmp->count;
+   std::size_t count = pmp->count;
   pstate = rep->next.p;
   const re_set_long<typename traits::char_class_type>* set = static_cast<const re_set_long<typename traits::char_class_type>*>(pstate);
   position = pmp->last_position;
@ -1234,7 +1234,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::unwind_long_set_repeat(bool
      // wind forward until we can skip out of the repeat:
      do
      {
-         if(position == re_is_set_member(position, last, set, re))
+         if(position == re_is_set_member(position, last, set, re.get_data()))
         {
            // failed repeat match, discard this state and look for another:
            destroy_single_repeat();
--- a/include/boost/regex/v4/perl_matcher_recursive.hpp
+++ b/include/boost/regex/v4/perl_matcher_recursive.hpp
@ -637,7 +637,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
      BidiIterator end = position;
      std::advance(end, (std::min)((unsigned)re_detail::distance(position, last), desired));
      BidiIterator origin(position);
-      while((position != end) && (position != re_is_set_member(position, last, set, re)))
+      while((position != end) && (position != re_is_set_member(position, last, set, re.get_data())))
      {
         ++position;
      }
@ -645,7 +645,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
   }
   else
   {
-      while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re)))
+      while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data())))
      {
         ++position;
         ++count;
@ -665,7 +665,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
   {
      while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip))
      {
-         if(position != re_is_set_member(position, last, set, re))
+         if(position != re_is_set_member(position, last, set, re.get_data()))
         {
            ++position;
            ++count;
@ -685,7 +685,7 @@ bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set_repeat()
      if(position == last)
         return false;
      position = save_pos;
-      if(position != re_is_set_member(position, last, set, re))
+      if(position != re_is_set_member(position, last, set, re.get_data()))
      {
         ++position;
         ++count;
--- a/include/boost/regex/v4/regbase.hpp
+++ b/include/boost/regex/v4/regbase.hpp
@ -76,7 +76,7 @@ public:
      


-      basic = basic_syntax_group | collate,
+      basic = basic_syntax_group | collate | no_escape_in_lists,
      extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists,
      normal = 0,
      emacs = basic | no_char_classes | no_intervals,
@ -123,6 +123,8 @@ namespace regex_constants{
      bk_plus_qm = ::boost::regbase::bk_plus_qm,
      bk_vbar = ::boost::regbase::bk_vbar,
      no_intervals = ::boost::regbase::no_intervals,
+      no_char_classes = ::boost::regbase::no_char_classes,
+      no_escape_in_lists = ::boost::regbase::no_escape_in_lists,

      basic = ::boost::regbase::basic,
      extended = ::boost::regbase::extended,
--- a/include/boost/regex/v4/regex_grep.hpp
+++ b/include/boost/regex/v4/regex_grep.hpp
@ -54,23 +54,22 @@ inline unsigned int regex_grep(Predicate foo,
         return count; // we've reached the end, don't try and find an extra null match.
      if(m.length() == 0)
      {
+         if(m[0].second == last)
+            return count;
         // we found a NULL-match, now try to find
         // a non-NULL one at the same position:
-         BidiIterator last_end(m[0].second);
-         if(last_end == last)
-            return count;
+         match_results<BidiIterator, match_allocator_type> m2(m);
         matcher.setf(match_not_null | match_continuous);
         if(matcher.find())
         {
            ++count;
-            last_end = m[0].second;
            if(0 == foo(m))
               return count;
         }
         else
         {
            // reset match back to where it was:
-            m.set_second(last_end);
+            m = m2;
         }
         matcher.unsetf((match_not_null | match_continuous) & ~flags);
      }
--- a/include/boost/regex/v4/regex_traits_defaults.hpp
+++ b/include/boost/regex/v4/regex_traits_defaults.hpp
@ -132,17 +132,19 @@ int get_default_class_id(const charT* p1, const charT* p2)
      {data+40, data+45,}, // punct
      {data+45, data+46,}, // s
      {data+45, data+50,}, // space
-      {data+50, data+57,}, // unicode
      {data+57, data+58,}, // u
+      {data+50, data+57,}, // unicode
      {data+57, data+62,}, // upper
      {data+62, data+63,}, // w
      {data+62, data+66,}, // word
      {data+66, data+72,}, // xdigit
   };
+   static const character_pointer_range<charT>* ranges_begin = ranges;
+   static const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
   
   character_pointer_range<charT> t = { p1, p2, };
-   const character_pointer_range<charT>* p = std::lower_bound(ranges, ranges + (sizeof(ranges)/sizeof(ranges[0])), t);
-   if(t == *p)
+   const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
+   if((p != ranges_end) && (t == *p))
      return static_cast<int>(p - ranges);
   return -1;
 }
--- a/include/boost/regex/v4/states.hpp
+++ b/include/boost/regex/v4/states.hpp
@ -222,11 +222,15 @@ enum re_jump_size_type
 /*** proc re_is_set_member *********************************************
 Forward declaration: we'll need this one later...
 ***********************************************************************/
+
+template<class charT, class traits>
+struct regex_data;
+
 template <class iterator, class charT, class traits_type, class char_classT>
 iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
                          iterator last, 
                          const re_set_long<char_classT>* set_, 
-                          const basic_regex<charT, traits_type>& e);
+                          const regex_data<charT, traits_type>& e);

 } // namespace re_detail

--- a/src/regex_traits_defaults.cpp
+++ b/src/regex_traits_defaults.cpp
@ -49,8 +49,8 @@ const char* get_default_syntax(regex_constants::syntax_type n)
         ">",
         "",
         "",
-         "A",
-         "z",
+         "A`",
+         "z'",
         "\n",
         ",",
         "a",
--- a/test/Jamfile
+++ b/test/Jamfile
@ -55,9 +55,8 @@ template test-dll
 #
 template regression-dll
    : <template>test-dll                    # sources
-      regress/parse.cpp
-      regress/regress.cpp 
-      regress/tests.cpp
+      regress/main.cpp
+      regress/basic_tests.cpp 
      <lib>../../test/build/boost_prg_exec_monitor
    ;

@ -66,14 +65,7 @@ test-suite regex
      [ regex-test regex_regress
          : <template>regression     # sources
          :                          # requirements
-          : regress/tests.txt        # input files
-      ]
-
-      [ regex-test regex_wide_regress
-          : <template>regression     # sources
-            <template>../build/msvc-stlport-tricky
-          : <define>TEST_UNICODE=1   # requirements
-          : regress/tests.txt        # input files
+          :                          # input files
      ]

      [ regex-test posix_api_check
@ -115,15 +107,8 @@ test-suite regex

      [ regex-test regex_regress_dll
          : <template>regression-dll     # sources
-          :                          # requirements
-          : regress/tests.txt        # input files
-      ]
-
-      [ regex-test regex_wide_regress_dll
-          : <template>regression-dll     # sources
-            <template>../build/msvc-stlport-tricky
-          : <define>TEST_UNICODE=1   # requirements
-          : regress/tests.txt        # input files
+          :                              # requirements
+          :                              # input files
      ]

      [ compile concepts/concept_check.cpp
--- a/test/captures/Jamfile
+++ b/test/captures/Jamfile
@ -16,7 +16,7 @@ EX_SOURCES =
   wide_posix_api.cpp
   winstances.cpp ;
       
-lib boost_regex_extra : ../../src/$(EX_SOURCES).cpp <template>../../build/regex-options
+lib boost_regex_extra : ../../src/$(EX_SOURCES) <template>../../build/regex-options
    : 
        <define>BOOST_REGEX_MATCH_EXTRA=1
    : 
--- a/test/regress/basic_tests.cpp
+++ b/test/regress/basic_tests.cpp
@ -181,6 +181,11 @@ void basic_tests()
   TEST_INVALID_REGEX("a\\{1,b\\}", basic);
   TEST_INVALID_REGEX("a\\{1,2v\\}", basic);

+}
+
+void test_alt()
+{
+   using namespace boost::regex_constants;
   // now test the alternation operator |
   TEST_REGEX_SEARCH("a|b", perl, "a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("a|b", perl, "b", match_default, make_array(0, 1, -2, -2));
@ -205,441 +210,430 @@ void basic_tests()
   TEST_REGEX_SEARCH("a|", basic|bk_vbar, "a|", match_default, make_array(0, 2, -2, -2));
   TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "a", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH("a\\|b", basic|bk_vbar, "b", match_default, make_array(0, 1, -2, -2));
+}
+
+void test_sets()
+{
+   using namespace boost::regex_constants;
+   // Character-set ([...]) tests: plain and negated sets, literal ']', '[' and '-' members, and unterminated sets.
+   TEST_REGEX_SEARCH("[abc]", extended, "a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[abc]", extended, "b", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[abc]", extended, "c", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[abc]", extended, "d", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[^bcd]", extended, "a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[^bcd]", extended, "b", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[^bcd]", extended, "d", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[^bcd]", extended, "e", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("a[b]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[ab]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[^ab]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[]b]c", extended, "a]c", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[[b]c", extended, "a[c", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[-b]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[^]b]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[^-b]c", extended, "adc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[b-]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
+   TEST_INVALID_REGEX("a[b", extended);
+   TEST_INVALID_REGEX("a[]", extended);
+
+   // Ranges: both end points are included, a leading '-' is a literal member, and malformed ranges must be rejected.
+   TEST_REGEX_SEARCH("[b-e]", extended, "a", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[b-e]", extended, "b", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[b-e]", extended, "e", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[b-e]", extended, "f", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[^b-e]", extended, "a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[^b-e]", extended, "b", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[^b-e]", extended, "e", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[^b-e]", extended, "f", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("a[1-3]c", extended, "a2c", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[-3]c", extended, "a-c", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[-3]c", extended, "a3c", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a[^-3]c", extended, "a-c", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("a[^-3]c", extended, "a3c", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("a[^-3]c", extended, "axc", match_default, make_array(0, 3, -2, -2));
+   TEST_INVALID_REGEX("a[3-1]c", extended);
+   TEST_INVALID_REGEX("a[1-3-5]c", extended);
+   TEST_INVALID_REGEX("a[1-", extended);
+
+   // Named classes [[:name:]]: unknown, truncated or misspelled names must be rejected; then each name is tried on a sample.
+   TEST_REGEX_SEARCH("a[[:alpha:]]c", extended, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_INVALID_REGEX("a[[:unknown:]]c", extended);
+   TEST_INVALID_REGEX("a[[:", extended);
+   TEST_INVALID_REGEX("a[[:alpha", extended);
+   TEST_INVALID_REGEX("a[[:alpha:]", extended);
+   TEST_INVALID_REGEX("a[[:alpha,:]", extended);
+   TEST_INVALID_REGEX("a[[:]:]]b", extended);
+   TEST_INVALID_REGEX("a[[:-:]]b", extended);
+   TEST_INVALID_REGEX("a[[:alph:]]", extended);
+   TEST_INVALID_REGEX("a[[:alphabet:]]", extended);
+   TEST_REGEX_SEARCH("[[:alnum:]]+", extended, "-%@a0X_-", match_default, make_array(3, 6, -2, -2));
+   TEST_REGEX_SEARCH("[[:alpha:]]+", extended, " -%@aX_0-", match_default, make_array(4, 6, -2, -2));
+   TEST_REGEX_SEARCH("[[:blank:]]+", extended, "a  \tb", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("[[:cntrl:]]+", extended, " a\n\tb", match_default, make_array(2, 4, -2, -2));
+   TEST_REGEX_SEARCH("[[:digit:]]+", extended, "a019b", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("[[:graph:]]+", extended, " a%b ", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("[[:lower:]]+", extended, "AabC", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("[[:print:]]+", extended, "AabC", match_default, make_array(0, 4, -2, -2));
+   TEST_REGEX_SEARCH("[[:punct:]]+", extended, " %-&\t", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("[[:space:]]+", extended, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
+   TEST_REGEX_SEARCH("[[:upper:]]+", extended, "aBCd", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("[[:xdigit:]]+", extended, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
+
+   //
+   // escapes inside a set: with perl syntax [\n] is expected to match a newline, with basic syntax a literal backslash;
+   // with basic|no_char_classes the text "[[:class:]" is expected to match the single characters ':', '[' and 'c':
+   //
+   TEST_REGEX_SEARCH("[\\n]", perl, "\n", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[\\n]", basic, "\n", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[\\n]", basic, "\\", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, ":", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, "[", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[[:class:]", basic|no_char_classes, "c", match_default, make_array(0, 1, -2, -2));
+   //
+   // single character class escapes \w and \W: letters and '_' are expected to be word characters, '}', '`', '[' and '@' are not:
+   //
+   TEST_REGEX_SEARCH("\\w", perl, "A", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "Z", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "z", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "_", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "}", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "`", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "[", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\w", perl, "@", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "a", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "z", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "A", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "Z", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "_", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "}", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "`", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "[", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\W", perl, "@", match_default, make_array(0, 1, -2, -2));
+}
+
+void test_anchors()
+{
+   // Line anchors ^ and $ with default flags: matches are expected at the ends of the text and next to an embedded newline.
+   using namespace boost::regex_constants;
+   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default, make_array(3, 5, -2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default, make_array(0, 2, -2, -2));
+   // match_not_bol | match_not_eol: no match is expected at the ends of the text; the embedded newline cases still match.
+   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_not_bol | match_not_eol, make_array(3, 5, -2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_not_bol | match_not_eol, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_not_bol | match_not_eol, make_array(0, 2, -2, -2));
+   // match_single_line: matches are expected only at the ends of the text, not next to the embedded newline.
+   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_single_line, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_single_line, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_single_line, make_array(-2, -2));
+   // match_not_bol | match_not_eol | match_single_line: none of the anchored cases is expected to match.
+   TEST_REGEX_SEARCH("^ab", extended, "ab", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xxabxx", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^ab", extended, "xx\nabzz", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "abxx", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
+   TEST_REGEX_SEARCH("ab$", extended, "ab\nzz", match_default | match_not_bol | match_not_eol | match_single_line, make_array(-2, -2));
+}
+
+void test_backrefs() // back-reference tests: \N is expected to match only a repeat of the text captured by group N
+{
+   using namespace boost::regex_constants;
+   TEST_INVALID_REGEX("a(b)\\2c", perl); // refers to group 2, but the pattern has only one group
+   TEST_INVALID_REGEX("a(b\\1)c", perl); // refers to group 1 from inside group 1
+   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
+   // strictly speaking this is at best ambiguous and at worst wrong, but it is what most
+   // regular expression implementations will match.
+   TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
+   TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbcbd", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("a((b)*\\2)*d", perl, "abbbd", match_default, make_array(0, 5, 1, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("(ab*)[ab]*\\1", perl, "ababaaa", match_default, make_array(0, 4, 0, 2, -2, -2));
+   TEST_REGEX_SEARCH("(a)\\1bcd", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
+   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
+   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabd", match_default, make_array(0, 4, 0, 1, -2, -2));
+   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
+   TEST_REGEX_SEARCH("(a)\\1bc*[ce]d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
+   TEST_REGEX_SEARCH("^(a)\\1b(c)*cd$", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, 4, 5, -2, -2));
+   TEST_REGEX_SEARCH("(ab*)[ab]*\\1", extended, "ababaaa", match_default, make_array(0, 7, 0, 1, -2, -2)); // same pattern and text as the perl case above, but a longer overall match is expected under extended syntax
+}
+
+void test_character_escapes() // escapes that stand for one literal character
+{
+   using namespace boost::regex_constants;
+   // characters by octal code (0101 is 'A', 0172 is 'z')
+   TEST_REGEX_SEARCH("\\0101", perl, "A", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\00", perl, "\0", match_default, make_array(0, 1, -2, -2)); // NOTE(review): the subject "\0" only has length 1 if the macro uses the array size rather than strlen - confirm
+   TEST_REGEX_SEARCH("\\0", perl, "\0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\0172", perl, "z", match_default, make_array(0, 1, -2, -2));
+   // control character escapes that share their spelling with the C escapes:
+   TEST_REGEX_SEARCH("\\a", perl, "\a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\f", perl, "\f", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\n", perl, "\n", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\r", perl, "\r", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\v", perl, "\v", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\t", perl, "\t", match_default, make_array(0, 1, -2, -2));
+
+   // updated tests for version 2: hexadecimal (\x) and control-character (\c) escapes
+   TEST_REGEX_SEARCH("\\x41", perl, "A", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\xff", perl, "\xff", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\xFF", perl, "\xff", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\c@", perl, "\0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\cA", perl, "\x1", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\cz", perl, "\x3A", match_default, make_array(0, 1, -2, -2)); // NOTE(review): expects 0x3A ('z' ^ 0x40) rather than Ctrl-Z (0x1A) - confirm this is intended
+   TEST_INVALID_REGEX("\\c=", extended);
+   TEST_INVALID_REGEX("\\c?", extended);
+   TEST_REGEX_SEARCH("=:", perl, "=:", match_default, make_array(0, 2, -2, -2));
+}
+
+void test_assertion_escapes() // word and buffer boundary assertions
+{
+   using namespace boost::regex_constants;
+   // word start (\<):
+   TEST_REGEX_SEARCH("\\<abcd", perl, "  abcd", match_default, make_array(2, 6, -2, -2));
+   TEST_REGEX_SEARCH("\\<ab", perl, "cab", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\<ab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\<tag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
+   // word end (\>):
+   TEST_REGEX_SEARCH("abc\\>", perl, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc\\>", perl, "abcd", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("abc\\>", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc\\>", perl, "abc::", match_default, make_array(0,3, -2, -2));
+   // word boundary (\b), at either the start or the end of a word:
+   TEST_REGEX_SEARCH("\\babcd", perl, "  abcd", match_default, make_array(2, 6, -2, -2));
+   TEST_REGEX_SEARCH("\\bab", perl, "cab", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\bab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\btag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("abc\\b", perl, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc\\b", perl, "abcd", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("abc\\b", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc\\b", perl, "abc::", match_default, make_array(0, 3, -2, -2));
+   // within word (\B): expected to match between two word characters, but not after the last one
+   TEST_REGEX_SEARCH("\\B", perl, "ab", match_default, make_array(1, 1, -2, -2));
+   TEST_REGEX_SEARCH("a\\Bb", perl, "ab", match_default, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("a\\B", perl, "ab", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("a\\B", perl, "a", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("a\\B", perl, "a ", match_default, make_array(-2, -2));
+   // buffer start (\`) and buffer end (\') operators: not expected to match next to a newline or space inside the text
+   TEST_REGEX_SEARCH("\\`abc", perl, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\`abc", perl, "\nabc", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\`abc", perl, " abc", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("abc\\'", perl, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc\\'", perl, "abc\n", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("abc\\'", perl, "abc ", match_default, make_array(-2, -2));
+
+   // word start written in bracket form ([[:<:]]), same subjects and expected results as the \< cases:
+   TEST_REGEX_SEARCH("[[:<:]]abcd", perl, "  abcd", match_default, make_array(2, 6, -2, -2));
+   TEST_REGEX_SEARCH("[[:<:]]ab", perl, "cab", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("[[:<:]]ab", perl, "\nab", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("[[:<:]]tag", perl, "::tag", match_default, make_array(2, 5, -2, -2));
+   // word end written in bracket form ([[:>:]]), same subjects and expected results as the \> cases:
+   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abcd", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc\n", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("abc[[:>:]]", perl, "abc::", match_default, make_array(0, 3, -2, -2));
+}
+
+void test_tricky_cases()
+{
+   using namespace boost::regex_constants;
+   //TEST_REGEX_SEARCH("", perl, "", match_default, make_array(-2, -2));
+   //
+   // now follows various complex expressions designed to try and bust the matcher:
+   //
+   TEST_REGEX_SEARCH("a(((b)))c", perl, "abc", match_default, make_array(0, 3, 1, 2, 1, 2, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b*|c)d", perl, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
+   // just gotta have one DFA-buster, of course
+   TEST_REGEX_SEARCH("a[ab]{20}", perl, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
+   // and an inline expansion in case somebody gets tricky
+   TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]", perl, "aaaaabaaaabaaaabaaaab", match_default, make_array(0, 21, -2, -2));
+   // and in case somebody just slips in an NFA...
+   TEST_REGEX_SEARCH("a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)", perl, "aaaaabaaaabaaaabaaaabweeknights", match_default, make_array(0, 31, 21, 24, 24, 31, -2, -2));
+   // one really big one
+   TEST_REGEX_SEARCH("1234567890123456789012345678901234567890123456789012345678901234567890", perl, "a1234567890123456789012345678901234567890123456789012345678901234567890b", match_default, make_array(1, 71, -2, -2));
+   // fish for problems as brackets go past 8
+   TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn]", perl, "xacegikmoq", match_default, make_array(1, 8, -2, -2));
+   TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op]", perl, "xacegikmoq", match_default, make_array(1, 9, -2, -2));
+   TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][qr]", perl, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
+   TEST_REGEX_SEARCH("[ab][cd][ef][gh][ij][kl][mn][op][q]", perl, "xacegikmoqy", match_default, make_array(1, 10, -2, -2));
+   // and as parenthesis go past 9:
+   TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)", perl, "zabcdefghi", match_default, make_array(1, 9, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, -2, -2));
+   TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)", perl, "zabcdefghij", match_default, make_array(1, 10, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, -2, -2));
+   TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)", perl, "zabcdefghijk", match_default, make_array(1, 11, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, -2, -2));
+   TEST_REGEX_SEARCH("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)", perl, "zabcdefghijkl", match_default, make_array(1, 12, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, -2, -2));
+   TEST_REGEX_SEARCH("(a)d|(b)c", perl, "abc", match_default, make_array(1, 3, -1, -1, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("_+((www)|(ftp)|(mailto)):_*", perl, "_wwwnocolon _mailto:", match_default, make_array(12, 20, 13, 19, -1, -1, -1, -1, 13, 19, -2, -2));
+   // subtleties of matching
+   TEST_REGEX_SEARCH("a(b)?c\\1d", perl, "acd", match_default, make_array(0, 3, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b?c)+d", perl, "accd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("(wee|week)(knights|night)", perl, "weeknights", match_default, make_array(0, 10, 0, 3, 3, 10, -2, -2));
+   TEST_REGEX_SEARCH(".*", perl, "abc", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|(c))d", perl, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|(c))d", perl, "acd", match_default, make_array(0, 3, 1, 2, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "abbd", match_default, make_array(0, 4, 1, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "acd", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b*|c|e)d", perl, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
+   TEST_REGEX_SEARCH("a(b?)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b?)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
+   TEST_REGEX_SEARCH("a(b+)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b+)c", perl, "abbbc", match_default, make_array(0, 5, 1, 4, -2, -2));
+   TEST_REGEX_SEARCH("a(b*)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
+   TEST_REGEX_SEARCH("(a|ab)(bc([de]+)f|cde)", perl, "abcdef", match_default, make_array(0, 6, 0, 1, 1, 6, 3, 5, -2, -2));
+   TEST_REGEX_SEARCH("a([bc]?)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a([bc]?)c", perl, "ac", match_default, make_array(0, 2, 1, 1, -2, -2));
+   TEST_REGEX_SEARCH("a([bc]+)c", perl, "abc", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a([bc]+)c", perl, "abcc", match_default, make_array(0, 4, 1, 3, -2, -2));
+   TEST_REGEX_SEARCH("a([bc]+)bc", perl, "abcbc", match_default, make_array(0, 5, 1, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(bb+|b)b", perl, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abb", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(bbb+|bb+|b)b", perl, "abbb", match_default, make_array(0, 4, 1, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(bbb+|bb+|b)bb", perl, "abbb", match_default, make_array(0, 4, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("(.*).*", perl, "abcdef", match_default, make_array(0, 6, 0, 6, -2, -2));
+   TEST_REGEX_SEARCH("(a*)*", perl, "bc", match_default, make_array(0, 0, 0, 0, -2, -2));
+   TEST_REGEX_SEARCH("xyx*xz", perl, "xyxxxxyxxxz", match_default, make_array(5, 11, -2, -2));
+   // do we get the right subexpression when it is used more than once?
+   TEST_REGEX_SEARCH("a(b|c)*d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c)*d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c)+d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c)+d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c?)+d", perl, "ad", match_default, make_array(0, 2, 1, 1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,0}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,1}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,1}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,2}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,}d", perl, "ad", match_default, make_array(0, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){0,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){1,1}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){1,2}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){1,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){1,}d", perl, "abd", match_default, make_array(0, 3, 1, 2, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){1,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,2}d", perl, "acbd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,2}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,4}d", perl, "abcbcd", match_default, make_array(0, 6, 4, 5, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|c){2,}d", perl, "abcbd", match_default, make_array(0, 5, 3, 4, -2, -2));
+   // perl only:
+   TEST_REGEX_SEARCH("a(b|c?)+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abd", match_default, make_array(0, 3, 2, 2, 2, 2, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b+|((c)*))+d", perl, "abcd", match_default, make_array(0, 4, 3, 3, 3, 3, 2, 3, -2, -2));
+   // posix only:
+   TEST_REGEX_SEARCH("a(b|c?)+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b|((c)*))+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, 2, 3, 2, 3, -2, -2));
+   TEST_REGEX_SEARCH("a(b+|((c)*))+d", extended, "abd", match_default, make_array(0, 3, 1, 2, -1, -1, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("a(b+|((c)*))+d", extended, "abcd", match_default, make_array(0, 4, 2, 3, 2, 3, 2, 3, -2, -2));
+   // literals:
+   TEST_REGEX_SEARCH("\\**?/{}", literal, "\\**?/{}", match_default, make_array(0, 7, -2, -2));
+   // try to match C++ syntax elements:
+   // line comment:
+   TEST_REGEX_SEARCH("//[^\\n]*", perl, "++i //here is a line comment\n", match_default, make_array(4, 28, -2, -2));
+   // block comment:
+   TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/* here is a block comment */", match_default, make_array(0, 29, 26, 27, -2, -2));
+   TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/**/", match_default, make_array(0, 4, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/***/", match_default, make_array(0, 5, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/****/", match_default, make_array(0, 6, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/*****/", match_default, make_array(0, 7, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("/\\*([^*]|\\*+[^*/])*\\*+/", perl, "/*****/*/", match_default, make_array(0, 7, -1, -1, -2, -2));
+   // preprocessor directives:
+   TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol", match_default, make_array(0, 19, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) #x", match_default, make_array(0, 25, -1, -1, -2, -2));
+   // perl only:
+   TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", perl, "#define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x);", match_default, make_array(0, 53, 30, 42, -2, -2));
+   // literals:
+   TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFF", match_default, make_array(0, 4, 0, 4,	0, 4,	-1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "35", match_default, make_array(0, 2, 0, 2, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFu", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFL", match_default, make_array(0, 5, 0, 4, 0, 4, -1, -1, 4, 5, -1, -1, -1, -1, -2, -2));
+   TEST_REGEX_SEARCH("((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)?", perl, "0xFFFFFFFFFFFFFFFFuint64", match_default, make_array(0, 24,	0, 18, 0, 18, -1, -1, 19, 24, 19, 24, 22, 24, -2, -2));
+   // strings:
+   TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\x3A'", match_default, make_array(0, 6, 4, 5, -2, -2));
+   TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\''", match_default, make_array(0, 4, 1, 3, -2, -2));
+   TEST_REGEX_SEARCH("'([^\\\\']|\\\\.)*'", perl, "'\\n'", match_default, make_array(0, 4, 1, 3, -2, -2));
+   // posix only:
+   TEST_REGEX_SEARCH("^[[:blank:]]*#([^\\n]*\\\\[[:space:]]+)*[^\\n]*", awk, "#define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x);", match_default, make_array(0, 53, 28, 42, -2, -2));
+   // now try and test some unicode specific characters:
+   TEST_REGEX_SEARCH_W(L"[[:unicode:]]+", perl, L"a\u0300\u0400z", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH_W(L"[\x10-\xff]", perl, L"\u0300\u0400", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH_W(L"[\01-\05]{5}", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH_W(L"[\x300-\x400]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
+   TEST_REGEX_SEARCH_W(L"[\\x{300}-\\x{400}]+", perl, L"\u0300\u0400\u0300\u0400\u0300\u0400", match_default, make_array(0, 6, -2, -2));
+   TEST_REGEX_SEARCH_W(L"\\x{300}\\x{400}+", perl, L"\u0300\u0400\u0400\u0400\u0400\u0400", match_default, make_array(0, 6, -2, -2));
+   // finally try some case insensitive matches:
+   TEST_REGEX_SEARCH("0123456789@abcdefghijklmnopqrstuvwxyz\\[\\\\\\]\\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\\{\\|\\}", perl|icase, "0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}", match_default, make_array(0, 72, -2, -2));   
+   TEST_REGEX_SEARCH("a", perl|icase, "A", match_default, make_array(0, 1, -2, -2));   
+   TEST_REGEX_SEARCH("A", perl|icase, "a", match_default, make_array(0, 1, -2, -2));   
+   TEST_REGEX_SEARCH("[abc]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[ABC]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[a-z]+", perl|icase, "abcABC", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[A-Z]+", perl|icase, "abzANZ", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[a-Z]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[A-z]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[[:lower:]]+", perl|icase, "abyzABYZ", match_default, make_array(0, 8, -2, -2));   
+   TEST_REGEX_SEARCH("[[:upper:]]+", perl|icase, "abzABZ", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[[:word:]]+", perl|icase, "abcZZZ", match_default, make_array(0, 6, -2, -2));   
+   TEST_REGEX_SEARCH("[[:alpha:]]+", perl|icase, "abyzABYZ", match_default, make_array(0, 8, -2, -2));   
+   TEST_REGEX_SEARCH("[[:alnum:]]+", perl|icase, "09abyzABYZ", match_default, make_array(0, 10, -2, -2));   
+
+   // known and suspected bugs:
+   TEST_REGEX_SEARCH("\\(", perl, "(", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\)", perl, ")", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\$", perl, "$", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\^", perl, "^", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\.", perl, ".", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\*", perl, "*", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\+", perl, "+", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\?", perl, "?", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\[", perl, "[", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\]", perl, "]", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\|", perl, "|", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\\\", perl, "\\", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("#", perl, "#", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\#", perl, "#", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("a-", perl, "a-", match_default, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\-", perl, "-", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\{", perl, "{", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\}", perl, "}", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("0", perl, "0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("1", perl, "1", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("9", perl, "9", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("b", perl, "b", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("B", perl, "B", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("<", perl, "<", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH(">", perl, ">", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("w", perl, "w", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("W", perl, "W", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("`", perl, "`", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH(" ", perl, " ", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\n", perl, "\n", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH(",", perl, ",", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("a", perl, "a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("f", perl, "f", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("n", perl, "n", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("r", perl, "r", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("t", perl, "t", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("v", perl, "v", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("c", perl, "c", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("x", perl, "x", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH(":", perl, ":", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("(\\.[[:alnum:]]+){2}", perl, "w.a.b ", match_default, make_array(1, 5, 3, 5, -2, -2));
 #if 0
-
-; now test the set operator []
- match_default normal REG_EXTENDED
-; try some literals first
-[abc] a 0 1
-[abc] b 0 1
-[abc] c 0 1
-[abc] d -1 -1
-[^bcd] a 0 1
-[^bcd] b -1 -1
-[^bcd] d -1 -1
-[^bcd] e 0 1
-a[b]c abc 0 3
-a[ab]c abc 0 3
-a[^ab]c adc 0 3
-a[]b]c a]c 0 3
-a[[b]c a[c 0 3
-a[-b]c a-c 0 3
-a[^]b]c adc 0 3
-a[^-b]c adc 0 3
-a[b-]c a-c 0 3
-a[b !
-a[] !
-
-; then some ranges
-[b-e] a -1 -1
-[b-e] b 0 1
-[b-e] e 0 1
-[b-e] f -1 -1
-[^b-e] a 0 1
-[^b-e] b -1 -1
-[^b-e] e -1 -1
-[^b-e] f 0 1
-a[1-3]c a2c 0 3
-a[3-1]c !
-a[1-3-5]c !
-a[1- !
-
-; and some classes
-a[[:alpha:]]c abc 0 3
-a[[:unknown:]]c !
-a[[: !
-a[[:alpha !
-a[[:alpha:] !
-a[[:alpha,:] !
-a[[:]:]]b !
-a[[:-:]]b !
-a[[:alph:]] !
-a[[:alphabet:]] !
-[[:alnum:]]+ -%@a0X_- 3 6
-[[:alpha:]]+ -%@aX_0- 3 5
-[[:blank:]]+ "a  \tb" 1 4
-[[:cntrl:]]+ a\n\tb 1 3
-[[:digit:]]+ a019b 1 4
-[[:graph:]]+ " a%b " 1 4
-[[:lower:]]+ AabC 1 3
-; This test fails with STLPort, disable for now as this is a corner case anyway...
-;[[:print:]]+ "\na b\n" 1 4
-[[:punct:]]+ " %-&\t" 1 4
-[[:space:]]+ "a \n\t\rb" 1 5
-[[:upper:]]+ aBCd 1 3
-[[:xdigit:]]+ p0f3Cx 1 5
-
-; now test flag settings:
- escape_in_lists REG_NO_POSIX_TEST
-[\n] \n 0 1
- REG_NO_POSIX_TEST
-[\n] \n -1 -1
-[\n] \\ 0 1
-[[:class:] : 0 1
-[[:class:] [ 0 1
-[[:class:] c 0 1
-
-; line anchors
- match_default normal REG_EXTENDED
-^ab ab 0 2
-^ab xxabxx -1 -1
-^ab xx\nabzz 3 5
-ab$ ab 0 2
-ab$ abxx -1 -1
-ab$ ab\nzz 0 2
- match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL
-^ab ab -1 -1
-^ab xxabxx -1 -1
-^ab xx\nabzz 3 5
-ab$ ab -1 -1
-ab$ abxx -1 -1
-ab$ ab\nzz 0 2
-
-; line anchors, single line mode
- match_default normal match_single_line REG_NO_POSIX_TEST
-^ab ab 0 2
-^ab xxabxx -1 -1
-^ab xx\nabzz -1 -1
-ab$ ab 0 2
-ab$ abxx -1 -1
-ab$ ab\nzz -1 -1
- match_default match_not_bol match_not_eol normal REG_NO_POSIX_TEST match_single_line
-^ab ab -1 -1
-^ab xxabxx -1 -1
-^ab xx\nabzz -1 -1
-ab$ ab -1 -1
-ab$ abxx -1 -1
-ab$ ab\nzz -1 -1
-
-; back references
- match_default normal REG_PERL
-a(b)\2c	!
-a(b\1)c	!
-a(b*)c\1d abbcbbd 0 7 1 3
-a(b*)c\1d abbcbd -1 -1
-a(b*)c\1d abbcbbbd -1 -1
-^(.)\1 abc -1 -1
-a([bc])\1d abcdabbd	4 8 5 6
-; strictly speaking this is at best ambiguous, at worst wrong, this is what most
-; re implimentations will match though.
-a(([bc])\2)*d abbccd 0 6 3 5 3 4
-
-a(([bc])\2)*d abbcbd -1 -1
-a((b)*\2)*d abbbd 0 5 1 4 2 3
-; perl only:
-(ab*)[ab]*\1 ababaaa 0 4 0 2
-(a)\1bcd aabcd 0 5 0 1
-(a)\1bc*d aabcd 0 5 0 1
-(a)\1bc*d aabd 0 4 0 1
-(a)\1bc*d aabcccd 0 7 0 1
-(a)\1bc*[ce]d aabcccd 0 7 0 1
-^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5
-
-; posix only: 
- match_default extended REG_EXTENDED
-(ab*)[ab]*\1 ababaaa 0 7 0 1
-
-;
-; characters by code:
- match_default normal REG_PERL REG_STARTEND
-\0101 A 0 1
-\00 \0 0 1
-\0 \0 0 1
-\0172 z 0 1
-
-;
-; word operators:
-\w a 0 1
-\w z 0 1
-\w A 0 1
-\w Z 0 1
-\w _ 0 1
-\w } -1 -1
-\w ` -1 -1
-\w [ -1 -1
-\w @ -1 -1
-; non-word:
-\W a -1 -1
-\W z -1 -1
-\W A -1 -1
-\W Z -1 -1
-\W _ -1 -1
-\W } 0 1
-\W ` 0 1
-\W [ 0 1
-\W @ 0 1
-; word start:
-\<abcd "  abcd" 2 6
-\<ab cab -1 -1
-\<ab "\nab" 1 3
-\<tag ::tag 2 5
-;word end:
-abc\> abc 0 3
-abc\> abcd -1 -1
-abc\> abc\n 0 3
-abc\> abc:: 0 3
-; word boundary:
-\babcd "  abcd" 2 6
-\bab cab -1 -1
-\bab "\nab" 1 3
-\btag ::tag 2 5
-abc\b abc 0 3
-abc\b abcd -1 -1
-abc\b abc\n 0 3
-abc\b abc:: 0 3
-; within word:
-\B ab 1 1
-a\Bb ab 0 2
-a\B ab 0 1
-a\B a -1 -1
-a\B "a " -1 -1
-
-;
-; buffer operators:
-\`abc abc 0 3
-\`abc \nabc -1 -1
-\`abc " abc" -1 -1
-abc\' abc 0 3
-abc\' abc\n -1 -1
-abc\' "abc " -1 -1
-
-;
-; extra escape sequences:
-\a \a 0 1
-\f \f 0 1
-\n \n 0 1
-\r \r 0 1
-\t \t 0 1
-\v \v 0 1
-
-
-;
-; now follows various complex expressions designed to try and bust the matcher:
-a(((b)))c abc 0 3 1 2 1 2 1 2
-a(b|(c))d abd 0 3 1 2 -1 -1
-a(b|(c))d acd 0 3 1 2 1 2
-a(b*|c)d abbd 0 4 1 3
-; just gotta have one DFA-buster, of course
-a[ab]{20} aaaaabaaaabaaaabaaaab 0 21
-; and an inline expansion in case somebody gets tricky
-a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21
-; and in case somebody just slips in an NFA...
-a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31
-; one really big one
-1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71
-; fish for problems as brackets go past 8
-[ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8
-[ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9
-[ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10
-[ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10
-; and as parenthesis go past 9:
-(a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9
-(a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10
-(a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11
-(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12
-(a)d|(b)c abc 1 3 -1 -1 1 2
-"_+((www)|(ftp)|(mailto)):_*" "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19
-
-; subtleties of matching
-a(b)?c\1d acd 0 3 -1 -1
-a(b?c)+d accd 0 4 2 3
-(wee|week)(knights|night) weeknights 0 10 0 3 3 10
-.* abc 0 3
-a(b|(c))d abd 0 3 1 2 -1 -1
-a(b|(c))d acd 0 3 1 2 1 2
-a(b*|c|e)d abbd 0 4 1 3
-a(b*|c|e)d acd 0 3 1 2
-a(b*|c|e)d ad 0 2 1 1
-a(b?)c abc 0 3 1 2
-a(b?)c ac 0 2 1 1
-a(b+)c abc 0 3 1 2
-a(b+)c abbbc 0 5 1 4 
-a(b*)c ac 0 2 1 1 
-(a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5
-a([bc]?)c abc 0 3 1 2
-a([bc]?)c ac 0 2 1 1 
-a([bc]+)c abc 0 3 1 2
-a([bc]+)c abcc 0 4 1 3
-a([bc]+)bc abcbc 0 5 1 3
-a(bb+|b)b abb 0 3 1 2
-a(bbb+|bb+|b)b abb 0 3 1 2
-a(bbb+|bb+|b)b abbb 0 4 1 3
-a(bbb+|bb+|b)bb abbb 0 4 1 2
-(.*).* abcdef 0 6 0 6
-(a*)* bc 0 0 0 0
-xyx*xz xyxxxxyxxxz 5 11
-
-; do we get the right subexpression when it is used more than once?
-a(b|c)*d ad 0 2 -1 -1
-a(b|c)*d abcd 0 4 2 3
-a(b|c)+d abd 0 3 1 2
-a(b|c)+d abcd 0 4 2 3
-a(b|c?)+d ad 0 2 1 1
-a(b|c){0,0}d ad 0 2 -1 -1
-a(b|c){0,1}d ad 0 2 -1 -1
-a(b|c){0,1}d abd 0 3 1 2
-a(b|c){0,2}d ad 0 2 -1 -1
-a(b|c){0,2}d abcd 0 4 2 3
-a(b|c){0,}d ad 0 2 -1 -1
-a(b|c){0,}d abcd 0 4 2 3
-a(b|c){1,1}d abd 0 3 1 2
-a(b|c){1,2}d abd 0 3 1 2
-a(b|c){1,2}d abcd 0 4 2 3
-a(b|c){1,}d abd 0 3 1 2
-a(b|c){1,}d abcd 0 4 2 3
-a(b|c){2,2}d acbd 0 4 2 3
-a(b|c){2,2}d abcd 0 4 2 3
-a(b|c){2,4}d abcd 0 4 2 3
-a(b|c){2,4}d abcbd 0 5 3 4
-a(b|c){2,4}d abcbcd 0 6 4 5
-a(b|c){2,}d abcd 0 4 2 3
-a(b|c){2,}d abcbd 0 5 3 4
-; perl only:
-a(b|c?)+d abcd 0 4 3 3
-a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1
-a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3
-
-; posix only:
- match_default extended REG_EXTENDED REG_STARTEND
-a(b|c?)+d abcd 0 4 2 3
-a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3
-a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1
-a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3
-
-
- match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal
-\**?/{} \\**?/{} 0 7
-
- match_default normal REG_PERL
-; try to match C++ syntax elements:
-; line comment:
-//[^\n]* "++i //here is a line comment\n" 4 28
-; block comment:
-/\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27
-/\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1
-/\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1
-/\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1
-/\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1
-/\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1
-; preprossor directives:
-^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1
-^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1
-; perl only:
-^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x);" 0 53 30 42
-; literals:
-((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF         						0 4		0 4		0 4 	-1 -1 	-1 -1 	-1 -1 	-1 -1
-((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 									0 2 	0 2		-1 -1 	0 2 	-1 -1 	-1 -1 	-1 -1
-((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 								0 5		0 4		0 4 	-1 -1 	-1 -1 	-1 -1 	-1 -1
-((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 								0 5		0 4		0 4 	-1 -1 	4 5 	-1 -1 	-1 -1
-((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 			0 24	0 18	0 18 	-1 -1 	19 24 	19 24 	22 24
-; strings:
-'([^\\']|\\.)*' '\\x3A' 0 6 4 5
-'([^\\']|\\.)*' '\\'' 0 4 1 3
-'([^\\']|\\.)*' '\\n' 0 4 1 3
-
-; posix only:
- match_default extended escape_in_lists REG_EXTENDED REG_NO_POSIX_TEST ; we disable POSIX testing because it can't handle escapes in sets
-^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\  \r\n  foo();\\\r\n   printf(#x);" 0 53 28 42
-
-
-; now try and test some unicode specific characters:
- match_default normal REG_PERL REG_UNICODE_ONLY
-[[:unicode:]]+  a\0300\0400z 1 3
-[\x10-\xff] \39135\12409 -1 -1
-[\01-\05]{5} \36865\36865\36865\36865\36865 -1 -1
-
-; finally try some case insensitive matches:
- match_default normal REG_EXTENDED REG_ICASE
-; upper and lower have no meaning here so they fail, however these
-; may compile with other libraries...
-;[[:lower:]] !
-;[[:upper:]] !
-0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72
-
-; known and suspected bugs:
- match_default normal REG_EXTENDED
-\( ( 0 1
-\) ) 0 1
-\$ $ 0 1
-\^ ^ 0 1
-\. . 0 1
-\* * 0 1
-\+ + 0 1
-\? ? 0 1
-\[ [ 0 1
-\] ] 0 1
-\| | 0 1
-\\ \\ 0 1
-# # 0 1
-\# # 0 1
-a- a- 0 2
-\- - 0 1
-\{ { 0 1
-\} } 0 1
-0 0 0 1
-1 1 0 1
-9 9 0 1
-b b 0 1
-B B 0 1
-< < 0 1
-> > 0 1
-w w 0 1
-W W 0 1
-` ` 0 1
-' ' 0 1
-\n \n 0 1
-, , 0 1
-a a 0 1
-f f 0 1
-n n 0 1
-r r 0 1
-t t 0 1
-v v 0 1
-c c 0 1
-x x 0 1
-: : 0 1
-(\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5
-
- match_default normal REG_EXTENDED REG_ICASE
-a A 0 1
-A a 0 1
-[abc]+ abcABC 0 6
-[ABC]+ abcABC 0 6
-[a-z]+ abcABC 0 6
-[A-Z]+ abzANZ 0 6
-[a-Z]+ abzABZ 0 6
-[A-z]+ abzABZ 0 6
-[[:lower:]]+ abyzABYZ 0 8
-[[:upper:]]+ abzABZ 0 6
-[[:word:]]+ abcZZZ 0 6
-[[:alpha:]]+ abyzABYZ 0 8
-[[:alnum:]]+ 09abyzABYZ 0 10
-
-; updated tests for version 2:
- match_default normal REG_EXTENDED
-\x41 A 0 1
-\xff \255 0 1
-\xFF \255 0 1
- match_default normal REG_EXTENDED REG_NO_POSIX_TEST
-\c@ \0 0 1
- match_default normal REG_EXTENDED
-\cA \1 0 1
-\cz \58 0 1
-\c= !
-\c? !
-=: =: 0 2
-
-; word start:
-[[:<:]]abcd "  abcd" 2 6
-[[:<:]]ab cab -1 -1
-[[:<:]]ab "\nab" 1 3
-[[:<:]]tag ::tag 2 5
-;word end:
-abc[[:>:]] abc 0 3
-abc[[:>:]] abcd -1 -1
-abc[[:>:]] abc\n 0 3
-abc[[:>:]] abc:: 0 3
-
 ; collating elements and rewritten set code:
 - match_default normal REG_EXTENDED REG_STARTEND
 [[.zero.]] 0 0 1
--- a/test/regress/main.cpp
+++ b/test/regress/main.cpp
@ -7,6 +7,13 @@ int error_count = 0;
 int cpp_main(int argc, char * argv[])
 {
   basic_tests();
+   test_alt();
+   test_sets();
+   test_anchors();
+   test_backrefs();
+   test_character_escapes();
+   test_assertion_escapes();
+   test_tricky_cases();
   return error_count;
 }

@ -42,4 +49,5 @@ const int* make_array(int first, ...)
   }
   va_end(ap);
   return data;
-}
+}
+
--- a/test/regress/test.hpp
+++ b/test/regress/test.hpp
@ -85,6 +85,13 @@ const int* make_array(int first, ...);
 // define the test group proceedures:
 //
 void basic_tests();
+void test_alt();
+void test_sets();
+void test_anchors();
+void test_backrefs();
+void test_character_escapes();
+void test_assertion_escapes();
+void test_tricky_cases();


 #endif