Added support for Perl style \N \P and \p.

Completed first draft of Unicode UCS-4 support. Broken compiler compatibility fixes. Added unicode_iterators. [SVN r26185]
2004-11-11 17:04:17 +00:00
parent f141de61ec
commit d35e5a088e
17 changed files with 1051 additions and 46 deletions
@@ -42,6 +42,7 @@ SOURCES =
   cpp_regex_traits.cpp
   cregex.cpp
   fileiter.cpp
+   icu.cpp
   instances.cpp
   posix_api.cpp
   regex.cpp
@@ -179,7 +179,7 @@
 #  define BOOST_REGEX_DECL
 #endif

-#if (defined(BOOST_MSVC) || defined(__BORLANDC__)) && !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus)
+#if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus)
 #  define BOOST_LIB_NAME boost_regex
 #  if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)
 #     define BOOST_DYN_LINK
@@ -177,6 +177,8 @@ private:
      offset_xdigit = U_CHAR_CATEGORY_COUNT+2,
      offset_underscore = U_CHAR_CATEGORY_COUNT+3,
      offset_unicode = U_CHAR_CATEGORY_COUNT+4,
+      offset_any = U_CHAR_CATEGORY_COUNT+5,
+      offset_ascii = U_CHAR_CATEGORY_COUNT+6,
   };

   //
@@ -187,6 +189,10 @@ private:
   static const char_class_type mask_xdigit;
   static const char_class_type mask_underscore;
   static const char_class_type mask_unicode;
+   static const char_class_type mask_any;
+   static const char_class_type mask_ascii;
+
+   static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);

   boost::shared_ptr< ::boost::re_detail::icu_regex_traits_implementation> m_pimpl;
 };
@@ -49,35 +49,63 @@ inline bool is_surrogate(T v)
   return (v & 0xF800u) == 0xd800;
 }

+// Returns the number of bytes in the UTF-8 sequence introduced by the
+// lead byte c, derived from the number of leading 1 bits in c.
+//
+// The result is always in the range 1..4:
+//  * a byte with no leading 1 bits (0xxxxxxx) is a one byte sequence;
+//  * a byte with five or more leading 1 bits (0xF8..0xFF) can never
+//    start a valid UTF-8 sequence, so the count is clamped to 4 to stop
+//    callers that index fixed-size tables by (count - 1) from running
+//    off the end of those tables.
+// Note that a continuation byte (10xxxxxx) yields 1.
+inline unsigned utf8_byte_count(boost::uint8_t c)
+{
+   // if the most significant bit with a zero in it is in position
+   // 8-N then there are N bytes in this UTF-8 sequence:
+   boost::uint8_t mask = 0x80u;
+   unsigned result = 0;
+   while(c & mask)
+   {
+      ++result;
+      mask >>= 1;
+   }
+   if(result == 0)
+      return 1;   // plain 7-bit character
+   // never report more than the 4 bytes that UTF-8 allows:
+   return (result > 4) ? 4 : result;
+}
+
+// Returns how many bytes follow the lead byte c in its UTF-8 sequence,
+// i.e. the total length reported by utf8_byte_count less the lead byte.
+inline unsigned utf8_trailing_byte_count(boost::uint8_t c)
+{
+   const unsigned total_bytes = utf8_byte_count(c);
+   return total_bytes - 1;
+}
+
 }

 template <class BaseIterator, class U16Type = ::boost::uint16_t>
 class u32_to_u16_iterator
   : public boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type>
 {
-   typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type;
+   typedef boost::iterator_facade<u32_to_u16_iterator<BaseIterator, U16Type>, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type;
+
+#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
   typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;

   BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
   BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16);
+#endif
+
 public:
   typename base_type::reference
      dereference()const
   {
      if(m_current == 2)
-         const_cast<u32_to_u16_iterator*>(this)->extract_current();
+         extract_current();
      return m_values[m_current];
   }
   bool equal(const u32_to_u16_iterator& that)const
   {
      if(m_position == that.m_position)
      {
+         // Both m_currents must be equal, or both even
+         // this is the same as saying their sum must be even:
+         return (m_current + that.m_current) & 1u ? false : true;
+            /*
         if((m_current >= 2) && (that.m_current < 2))
            const_cast<u32_to_u16_iterator*>(this)->extract_current();
         else if((m_current < 2) && (that.m_current >= 2))
            const_cast<u32_to_u16_iterator&>(that).extract_current();
         if(m_current == that.m_current)
            return true;
+            */
      }
      return false;
   }
@@ -127,7 +155,7 @@ public:
      m_values[2] = 0;
   }
 private:
-   void invalid_code_point(::boost::uint32_t val)
+   static void invalid_code_point(::boost::uint32_t val)
   {
 #ifndef BOOST_NO_STD_LOCALE
      std::stringstream ss;
@@ -139,15 +167,15 @@ private:
      boost::throw_exception(e);
   }

-   void extract_current()
+   void extract_current()const
   {
      // begin by checking for a code point out of range:
-      if(static_cast< ::boost::uint32_t>(*m_position) >= 0x10000u)
+      ::boost::uint32_t v = *m_position;
+      if(v >= 0x10000u)
      {
-         if(static_cast< ::boost::uint32_t>(*m_position) > 0x10FFFFu)
+         if(v > 0x10FFFFu)
            invalid_code_point(*m_position);
         // split into two surrogates:
-         base_value_type v = *m_position;
         m_values[0] = static_cast<U16Type>(v >> 10) + detail::high_surrogate_base;
         m_values[1] = static_cast<U16Type>(v & detail::ten_bit_mask) + detail::low_surrogate_base;
         m_current = 0;
@@ -166,29 +194,31 @@ private:
      }
   }
   BaseIterator m_position;
-   U16Type m_values[3];
-   unsigned m_current;
+   mutable U16Type m_values[3];
+   mutable unsigned m_current;
 };

 template <class BaseIterator, class U32Type = ::boost::uint32_t>
 class u16_to_u32_iterator
   : public boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
 {
-   typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
+   typedef boost::iterator_facade<u16_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
   // special values for pending iterator reads:
   BOOST_STATIC_CONSTANT(::boost::uint32_t, pending_read = 0xffffffffu);

+#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
   typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;

   BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16);
   BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
+#endif

 public:
   typename base_type::reference
      dereference()const
   {
      if(m_value == pending_read)
-         const_cast<u16_to_u32_iterator*>(this)->extract_current();
+         extract_current();
      return m_value;
   }
   bool equal(const u16_to_u32_iterator& that)const
@@ -223,7 +253,7 @@ public:
      m_value = pending_read;
   }
 private:
-   void invalid_code_point(::boost::uint16_t val)
+   static void invalid_code_point(::boost::uint16_t val)
   {
 #ifndef BOOST_NO_STD_LOCALE
      std::stringstream ss;
@@ -234,28 +264,254 @@ private:
 #endif
      boost::throw_exception(e);
   }
-   void extract_current()
+   void extract_current()const
   {
      m_value = static_cast<U32Type>(static_cast< ::boost::uint16_t>(*m_position));
      // if the last value is a high surrogate then adjust m_position and m_value as needed:
      if(detail::is_high_surrogate(*m_position))
      {
         // precondition; next value must be a low-surrogate:
-         ::boost::uint16_t t = *++m_position;
-         if((*m_position & 0xFC00u) != 0xDC00u)
+         BaseIterator next(m_position);
+         ::boost::uint16_t t = *++next;
+         if((t & 0xFC00u) != 0xDC00u)
            invalid_code_point(t);
         m_value = (m_value - detail::high_surrogate_base) << 10;
         m_value |= (static_cast<U32Type>(static_cast< ::boost::uint16_t>(t)) & detail::ten_bit_mask);
-         --m_position;
      }
      // postcondition; result must not be a surrogate:
      if(detail::is_surrogate(m_value))
         invalid_code_point(static_cast< ::boost::uint16_t>(m_value));
   }
   BaseIterator m_position;
-   U32Type m_value;
+   mutable U32Type m_value;
 };

-}
+// Adapts a bidirectional iterator over UTF-32 code points into a
+// bidirectional iterator over the equivalent UTF-8 bytes.
+//
+// State:
+//   m_position - the code point currently being encoded.
+//   m_values   - the UTF-8 encoding of *m_position; unused slots are zero
+//                and m_values[4] is a permanent zero sentinel.
+//   m_current  - index of the current byte within m_values; the value 4
+//                means "nothing extracted yet" (a pending read).
+//
+// Code points above 0x10FFFF are reported by passing a std::out_of_range
+// object to boost::throw_exception at the point they are encoded.
+template <class BaseIterator, class U8Type = ::boost::uint8_t>
+class u32_to_u8_iterator
+   : public boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type>
+{
+   typedef boost::iterator_facade<u32_to_u8_iterator<BaseIterator, U8Type>, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type;
+
+#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+   typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
+
+   BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32);
+   BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8);
+#endif
+
+public:
+   typename base_type::reference
+      dereference()const
+   {
+      // encode the current code point lazily, on first access:
+      if(m_current == 4)
+         extract_current();
+      return m_values[m_current];
+   }
+   bool equal(const u32_to_u8_iterator& that)const
+   {
+      if(m_position == that.m_position)
+      {
+         // either the m_current's must be equal, or one must be 0 and
+         // the other 4 (a pending read is equivalent to being on the
+         // first byte): in that case neither has its two low bits set.
+         return (m_current == that.m_current)
+            || (((m_current | that.m_current) & 3) == 0);
+      }
+      return false;
+   }
+   void increment()
+   {
+      // if we have a pending read then read now, so that we know how
+      // many bytes the current code point encodes to:
+      if(m_current == 4)
+      {
+         // pending read:
+         extract_current();
+      }
+      // move to the next byte of the encoding:
+      ++m_current;
+      // a zero here means we have run off the end of the encoding (unused
+      // slots and the sentinel are zero), so move to the next code point:
+      if(m_values[m_current] == 0)
+      {
+         m_current = 4;
+         ++m_position;
+      }
+   }
+   void decrement()
+   {
+      if((m_current & 3) == 0)
+      {
+         // we are on the first byte (or have a pending read): step back
+         // one code point and position on the last byte of its encoding:
+         --m_position;
+         extract_current();
+         m_current = 3;
+         while(m_current && (m_values[m_current] == 0))
+            --m_current;
+      }
+      else
+         --m_current;
+   }
+   BaseIterator base()const
+   {
+      return m_position;
+   }
+   // construct:
+   u32_to_u8_iterator() : m_position(), m_current(0)
+   {
+      clear_values();
+   }
+   u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4)
+   {
+      clear_values();
+   }
+private:
+   // Zero the whole byte buffer, sentinel included, so that copying the
+   // iterator or stepping a default constructed one never reads an
+   // indeterminate value.
+   void clear_values()
+   {
+      for(unsigned i = 0; i < 5; ++i)
+         m_values[i] = 0;
+   }
+
+   // Report a code point that cannot be encoded as UTF-8.
+   static void invalid_code_point(::boost::uint32_t val)
+   {
+#ifndef BOOST_NO_STD_LOCALE
+      std::stringstream ss;
+      ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-8 sequence";
+      std::out_of_range e(ss.str());
+#else
+      std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-8 sequence");
+#endif
+      boost::throw_exception(e);
+   }
+
+   // Encode *m_position into m_values (zero filling the unused slots)
+   // and position m_current on the first byte.
+   void extract_current()const
+   {
+      boost::uint32_t c = *m_position;
+      if(c > 0x10FFFFu)
+         invalid_code_point(c);
+      if(c < 0x80u)
+      {
+         // one byte: 0xxxxxxx
+         m_values[0] = static_cast<unsigned char>(c);
+         m_values[1] = static_cast<unsigned char>(0u);
+         m_values[2] = static_cast<unsigned char>(0u);
+         m_values[3] = static_cast<unsigned char>(0u);
+      }
+      else if(c < 0x800u)
+      {
+         // two bytes: 110xxxxx 10xxxxxx
+         m_values[0] = static_cast<unsigned char>(0xC0u + (c >> 6));
+         m_values[1] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
+         m_values[2] = static_cast<unsigned char>(0u);
+         m_values[3] = static_cast<unsigned char>(0u);
+      }
+      else if(c < 0x10000u)
+      {
+         // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+         m_values[0] = static_cast<unsigned char>(0xE0u + (c >> 12));
+         m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
+         m_values[2] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
+         m_values[3] = static_cast<unsigned char>(0u);
+      }
+      else
+      {
+         // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+         m_values[0] = static_cast<unsigned char>(0xF0u + (c >> 18));
+         m_values[1] = static_cast<unsigned char>(0x80u + ((c >> 12) & 0x3Fu));
+         m_values[2] = static_cast<unsigned char>(0x80u + ((c >> 6) & 0x3Fu));
+         m_values[3] = static_cast<unsigned char>(0x80u + (c & 0x3Fu));
+      }
+      m_current= 0;
+   }
+   BaseIterator m_position;
+   mutable U8Type m_values[5];
+   mutable unsigned m_current;
+};
+
+// Adapts a bidirectional iterator over UTF-8 bytes into a bidirectional
+// iterator over the UTF-32 code points they encode.
+//
+// State:
+//   m_position - the lead byte of the current UTF-8 sequence.
+//   m_value    - the decoded code point, or pending_read when the
+//                sequence at m_position has not been decoded yet.
+//
+// Malformed input is reported by passing a std::out_of_range object to
+// boost::throw_exception.
+template <class BaseIterator, class U32Type = ::boost::uint32_t>
+class u8_to_u32_iterator
+   : public boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type>
+{
+   typedef boost::iterator_facade<u8_to_u32_iterator<BaseIterator, U32Type>, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type;
+   // special values for pending iterator reads:
+   BOOST_STATIC_CONSTANT(::boost::uint32_t, pending_read = 0xffffffffu);
+
+#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+   typedef typename std::iterator_traits<BaseIterator>::value_type base_value_type;
+
+   BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8);
+   BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32);
+#endif
+
+public:
+   typename base_type::reference
+      dereference()const
+   {
+      // decode lazily, on first access:
+      if(m_value == pending_read)
+         extract_current();
+      return m_value;
+   }
+   bool equal(const u8_to_u32_iterator& that)const
+   {
+      return m_position == that.m_position;
+   }
+   void increment()
+   {
+      // step over every byte of the current sequence; the lead byte
+      // tells us how long the sequence is:
+      unsigned c = detail::utf8_byte_count(*m_position);
+      std::advance(m_position, c);
+      m_value = pending_read;
+   }
+   void decrement()
+   {
+      // Keep backtracking until we don't have a trailing character:
+      unsigned count = 0;
+      while((*--m_position & 0xC0u) == 0x80u) ++count;
+      // now check that the sequence was valid:
+      if(count != detail::utf8_trailing_byte_count(*m_position))
+         invalid_sequnce();
+      m_value = pending_read;
+   }
+   BaseIterator base()const
+   {
+      return m_position;
+   }
+   // construct:
+   u8_to_u32_iterator() : m_position()
+   {
+      m_value = pending_read;
+   }
+   u8_to_u32_iterator(BaseIterator b) : m_position(b)
+   {
+      m_value = pending_read;
+   }
+private:
+   // Report a malformed UTF-8 sequence.
+   static void invalid_sequnce()
+   {
+      std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character");
+      boost::throw_exception(e);
+   }
+   // Decode the sequence starting at m_position into m_value.
+   // The result is built in a local and only stored once it has passed
+   // every check, so a failed decode leaves m_value unchanged.
+   void extract_current()const
+   {
+      U32Type value = static_cast<U32Type>(static_cast< ::boost::uint8_t>(*m_position));
+      // we must not have a continuation character:
+      if((value & 0xC0u) == 0x80u)
+         invalid_sequnce();
+      // see how many extra bytes we have:
+      unsigned extra = detail::utf8_trailing_byte_count(*m_position);
+      // the mask table below only covers 0..3 extra bytes; a lead byte
+      // claiming more is not valid UTF-8 and must not index the table:
+      if(extra > 3)
+         invalid_sequnce();
+      // extract the extra bits, 6 from each extra byte:
+      BaseIterator next(m_position);
+      for(unsigned c = 0; c < extra; ++c)
+      {
+         ++next;
+         // every extra byte must be a continuation byte (10xxxxxx):
+         if((static_cast<boost::uint8_t>(*next) & 0xC0u) != 0x80u)
+            invalid_sequnce();
+         value <<= 6;
+         value += static_cast<boost::uint8_t>(*next) & 0x3Fu;
+      }
+      // we now need to remove a few of the leftmost bits, but how many depends
+      // upon how many extra bytes we've extracted:
+      static const boost::uint32_t masks[] = 
+      {
+         0x7Fu,
+         0x7FFu,
+         0xFFFFu,
+         0x1FFFFFu,
+      };
+      value &= masks[extra];
+      // check the result:
+      if(value > 0x10FFFFu)
+         invalid_sequnce();
+      m_value = value;
+   }
+   BaseIterator m_position;
+   mutable U32Type m_value;
+};
+
+} // namespace boost

 #endif // BOOST_REGEX_UNICODE_ITERATOR_HPP
@@ -611,6 +611,52 @@ bool basic_regex_parser<charT, traits>::parse_extended_escape()
      ++m_position;
      this->append_state(syntax_element_restart_continue);
      break;
+   case regex_constants::escape_type_not_property:
+      // \P is the complement of \p:
+      negate = true;
+      // fall through:
+   case regex_constants::escape_type_property:
+      {
+         // step over the 'p' / 'P':
+         ++m_position;
+         char_class_type m;
+         if(m_position == m_end)
+         {
+            fail(regex_constants::error_escape, m_position - m_base);
+            return false;
+         }
+         // maybe have \p{ddd}
+         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
+         {
+            // base is left pointing at the opening brace:
+            const charT* base = m_position;
+            // skip forward until we find enclosing brace:
+            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
+               ++m_position;
+            if(m_position == m_end)
+            {
+               fail(regex_constants::error_escape, m_position - m_base);
+               return false;
+            }
+            // look up the text between the braces, then step over the
+            // closing brace:
+            m = this->m_traits.lookup_classname(++base, m_position++);
+         }
+         else
+         {
+            // single character class name, as in \pL:
+            m = this->m_traits.lookup_classname(m_position, m_position+1);
+            ++m_position;
+         }
+         if(m != 0)
+         {
+            basic_char_set<charT, traits> char_set;
+            if(negate)
+               char_set.negate();
+            char_set.add_class(m);
+            if(0 == this->append_set(char_set))
+            {
+               fail(regex_constants::error_ctype, m_position - m_base);
+               return false;
+            }
+            return true;
+         }
+         // the name did not resolve to a character class: report the
+         // error and stop here rather than dropping into the default
+         // case below and appending a literal:
+         fail(regex_constants::error_ctype, m_position - m_base);
+         return false;
+      }
   default:
      this->append_literal(unescape_character());
      break;
@@ -948,6 +994,7 @@ bool basic_regex_parser<charT, traits>::parse_set()
               if(m != 0)
               {
                  char_set.add_class(m);
+                  ++m_position;
                  break;
               }
            }
@@ -1373,6 +1420,41 @@ charT basic_regex_parser<charT, traits>::unescape_character()
      }
      return static_cast<charT>(val);
      }
+   // \N{name}: look the name up as a collating element and return it,
+   // provided that it resolves to exactly one character.
+   // NOTE(review): the "return false" statements below yield a zero
+   // charT, since this function returns charT rather than bool.
+   case regex_constants::escape_type_named_char:
+      {
+         // step over the 'N':
+         ++m_position;
+         if(m_position == m_end)
+         {
+            fail(regex_constants::error_escape, m_position - m_base);
+            return false;
+         }
+         // maybe have \N{name}
+         if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
+         {
+            // base is left pointing at the opening brace:
+            const charT* base = m_position;
+            // skip forward until we find enclosing brace:
+            while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
+               ++m_position;
+            if(m_position == m_end)
+            {
+               fail(regex_constants::error_escape, m_position - m_base);
+               return false;
+            }
+            // look up the text between the braces, then step over the
+            // closing brace:
+            string_type s = this->m_traits.lookup_collatename(++base, m_position++);
+            if(s.empty())
+            {
+               fail(regex_constants::error_collate, m_position - m_base);
+               return false;
+            }
+            if(s.size() == 1)
+            {
+               return s[0];
+            }
+            // a name that maps to more than one character cannot be
+            // returned as a single charT, so it is treated as an error below.
+         }
+         // fall through is a failure:
+         fail(regex_constants::error_escape, m_position - m_base);
+         return false;
+      }
   default:
      result = *m_position;
      break;
@@ -75,7 +75,7 @@ class basic_regex_formatter
 public:
   typedef typename traits::char_type char_type;
   basic_regex_formatter(OutputIterator o, const Results& r, const traits& t)
-      : m_traits(t), m_results(r), m_out(o), m_state(output_copy) {}
+      : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_have_conditional(false) {}
   OutputIterator format(const char_type* p1, const char_type* p2, match_flag_type f);
   OutputIterator format(const char_type* p1, match_flag_type f)
   {
@@ -108,6 +108,7 @@ private:
   const char_type* m_end;       // format string end
   match_flag_type m_flags;      // format flags to use
   output_state    m_state;      // what to do with the next character
+   bool            m_have_conditional; // we are parsing a conditional
 private:
   basic_regex_formatter(const basic_regex_formatter&);
   basic_regex_formatter& operator=(const basic_regex_formatter&);
@@ -147,7 +148,10 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_all()
         if(m_flags & boost::regex_constants::format_all)
         {
            ++m_position;
+            bool have_conditional = m_have_conditional;
+            m_have_conditional = false;
            format_until_scope_end();
+            m_have_conditional = have_conditional;
            if(m_position == m_end)
               return;
            BOOST_ASSERT(*m_position == static_cast<char_type>(')'));
@@ -158,7 +162,6 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_all()
         ++m_position;
         break;
      case ')':
-      case ':':
         if(m_flags & boost::regex_constants::format_all)
         {
            return;
@@ -166,6 +169,14 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_all()
         put(*m_position);
         ++m_position;
         break;
+      case ':':
+         if((m_flags & boost::regex_constants::format_all) && m_have_conditional)
+         {
+            return;
+         }
+         put(*m_position);
+         ++m_position;
+         break;
      case '?':
         if(m_flags & boost::regex_constants::format_all)
         {
@@ -405,7 +416,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional(
   // output varies depending upon whether sub-expression v matched or not:
   if(m_results[v].matched)
   {
+      m_have_conditional = true;
      format_all();
+      m_have_conditional = false;
      if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
      {
         // skip the ':':
@@ -425,7 +438,9 @@ void basic_regex_formatter<OutputIterator, Results, traits>::format_conditional(
      output_state saved_state = m_state;
      m_state = output_none;
      // format until ':' or ')':
+      m_have_conditional = true;
      format_all();
+      m_have_conditional = false;
      // restore state:
      m_state = saved_state;
      if((m_position != m_end) && (*m_position == static_cast<char_type>(':')))
@@ -89,7 +89,11 @@ static const escape_syntax_type escape_type_C = 50;                            /
 static const escape_syntax_type escape_type_Z = 51;                            // for \Z
 static const escape_syntax_type escape_type_G = 52;                            // for \G

-static const escape_syntax_type syntax_max = 54;
+static const escape_syntax_type escape_type_property = 54;                     // for \p
+static const escape_syntax_type escape_type_not_property = 55;                 // for \P
+static const escape_syntax_type escape_type_named_char = 56;                   // for \N
+
+static const escape_syntax_type syntax_max = 57;

 }
 }
@@ -15,7 +15,10 @@
  *   VERSION      see <boost/version.hpp>
  *   DESCRIPTION: Unicode regular expressions on top of the ICU Library.
  */
+#define BOOST_REGEX_SOURCE

+#include <boost/regex/config.hpp>
+#ifdef BOOST_HAS_ICU
 #include <boost/regex/icu.hpp>

 namespace boost{
@@ -64,6 +67,264 @@ const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex
 const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore;
 const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode;
+const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any;
+const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii;
+
+icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
+{
+   static const ::UChar32 prop_name_table[] = {
+      /* any */  'a', 'n', 'y', 
+      /* ascii */  'a', 's', 'c', 'i', 'i', 
+      /* assigned */  'a', 's', 's', 'i', 'g', 'n', 'e', 'd', 
+      /* c* */  'c', '*', 
+      /* cc */  'c', 'c', 
+      /* cf */  'c', 'f', 
+      /* closepunctuation */  'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* cn */  'c', 'n', 
+      /* co */  'c', 'o', 
+      /* connectorpunctuation */  'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* control */  'c', 'o', 'n', 't', 'r', 'o', 'l', 
+      /* cs */  'c', 's', 
+      /* currencysymbol */  'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', 
+      /* dashpunctuation */  'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* decimaldigitnumber */  'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', 
+      /* enclosingmark */  'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', 
+      /* finalpunctuation */  'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* format */  'f', 'o', 'r', 'm', 'a', 't', 
+      /* initialpunctuation */  'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* l* */  'l', '*', 
+      /* letter */  'l', 'e', 't', 't', 'e', 'r', 
+      /* letternumber */  'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', 
+      /* lineseparator */  'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
+      /* ll */  'l', 'l', 
+      /* lm */  'l', 'm', 
+      /* lo */  'l', 'o', 
+      /* lowercaseletter */  'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', 
+      /* lt */  'l', 't', 
+      /* lu */  'l', 'u', 
+      /* m* */  'm', '*', 
+      /* mark */  'm', 'a', 'r', 'k', 
+      /* mathsymbol */  'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', 
+      /* mc */  'm', 'c', 
+      /* me */  'm', 'e', 
+      /* mn */  'm', 'n', 
+      /* modifierletter */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', 
+      /* modifiersymbol */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', 
+      /* n* */  'n', '*', 
+      /* nd */  'n', 'd', 
+      /* nl */  'n', 'l', 
+      /* no */  'n', 'o', 
+      /* nonspacingmark */  'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', 
+      /* notassigned */  'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', 
+      /* number */  'n', 'u', 'm', 'b', 'e', 'r', 
+      /* openpunctuation */  'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* other */  'o', 't', 'h', 'e', 'r', 
+      /* otherletter */  'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', 
+      /* othernumber */  'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', 
+      /* otherpunctuation */  'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* othersymbol */  'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', 
+      /* p* */  'p', '*', 
+      /* paragraphseparator */  'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
+      /* pc */  'p', 'c', 
+      /* pd */  'p', 'd', 
+      /* pe */  'p', 'e', 
+      /* pf */  'p', 'f', 
+      /* pi */  'p', 'i', 
+      /* po */  'p', 'o', 
+      /* privateuse */  'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', 
+      /* ps */  'p', 's', 
+      /* punctuation */  'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
+      /* s* */  's', '*', 
+      /* sc */  's', 'c', 
+      /* separator */  's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
+      /* sk */  's', 'k', 
+      /* sm */  's', 'm', 
+      /* so */  's', 'o', 
+      /* spaceseparator */  's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
+      /* spacingcombiningmark */  's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', 
+      /* surrogate */  's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', 
+      /* symbol */  's', 'y', 'm', 'b', 'o', 'l', 
+      /* titlecase */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 
+      /* titlecaseletter */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', 
+      /* uppercaseletter */  'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', 
+      /* z* */  'z', '*', 
+      /* zl */  'z', 'l', 
+      /* zp */  'z', 'p', 
+      /* zs */  'z', 's', 
+   };
+
+   // [begin, end) ranges into prop_name_table, one per property name and
+   // in the same order as the names appear in that table.
+   // NB: the space in "< ::UChar32" is required: "<:" is a digraph for "["
+   // so "<::UChar32>" does not parse as a template argument list.
+   static const re_detail::character_pointer_range< ::UChar32> range_data[] = {
+      { prop_name_table+0, prop_name_table+3, }, // any
+      { prop_name_table+3, prop_name_table+8, }, // ascii
+      { prop_name_table+8, prop_name_table+16, }, // assigned
+      { prop_name_table+16, prop_name_table+18, }, // c*
+      { prop_name_table+18, prop_name_table+20, }, // cc
+      { prop_name_table+20, prop_name_table+22, }, // cf
+      { prop_name_table+22, prop_name_table+38, }, // closepunctuation
+      { prop_name_table+38, prop_name_table+40, }, // cn
+      { prop_name_table+40, prop_name_table+42, }, // co
+      { prop_name_table+42, prop_name_table+62, }, // connectorpunctuation
+      { prop_name_table+62, prop_name_table+69, }, // control
+      { prop_name_table+69, prop_name_table+71, }, // cs
+      { prop_name_table+71, prop_name_table+85, }, // currencysymbol
+      { prop_name_table+85, prop_name_table+100, }, // dashpunctuation
+      { prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber
+      { prop_name_table+118, prop_name_table+131, }, // enclosingmark
+      { prop_name_table+131, prop_name_table+147, }, // finalpunctuation
+      { prop_name_table+147, prop_name_table+153, }, // format
+      { prop_name_table+153, prop_name_table+171, }, // initialpunctuation
+      { prop_name_table+171, prop_name_table+173, }, // l*
+      { prop_name_table+173, prop_name_table+179, }, // letter
+      { prop_name_table+179, prop_name_table+191, }, // letternumber
+      { prop_name_table+191, prop_name_table+204, }, // lineseparator
+      { prop_name_table+204, prop_name_table+206, }, // ll
+      { prop_name_table+206, prop_name_table+208, }, // lm
+      { prop_name_table+208, prop_name_table+210, }, // lo
+      { prop_name_table+210, prop_name_table+225, }, // lowercaseletter
+      { prop_name_table+225, prop_name_table+227, }, // lt
+      { prop_name_table+227, prop_name_table+229, }, // lu
+      { prop_name_table+229, prop_name_table+231, }, // m*
+      { prop_name_table+231, prop_name_table+235, }, // mark
+      { prop_name_table+235, prop_name_table+245, }, // mathsymbol
+      { prop_name_table+245, prop_name_table+247, }, // mc
+      { prop_name_table+247, prop_name_table+249, }, // me
+      { prop_name_table+249, prop_name_table+251, }, // mn
+      { prop_name_table+251, prop_name_table+265, }, // modifierletter
+      { prop_name_table+265, prop_name_table+279, }, // modifiersymbol
+      { prop_name_table+279, prop_name_table+281, }, // n*
+      { prop_name_table+281, prop_name_table+283, }, // nd
+      { prop_name_table+283, prop_name_table+285, }, // nl
+      { prop_name_table+285, prop_name_table+287, }, // no
+      { prop_name_table+287, prop_name_table+301, }, // nonspacingmark
+      { prop_name_table+301, prop_name_table+312, }, // notassigned
+      { prop_name_table+312, prop_name_table+318, }, // number
+      { prop_name_table+318, prop_name_table+333, }, // openpunctuation
+      { prop_name_table+333, prop_name_table+338, }, // other
+      { prop_name_table+338, prop_name_table+349, }, // otherletter
+      { prop_name_table+349, prop_name_table+360, }, // othernumber
+      { prop_name_table+360, prop_name_table+376, }, // otherpunctuation
+      { prop_name_table+376, prop_name_table+387, }, // othersymbol
+      { prop_name_table+387, prop_name_table+389, }, // p*
+      { prop_name_table+389, prop_name_table+407, }, // paragraphseparator
+      { prop_name_table+407, prop_name_table+409, }, // pc
+      { prop_name_table+409, prop_name_table+411, }, // pd
+      { prop_name_table+411, prop_name_table+413, }, // pe
+      { prop_name_table+413, prop_name_table+415, }, // pf
+      { prop_name_table+415, prop_name_table+417, }, // pi
+      { prop_name_table+417, prop_name_table+419, }, // po
+      { prop_name_table+419, prop_name_table+429, }, // privateuse
+      { prop_name_table+429, prop_name_table+431, }, // ps
+      { prop_name_table+431, prop_name_table+442, }, // punctuation
+      { prop_name_table+442, prop_name_table+444, }, // s*
+      { prop_name_table+444, prop_name_table+446, }, // sc
+      { prop_name_table+446, prop_name_table+455, }, // separator
+      { prop_name_table+455, prop_name_table+457, }, // sk
+      { prop_name_table+457, prop_name_table+459, }, // sm
+      { prop_name_table+459, prop_name_table+461, }, // so
+      { prop_name_table+461, prop_name_table+475, }, // spaceseparator
+      { prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark
+      { prop_name_table+495, prop_name_table+504, }, // surrogate
+      { prop_name_table+504, prop_name_table+510, }, // symbol
+      { prop_name_table+510, prop_name_table+519, }, // titlecase
+      { prop_name_table+519, prop_name_table+534, }, // titlecaseletter
+      { prop_name_table+534, prop_name_table+549, }, // uppercaseletter
+      { prop_name_table+549, prop_name_table+551, }, // z*
+      { prop_name_table+551, prop_name_table+553, }, // zl
+      { prop_name_table+553, prop_name_table+555, }, // zp
+      { prop_name_table+555, prop_name_table+557, }, // zs
+   };
+
+   static const icu_regex_traits::char_class_type icu_class_map[] = {
+      icu_regex_traits::mask_any, // any
+      icu_regex_traits::mask_ascii, // ascii
+      (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
+      U_GC_C_MASK, // c*
+      U_GC_CC_MASK, // cc
+      U_GC_CF_MASK, // cf
+      U_GC_PE_MASK, // closepunctuation
+      U_GC_CN_MASK, // cn
+      U_GC_CO_MASK, // co
+      U_GC_PC_MASK, // connectorpunctuation
+      U_GC_CC_MASK, // control
+      U_GC_CS_MASK, // cs
+      U_GC_SC_MASK, // currencysymbol
+      U_GC_PD_MASK, // dashpunctuation
+      U_GC_ND_MASK, // decimaldigitnumber
+      U_GC_ME_MASK, // enclosingmark
+      U_GC_PF_MASK, // finalpunctuation
+      U_GC_CF_MASK, // format
+      U_GC_PI_MASK, // initialpunctuation
+      U_GC_L_MASK, // l*
+      U_GC_L_MASK, // letter
+      U_GC_NL_MASK, // letternumber
+      U_GC_ZL_MASK, // lineseparator
+      U_GC_LL_MASK, // ll
+      U_GC_LM_MASK, // lm
+      U_GC_LO_MASK, // lo
+      U_GC_LL_MASK, // lowercaseletter
+      U_GC_LT_MASK, // lt
+      U_GC_LU_MASK, // lu
+      U_GC_M_MASK, // m*
+      U_GC_M_MASK, // mark
+      U_GC_SM_MASK, // mathsymbol
+      U_GC_MC_MASK, // mc
+      U_GC_ME_MASK, // me
+      U_GC_MN_MASK, // mn
+      U_GC_LM_MASK, // modifierletter
+      U_GC_SK_MASK, // modifiersymbol
+      U_GC_N_MASK, // n*
+      U_GC_ND_MASK, // nd
+      U_GC_NL_MASK, // nl
+      U_GC_NO_MASK, // no
+      U_GC_MN_MASK, // nonspacingmark
+      U_GC_CN_MASK, // notassigned
+      U_GC_N_MASK, // number
+      U_GC_PS_MASK, // openpunctuation
+      U_GC_C_MASK, // other
+      U_GC_LO_MASK, // otherletter
+      U_GC_NO_MASK, // othernumber
+      U_GC_PO_MASK, // otherpunctuation
+      U_GC_SO_MASK, // othersymbol
+      U_GC_P_MASK, // p*
+      U_GC_ZP_MASK, // paragraphseparator
+      U_GC_PC_MASK, // pc
+      U_GC_PD_MASK, // pd
+      U_GC_PE_MASK, // pe
+      U_GC_PF_MASK, // pf
+      U_GC_PI_MASK, // pi
+      U_GC_PO_MASK, // po
+      U_GC_CO_MASK, // privateuse
+      U_GC_PS_MASK, // ps
+      U_GC_P_MASK, // punctuation
+      U_GC_S_MASK, // s*
+      U_GC_SC_MASK, // sc
+      U_GC_Z_MASK, // separator
+      U_GC_SK_MASK, // sk
+      U_GC_SM_MASK, // sm
+      U_GC_SO_MASK, // so
+      U_GC_ZS_MASK, // spaceseparator
+      U_GC_MC_MASK, // spacingcombiningmark
+      U_GC_CS_MASK, // surrogate
+      U_GC_S_MASK, // symbol
+      U_GC_LT_MASK, // titlecase
+      U_GC_LT_MASK, // titlecaseletter
+      U_GC_LU_MASK, // uppercaseletter
+      U_GC_Z_MASK, // z*
+      U_GC_ZL_MASK, // zl
+      U_GC_ZP_MASK, // zp
+      U_GC_ZS_MASK, // zs
+   };
+
+
+   static const re_detail::character_pointer_range< ::UChar32>* ranges_begin = range_data;
+   static const re_detail::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0]));
+   
+   re_detail::character_pointer_range< ::UChar32> t = { p1, p2, };
+   const re_detail::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);
+   if((p != ranges_end) && (t == *p))
+      return icu_class_map[p - ranges_begin];
+   return 0;
+}

 icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const
 {
@@ -92,12 +353,33 @@ icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_
   };

   int id = ::boost::re_detail::get_default_class_id(p1, p2);
+   if(id >= 0)
+      return masks[id+1];
+   char_class_type result = lookup_icu_mask(p1, p2);
+   if(result != 0)
+      return result;
+
   if(id < 0)
   {
      string_type s(p1, p2);
-      for(string_type::size_type i = 0; i < s.size(); ++i)
+      string_type::size_type i = 0;
+      while(i < s.size())
+      {
         s[i] = static_cast<char>((::u_tolower)(s[i]));
+         if(::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_'))
+            s.erase(s.begin()+i, s.begin()+i+1);
+         else
+         {
+            s[i] = static_cast<char>((::u_tolower)(s[i]));
+            ++i;
+         }
+      }
      id = ::boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
+      if(id >= 0)
+         return masks[id+1];
+      result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size());
+      if(result != 0)
+         return result;
   }
   BOOST_ASSERT(id+1 < sizeof(masks) / sizeof(masks[0]));
   return masks[id+1];
@@ -109,6 +391,23 @@ icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_ty
   if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
   {
      std::string s(p1, p2);
+      // Try Unicode name:
+      UErrorCode err = U_ZERO_ERROR;
+      UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
+      if(U_SUCCESS(err))
+      {
+         result.push_back(c);
+         return result;
+      }
+      // Try Unicode-extended name:
+      err = U_ZERO_ERROR;
+      c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
+      if(U_SUCCESS(err))
+      {
+         result.push_back(c);
+         return result;
+      }
+      // Try POSIX name:
      s = ::boost::re_detail::lookup_default_collate_name(s);
      result.assign(s.begin(), s.end());
   }
@@ -121,21 +420,26 @@ bool icu_regex_traits::isctype(char_type c, char_class_type f) const
 {
   // check for standard catagories first:
   char_class_type m = char_class_type(1u << u_charType(c));
-   if((m & f).any()) 
+   if((m & f) != 0) 
      return true;
   // now check for special cases:
-   if((f & mask_blank).any() && u_isblank(c))
+   if(((f & mask_blank) != 0) && u_isblank(c))
      return true;
-   if((f & mask_space).any() && u_isspace(c))
+   if(((f & mask_space) != 0) && u_isspace(c))
      return true;
-   if((f & mask_xdigit).any() && (u_digit(c, 16) >= 0))
+   if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
      return true;
-   if((f & mask_unicode).any() && (c >= 0x100))
+   if(((f & mask_unicode) != 0) && (c >= 0x100))
      return true;
-   if((f & mask_underscore).any() && (c == '_'))
+   if(((f & mask_underscore) != 0) && (c == '_'))
+      return true;
+   if(((f & mask_any) != 0) && (c <= 0x10FFFF))
+      return true;
+   if(((f & mask_ascii) != 0) && (c <= 0x7F))
      return true;
   return false;
 }

 }

+#endif // BOOST_HAS_ICU
@@ -96,7 +96,11 @@ BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants
         "C",
         "Z",
         "G",
-         "!", };
+         "!",
+         "p",
+         "P",
+         "N",
+   };

   return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]);
 }
@@ -374,9 +378,9 @@ BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_defaul
      regex_constants::escape_type_not_class,        /*K*/
      regex_constants::escape_type_not_class,        /*L*/
      regex_constants::escape_type_not_class,        /*M*/
-      regex_constants::escape_type_not_class,        /*N*/
+      regex_constants::escape_type_named_char,       /*N*/
      regex_constants::escape_type_not_class,        /*O*/
-      regex_constants::escape_type_not_class,        /*P*/
+      regex_constants::escape_type_not_property,     /*P*/
      regex_constants::escape_type_Q,                /*Q*/
      regex_constants::escape_type_not_class,        /*R*/
      regex_constants::escape_type_not_class,        /*S*/
@@ -408,7 +412,7 @@ BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_defaul
      regex_constants::escape_type_class,        /*m*/
      regex_constants::escape_type_control_n,       /*n*/
      regex_constants::escape_type_class,           /*o*/
-      regex_constants::escape_type_class,           /*p*/
+      regex_constants::escape_type_property,        /*p*/
      regex_constants::escape_type_class,           /*q*/
      regex_constants::escape_type_control_r,       /*r*/
      regex_constants::escape_type_class,           /*s*/
@@ -26,6 +26,7 @@ test_sets.cpp
 test_simple_repeats.cpp
 test_tricky_cases.cpp
 test_icu.cpp
+test_unicode.cpp
 test_overloads.cpp
 test_operators.cpp
 ;
@@ -120,6 +121,17 @@ test-suite regex
            <lib>../../test/build/boost_test_exec_monitor
      ]
      
+      [ run unicode/unicode_iterator_test.cpp ]
+      [ regex-test static_mutex_test
+          : <template>test                      # sources
+            static_mutex/static_mutex_test.cpp
+            <dll>../../thread/build/boost_thread
+      ]
+      [ regex-test object_cache_test
+          : <template>test                      # sources
+            object_cache/object_cache_test.cpp
+      ]
+      
      [ run config_info/regex_config_info.cpp <template>test 
      : : : <test-info>always_show_run_output ]

@@ -53,6 +53,7 @@ int cpp_main(int /*argc*/, char * /*argv*/[])
   test_emacs();
   test_operators();
   test_overloads();
+   test_unicode();
   return error_count;
 }

@@ -215,6 +215,7 @@ void test_en_locale();
 void test_emacs();
 void test_operators();
 void test_overloads();
+void test_unicode();

 //
 // template instances:
@@ -19,7 +19,8 @@
 //
 // We can only build this if we have ICU support:
 //
-#ifdef TEST_ICU
+#include <boost/regex/config.hpp>
+#ifdef BOOST_HAS_ICU

 #include <boost/regex/icu.hpp>
 #include "test.hpp"
@@ -27,7 +28,6 @@

 void test_icu(const wchar_t&, const test_regex_search_tag& )
 {
-   typedef boost::u16_to_u32_iterator<std::wstring::const_iterator, ::UChar32> conv_iterator;
   boost::u32regex r;
   if(*test_locale::c_str())
   {
@@ -37,26 +37,28 @@ void test_icu(const wchar_t&, const test_regex_search_tag& )
      r.imbue(l);
   }

-   const std::wstring& expression = test_info<wchar_t>::expression();
+   std::vector< ::UChar32> expression;
+   expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end());
   boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options();
   try{
-      r.assign(conv_iterator(expression.begin()), conv_iterator(expression.end()), syntax_options);
+      r.assign(expression.begin(), expression.end(), syntax_options);
      if(r.status())
      {
         BOOST_REGEX_TEST_ERROR("Expression did not compile when it should have done, error code = " << r.status(), UChar32);
      }
-      const std::wstring& search_text = test_info<wchar_t>::search_text();
+      std::vector< ::UChar32> search_text;
+      search_text.assign(test_info<wchar_t>::search_text().begin(), test_info<wchar_t>::search_text().end());
      boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options();
      const int* answer_table = test_info<wchar_t>::answer_table();
-      boost::match_results<conv_iterator> what;
+      boost::match_results<std::vector< ::UChar32>::const_iterator> what;
      if(boost::regex_search(
-         conv_iterator(search_text.begin()),
-         conv_iterator(search_text.end()),
+         const_cast<std::vector< ::UChar32>const&>(search_text).begin(),
+         const_cast<std::vector< ::UChar32>const&>(search_text).end(),
         what,
         r,
         opts))
      {
-         test_result(what, conv_iterator(search_text.begin()), answer_table);
+         test_result(what, const_cast<std::vector< ::UChar32>const&>(search_text).begin(), answer_table);
      }
      else if(answer_table[0] >= 0)
      {
@@ -85,7 +87,8 @@ void test_icu(const wchar_t&, const test_regex_search_tag& )
 void test_icu(const wchar_t&, const test_invalid_regex_tag&)
 {
   typedef boost::u16_to_u32_iterator<std::wstring::const_iterator, ::UChar32> conv_iterator;
-   const std::wstring& expression = test_info<wchar_t>::expression();
+   std::vector< ::UChar32> expression;
+   expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end());
   boost::regex_constants::syntax_option_type syntax_options = test_info<wchar_t>::syntax_options();
   boost::u32regex r;
   if(*test_locale::c_str())
@@ -100,7 +103,7 @@ void test_icu(const wchar_t&, const test_invalid_regex_tag&)
   //
   try
   {
-      if(0 == r.assign(conv_iterator(expression.begin()), conv_iterator(expression.end()), syntax_options | boost::regex_constants::no_except).status())
+      if(0 == r.assign(expression.begin(), expression.end(), syntax_options | boost::regex_constants::no_except).status())
      {
         BOOST_REGEX_TEST_ERROR("Expression compiled when it should not have done so.", wchar_t);
      }
@@ -114,7 +117,7 @@ void test_icu(const wchar_t&, const test_invalid_regex_tag&)
   //
   bool have_catch = false;
   try{
-      r.assign(conv_iterator(expression.begin()), conv_iterator(expression.end()), syntax_options);
+      r.assign(expression.begin(), expression.end(), syntax_options);
 #ifdef BOOST_NO_EXCEPTIONS
      if(r.status())
         have_catch = true;
@@ -148,7 +151,8 @@ void test_icu(const wchar_t&, const test_invalid_regex_tag&)

 void test_icu(const wchar_t&, const test_regex_replace_tag&)
 {
-   const std::wstring& expression = test_info<wchar_t>::expression();
+   std::vector< ::UChar32> expression;
+   expression.assign(test_info<wchar_t>::expression().begin(), test_info<wchar_t>::expression().end());
   boost::regex_constants::syntax_option_type syntax_options = test_info<UChar32>::syntax_options();
   boost::u32regex r;
   try{
@@ -75,6 +75,8 @@ void test_replace()
   TEST_REGEX_REPLACE("(a+)(b+)", perl, " ...aabb,,", match_default|format_perl|format_no_copy, "\\0", "\0");
   TEST_REGEX_REPLACE("(a+)(b+)", perl, " ...aabb,,", match_default|format_perl|format_no_copy, "()?:", "()?:");
   TEST_REGEX_REPLACE("a+", perl, "...aaa,,", match_default|format_perl|format_no_copy, "\\0101", "A");
+   TEST_REGEX_REPLACE("(a+)(b+)", perl, " ...aabb,,", match_default|format_perl|format_no_copy, "\\1", "aa");
+   TEST_REGEX_REPLACE("(a+)(b+)", perl, " ...aabb,,", match_default|format_perl|format_no_copy, "\\2", "bb");

   // move to copying unmatched data:
   TEST_REGEX_REPLACE("a+", perl, "...aaa,,,", match_default|format_all, "bbb", "...bbb,,,");
@@ -101,5 +103,10 @@ void test_replace()
   TEST_REGEX_REPLACE("a+(b+)", perl, "...aaabb,,,", match_default|format_perl|format_no_copy, "(?1abc:def)", "(?1abc:def)");
   TEST_REGEX_REPLACE("a+(b+)", perl, "...", match_default|format_perl, "(?1abc:def)", "...");
   TEST_REGEX_REPLACE("a+(b+)", perl, "...", match_default|format_perl|format_no_copy, "(?1abc:def)", "");
+   // probe bug reports and other special cases:
+   TEST_REGEX_REPLACE("([^\\d]+).*", normal|icase, "tesd 999 test", match_default|format_all, "($1)replace", "tesd replace");
+   TEST_REGEX_REPLACE("(a)(b)", perl, "ab", match_default|format_all, "$1:$2", "a:b");
+   TEST_REGEX_REPLACE("(a(c)?)|(b)", perl, "acab", match_default|format_all, "(?1(?2(C:):A):B:)", "C:AB:");
+
 }

@@ -88,6 +88,7 @@ void test_sets()
   TEST_REGEX_SEARCH("[[:space:]]+", extended, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
   TEST_REGEX_SEARCH("[[:upper:]]+", extended, "aBCd", match_default, make_array(1, 3, -2, -2));
   TEST_REGEX_SEARCH("[[:xdigit:]]+", extended, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
+   TEST_REGEX_SEARCH("[\\d]+", perl, "a019b", match_default, make_array(1, 4, -2, -2));

   //
   // escapes are supported in character classes if we have either
@@ -243,5 +244,66 @@ void test_sets2()
   TEST_REGEX_SEARCH("[\\s]+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
   TEST_INVALID_REGEX("[\\S]", perl);
   TEST_REGEX_SEARCH("\\S+", perl, "  abc  ", match_default, make_array(2, 5, -2, -2));
+
+   // and some Perl style properties:
+   TEST_REGEX_SEARCH("\\pl+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\Pl+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\pu+", perl, "abABCab", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\Pu+", perl, "ABabcAB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\pd+", perl, "AB012AB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\PD+", perl, "01abc01", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\ps+", perl, "AB   AB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\PS+", perl, "  abc  ", match_default, make_array(2, 5, -2, -2));
+
+   TEST_REGEX_SEARCH("\\p{alnum}+", perl, "-%@a0X_-", match_default, make_array(3, 6, -2, -2));
+   TEST_REGEX_SEARCH("\\p{alpha}+", perl, " -%@aX_0-", match_default, make_array(4, 6, -2, -2));
+   TEST_REGEX_SEARCH("\\p{blank}+", perl, "a  \tb", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{cntrl}+", perl, " a\n\tb", match_default, make_array(2, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{digit}+", perl, "a019b", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{graph}+", perl, " a%b ", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{lower}+", perl, "AabC", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\p{print}+", perl, "AabC", match_default, make_array(0, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{punct}+", perl, " %-&\t", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{space}+", perl, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\p{upper}+", perl, "aBCd", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\p{xdigit}+", perl, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\P{alnum}+", perl, "-%@a", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\P{alpha}+", perl, " -%@a", match_default, make_array(0, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\P{blank}+", perl, "a  ", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{cntrl}+", perl, " a\n", match_default, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\P{digit}+", perl, "a0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{graph}+", perl, " a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{lower}+", perl, "Aa", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{print}+", perl, "Absc", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\P{punct}+", perl, " %", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{space}+", perl, "a ", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{upper}+", perl, "aB", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{xdigit}+", perl, "pf", match_default, make_array(0, 1, -2, -2));
+
+   TEST_INVALID_REGEX("\\p{invalid class}", perl);
+   TEST_INVALID_REGEX("\\p{upper", perl);
+   TEST_INVALID_REGEX("\\p{", perl);
+   TEST_INVALID_REGEX("\\p", perl);
+   TEST_INVALID_REGEX("\\P{invalid class}", perl);
+   TEST_INVALID_REGEX("\\P{upper", perl);
+   TEST_INVALID_REGEX("\\P{", perl);
+   TEST_INVALID_REGEX("\\P", perl);
+
+   // try named characters:
+   TEST_REGEX_SEARCH("\\N{zero}", perl, "0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{one}", perl, "1", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{two}", perl, "2", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{three}", perl, "3", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{a}", perl, "bac", match_default, make_array(1, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\N{\xf0}", perl, "b\xf0x", match_default, make_array(1, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\N{right-curly-bracket}", perl, "}", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{NUL}", perl, "\0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("[\\N{zero}-\\N{nine}]+", perl, " 0123456789 ", match_default, make_array(1, 11, -2, -2));
+
+   TEST_INVALID_REGEX("\\N", perl);
+   TEST_INVALID_REGEX("\\N{", perl);
+   TEST_INVALID_REGEX("\\N{}", perl);
+   TEST_INVALID_REGEX("\\N{invalid-name}", perl);
+   TEST_INVALID_REGEX("\\N{zero", perl);
 }

@@ -0,0 +1,147 @@
+/*
+ *
+ * Copyright (c) 2004
+ * Dr John Maddock
+ *
+ * Use, modification and distribution are subject to the 
+ * Boost Software License, Version 1.0. (See accompanying file 
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+ 
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         test_unicode.cpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Unicode specific tests (requires ICU).
+  */
+
+#include <boost/regex/config.hpp>
+#ifdef BOOST_HAS_ICU
+#include "test.hpp"
+
+#ifdef BOOST_MSVC
+#pragma warning(disable:4127)
+#endif
+
+#define TEST_REGEX_SEARCH_U(s, f, t, m, a)\
+   do{\
+      const wchar_t e[] = { s };\
+      std::wstring se(e, (sizeof(e) / sizeof(wchar_t)) - 1);\
+      const wchar_t st[] = { t };\
+      std::wstring sst(st, (sizeof(st) / sizeof(wchar_t)) - 1);\
+      test_info<wchar_t>::set_info(__FILE__, __LINE__, se, f, sst, m, a);\
+      test_icu(wchar_t(0), test_regex_search_tag());\
+   }while(0)
+
+#define TEST_REGEX_CLASS_U(classname, character)\
+   TEST_REGEX_SEARCH_U(\
+      BOOST_JOIN(L, \
+         BOOST_STRINGIZE(\
+            BOOST_JOIN([[:, BOOST_JOIN(classname, :]])))), \
+      perl, \
+      BOOST_JOIN(L, \
+         BOOST_STRINGIZE(\
+            BOOST_JOIN(\x, character))), \
+      match_default, \
+      make_array(0, 1, -2, -2))
+
+void test_unicode()
+{
+   using namespace boost::regex_constants;
+
+   TEST_REGEX_CLASS_U(L*, 3108);
+   TEST_REGEX_CLASS_U(Letter, 3108);
+   TEST_REGEX_CLASS_U(Lu, 2145);
+   TEST_REGEX_CLASS_U(Uppercase Letter, 2145);
+   TEST_REGEX_CLASS_U(Ll, 2146);
+   TEST_REGEX_CLASS_U(Lowercase Letter, 2146);
+   TEST_REGEX_CLASS_U(Lt, 1FFC);
+   TEST_REGEX_CLASS_U(Titlecase Letter, 1FFC);
+   TEST_REGEX_CLASS_U(Lm, 1D61);
+   TEST_REGEX_CLASS_U(Modifier Letter, 1D61);
+   TEST_REGEX_CLASS_U(Lo, 1974);
+   TEST_REGEX_CLASS_U(Other Letter, 1974);
+   TEST_REGEX_CLASS_U(M*, 20EA);
+   TEST_REGEX_CLASS_U(Mark, 20EA);
+   TEST_REGEX_CLASS_U(Mn, 20EA);
+   TEST_REGEX_CLASS_U(Non-Spacing Mark, 20EA);
+   TEST_REGEX_CLASS_U(Mc, 1938);
+   TEST_REGEX_CLASS_U(Spacing Combining Mark, 1938);
+   TEST_REGEX_CLASS_U(Me, 06DE);
+   TEST_REGEX_CLASS_U(Enclosing Mark, 06DE);
+   TEST_REGEX_CLASS_U(N*, 0669);
+   TEST_REGEX_CLASS_U(Number, 0669);
+   TEST_REGEX_CLASS_U(Nd, 0669);
+   TEST_REGEX_CLASS_U(Decimal Digit Number, 0669);
+   TEST_REGEX_CLASS_U(Nl, 303A);
+   TEST_REGEX_CLASS_U(Letter Number, 303A);
+   TEST_REGEX_CLASS_U(No, 2793);
+   TEST_REGEX_CLASS_U(Other Number, 2793);
+
+   TEST_REGEX_CLASS_U(S*, 2144);
+   TEST_REGEX_CLASS_U(Symbol, 2144);
+   TEST_REGEX_CLASS_U(Sm, 2144);
+   TEST_REGEX_CLASS_U(Math Symbol, 2144);
+   TEST_REGEX_CLASS_U(Sc, 20B1);
+   TEST_REGEX_CLASS_U(Currency Symbol, 20B1);
+   TEST_REGEX_CLASS_U(Sk, 1FFE);
+   TEST_REGEX_CLASS_U(Modifier Symbol, 1FFE);
+   TEST_REGEX_CLASS_U(So, 19FF);
+   TEST_REGEX_CLASS_U(Other Symbol, 19FF);
+
+   TEST_REGEX_CLASS_U(P*, 005F);
+   TEST_REGEX_CLASS_U(Punctuation, 005F);
+   TEST_REGEX_CLASS_U(Pc, 005F);
+   TEST_REGEX_CLASS_U(Connector Punctuation, 005F);
+   TEST_REGEX_CLASS_U(Pd, 002D);
+   TEST_REGEX_CLASS_U(Dash Punctuation, 002D);
+   TEST_REGEX_CLASS_U(Ps, 0028);
+   TEST_REGEX_CLASS_U(Open Punctuation, 0028);
+   TEST_REGEX_CLASS_U(Pe, FF63);
+   TEST_REGEX_CLASS_U(Close Punctuation, FF63);
+   TEST_REGEX_CLASS_U(Pi, 2039);
+   TEST_REGEX_CLASS_U(Initial Punctuation, 2039);
+   TEST_REGEX_CLASS_U(Pf, 203A);
+   TEST_REGEX_CLASS_U(Final Punctuation, 203A);
+   TEST_REGEX_CLASS_U(Po, 2038);
+   TEST_REGEX_CLASS_U(Other Punctuation, 2038);
+
+   TEST_REGEX_CLASS_U(Z*, 202F);
+   TEST_REGEX_CLASS_U(Separator, 202F);
+   TEST_REGEX_CLASS_U(Zs, 202F);
+   TEST_REGEX_CLASS_U(Space Separator, 202F);
+   TEST_REGEX_CLASS_U(Zl, 2028);
+   TEST_REGEX_CLASS_U(Line Separator, 2028);
+   TEST_REGEX_CLASS_U(Zp, 2029);
+   TEST_REGEX_CLASS_U(Paragraph Separator, 2029);
+   TEST_REGEX_CLASS_U(C*, 009F);
+   TEST_REGEX_CLASS_U(Other, 009F);
+   TEST_REGEX_CLASS_U(Cc, 009F);
+   TEST_REGEX_CLASS_U(Control, 009F);
+   TEST_REGEX_CLASS_U(Cf, FFFB);
+   TEST_REGEX_CLASS_U(Format, FFFB);
+   TEST_REGEX_CLASS_U(Cs, DC00);
+   TEST_REGEX_CLASS_U(Surrogate, DC00);
+   TEST_REGEX_CLASS_U(Co, F8FF);
+   TEST_REGEX_CLASS_U(Private Use, F8FF);
+   TEST_REGEX_CLASS_U(Cn, FFFF);
+   TEST_REGEX_CLASS_U(Not Assigned, FFFF);
+   TEST_REGEX_CLASS_U(Any, 2038);
+   TEST_REGEX_CLASS_U(Assigned, 2038);
+   TEST_REGEX_CLASS_U(ASCII, 7f);
+   TEST_REGEX_SEARCH_U(L"[[:Assigned:]]", perl, L"\xffff", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH_U(L"[[:ASCII:]]", perl, L"\x80", match_default, make_array(-2, -2));
+
+   TEST_REGEX_SEARCH_U(L"\\N{KHMER DIGIT SIX}", perl, L"\x17E6", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH_U(L"\\N{MODIFIER LETTER LOW ACUTE ACCENT}", perl, L"\x02CF", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH_U(L"\\N{SUPERSCRIPT ONE}", perl, L"\x00B9", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH_U(L"[\\N{KHMER DIGIT SIX}]", perl, L"\x17E6", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH_U(L"[\\N{MODIFIER LETTER LOW ACUTE ACCENT}]", perl, L"\x02CF", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH_U(L"[\\N{SUPERSCRIPT ONE}]", perl, L"\x00B9", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH_U(L"\\N{CJK UNIFIED IDEOGRAPH-7FED}", perl, L"\x7FED", match_default, make_array(0, 1, -2, -2));
+}
+
+#else
+void test_unicode(){}
+#endif
@@ -36,25 +36,70 @@ void spot_checks()
   BOOST_CHECK_EQUAL(*--it, 0xDF02u);
   BOOST_CHECK_EQUAL(*--it, 0xD800u);

+   ::boost::uint32_t spot8[] = { 0x004Du, 0x0430u, 0x4E8Cu, 0x10302u, };
+   typedef boost::u32_to_u8_iterator<const ::boost::uint32_t*> u32to8type;
+
+   u32to8type it8(spot8);
+   BOOST_CHECK_EQUAL(*it8++, 0x4Du);
+   BOOST_CHECK_EQUAL(*it8++, 0xD0u);
+   BOOST_CHECK_EQUAL(*it8++, 0xB0u);
+   BOOST_CHECK_EQUAL(*it8++, 0xE4u);
+   BOOST_CHECK_EQUAL(*it8++, 0xBAu);
+   BOOST_CHECK_EQUAL(*it8++, 0x8Cu);
+   BOOST_CHECK_EQUAL(*it8++, 0xF0u);
+   BOOST_CHECK_EQUAL(*it8++, 0x90u);
+   BOOST_CHECK_EQUAL(*it8++, 0x8Cu);
+   BOOST_CHECK_EQUAL(*it8++, 0x82u);
+
+   BOOST_CHECK_EQUAL(*--it8, 0x82u);
+   BOOST_CHECK_EQUAL(*--it8, 0x8Cu);
+   BOOST_CHECK_EQUAL(*--it8, 0x90u);
+   BOOST_CHECK_EQUAL(*--it8, 0xF0u);
+   BOOST_CHECK_EQUAL(*--it8, 0x8Cu);
+   BOOST_CHECK_EQUAL(*--it8, 0xBAu);
+   BOOST_CHECK_EQUAL(*--it8, 0xE4u);
+   BOOST_CHECK_EQUAL(*--it8, 0xB0u);
+   BOOST_CHECK_EQUAL(*--it8, 0xD0u);
+   BOOST_CHECK_EQUAL(*--it8, 0x4Du);
 }

 void test(const std::vector< ::boost::uint32_t>& v)
 {
   typedef std::vector< ::boost::uint32_t> vector32_type;
   typedef std::vector< ::boost::uint16_t> vector16_type;
+   typedef std::vector< ::boost::uint8_t>  vector8_type;
   typedef boost::u32_to_u16_iterator<vector32_type::const_iterator, ::boost::uint16_t> u32to16type;
   typedef boost::u16_to_u32_iterator<vector16_type::const_iterator, ::boost::uint32_t> u16to32type;
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
   typedef std::reverse_iterator<u32to16type> ru32to16type;
   typedef std::reverse_iterator<u16to32type> ru16to32type;
+#endif
+   typedef boost::u32_to_u8_iterator<vector32_type::const_iterator, ::boost::uint8_t> u32to8type;
+   typedef boost::u8_to_u32_iterator<vector8_type::const_iterator, ::boost::uint32_t> u8to32type;
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+   typedef std::reverse_iterator<u32to8type> ru32to8type;
+   typedef std::reverse_iterator<u8to32type> ru8to32type;
+#endif
+   vector8_type  v8;
   vector16_type v16;
   vector32_type v32;
   vector32_type::const_iterator i, j, k;
   //
   // begin by testing forward iteration, of 32-16 bit interconversions:
   //
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
   v16.assign(u32to16type(v.begin()), u32to16type(v.end()));
+#else
+   v16.clear();
+   std::copy(u32to16type(v.begin()), u32to16type(v.end()), std::back_inserter(v16));
+#endif
   BOOST_CHECK_EQUAL(std::distance(u32to16type(v.begin()), u32to16type(v.end())), v16.size());
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
   v32.assign(u16to32type(v16.begin()), u16to32type(v16.end()));
+#else
+   v32.clear();
+   std::copy(u16to32type(v16.begin()), u16to32type(v16.end()), std::back_inserter(v32));
+#endif
   BOOST_CHECK_EQUAL(std::distance(u16to32type(v16.begin()), u16to32type(v16.end())), v32.size());
   BOOST_CHECK_EQUAL(v.size(), v32.size());
   i = v.begin();
@@ -68,6 +113,7 @@ void test(const std::vector< ::boost::uint32_t>& v)
   //
   // test backward iteration, of 32-16 bit interconversions:
   //
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
   v16.assign(ru32to16type(u32to16type(v.end())), ru32to16type(u32to16type(v.begin())));
   BOOST_CHECK_EQUAL(std::distance(ru32to16type(u32to16type(v.end())), ru32to16type(u32to16type(v.begin()))), v16.size());
   std::reverse(v16.begin(), v16.end());
@@ -83,6 +129,53 @@ void test(const std::vector< ::boost::uint32_t>& v)
      i, 
      j, 
      k);
+#endif
+   //
+   // Test forward iteration, of 32-8 bit interconversions:
+   //
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+   v8.assign(u32to8type(v.begin()), u32to8type(v.end()));
+#else
+   v8.clear();
+   std::copy(u32to8type(v.begin()), u32to8type(v.end()), std::back_inserter(v8));
+#endif
+   BOOST_CHECK_EQUAL(std::distance(u32to8type(v.begin()), u32to8type(v.end())), v8.size());
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+   v32.assign(u8to32type(v8.begin()), u8to32type(v8.end()));
+#else
+   v32.clear();
+   std::copy(u8to32type(v8.begin()), u8to32type(v8.end()), std::back_inserter(v32));
+#endif
+   BOOST_CHECK_EQUAL(std::distance(u8to32type(v8.begin()), u8to32type(v8.end())), v32.size());
+   BOOST_CHECK_EQUAL(v.size(), v32.size());
+   i = v.begin();
+   j = i;
+   std::advance(j, (std::min)(v.size(), v32.size()));
+   k = v32.begin();
+   BOOST_CHECK_EQUAL_COLLECTIONS(
+      i, 
+      j, 
+      k);
+   //
+   // test backward iteration, of 32-8 bit interconversions:
+   //
+#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
+   v8.assign(ru32to8type(u32to8type(v.end())), ru32to8type(u32to8type(v.begin())));
+   BOOST_CHECK_EQUAL(std::distance(ru32to8type(u32to8type(v.end())), ru32to8type(u32to8type(v.begin()))), v8.size());
+   std::reverse(v8.begin(), v8.end());
+   v32.assign(ru8to32type(u8to32type(v8.end())), ru8to32type(u8to32type(v8.begin())));
+   BOOST_CHECK_EQUAL(std::distance(ru8to32type(u8to32type(v8.end())), ru8to32type(u8to32type(v8.begin()))), v32.size());
+   BOOST_CHECK_EQUAL(v.size(), v32.size());
+   std::reverse(v32.begin(), v32.end());
+   i = v.begin();
+   j = i;
+   std::advance(j, (std::min)(v.size(), v32.size()));
+   k = v32.begin();
+   BOOST_CHECK_EQUAL_COLLECTIONS(
+      i, 
+      j, 
+      k);
+#endif
 }

 int test_main( int, char* [] ) 
@@ -98,6 +191,12 @@ int test_main( int, char* [] )
   v.push_back(0xFFFF);
   v.push_back(0x10000);
   v.push_back(0x10FFFF);
+   v.push_back(0x80u);
+   v.push_back(0x80u - 1);
+   v.push_back(0x800u);
+   v.push_back(0x800u - 1);
+   v.push_back(0x10000u);
+   v.push_back(0x10000u - 1);
   test(v);
   return 0;
 }