Make icu.hpp header only.

2025-07-16 13:52:17 +02:00 · 2020-11-28 18:28:29 +00:00
parent c9d389014a
commit 9d64cf60ff
7 changed files with 444 additions and 605 deletions
--- a/build/Jamfile.v2
+++ b/build/Jamfile.v2
@ -122,7 +122,6 @@ explicit has_icu ;
 alias icu_options : : : : [ check-target-builds has_icu : $(ICU_OPTS) : ] ;

 SOURCES = 
-   icu.cpp
   posix_api.cpp
   regex.cpp
   regex_debug.cpp
--- a/include/boost/regex/icu.hpp
+++ b/include/boost/regex/icu.hpp
@ -67,7 +67,33 @@ public:
   {
      return m_locale;
   }
-   string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const;
+   string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const
+   {
+      // Returns pcoll's sort key for [p1, p2) as a string_type. TODO make thread safe!!!! :
+      typedef u32_to_u16_iterator<const char_type*, ::UChar> itt;
+      itt i(p1), j(p2);
+      std::vector< ::UChar> t(i, j);   // UTF-16 copy of the input handed to getSortKey
+      ::uint8_t result[100];           // fixed buffer tried first
+      ::int32_t len;
+      if (!t.empty())
+         len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result));
+      else
+         len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), result, sizeof(result));
+      if (std::size_t(len) > sizeof(result))
+      {  // key did not fit in the fixed buffer: ask again with a heap buffer of the reported size
+         scoped_array< ::uint8_t> presult(new ::uint8_t[len + 1]);
+         if (!t.empty())
+            len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len + 1);
+         else
+            len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), presult.get(), len + 1);
+         if ((len > 1) && (0 == presult[len - 1]))   // check len before indexing; drop one trailing zero byte
+            --len;
+         return string_type(presult.get(), presult.get() + len);
+      }
+      if ((len > 1) && (0 == result[len - 1]))   // check len first so len == 0 never reads result[-1]
+         --len;
+      return string_type(result, result + len);
+   }
   string_type transform(const char_type* p1, const char_type* p2) const
   {
      return do_transform(p1, p2, m_collator.get());
@ -101,11 +127,7 @@ public:
   typedef std::size_t                  size_type;
   typedef std::vector<char_type>       string_type;
   typedef U_NAMESPACE_QUALIFIER Locale locale_type;
-#ifdef BOOST_NO_INT64_T
-   typedef std::bitset<64>              char_class_type;
-#else
   typedef boost::uint64_t              char_class_type;
-#endif

   struct boost_extensions_tag{};

@ -113,7 +135,16 @@ public:
      : m_pimpl(BOOST_REGEX_DETAIL_NS::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale()))
   {
   }
-   static size_type length(const char_type* p);
+   static size_type length(const char_type* p)
+   {
+      // Number of elements before the terminating zero element.
+      const char_type* last = p;
+      while (*last)
+      {
+         ++last;
+      }
+      return static_cast<size_type>(last - p);
+   }

   ::boost::regex_constants::syntax_type syntax_type(char_type c)const
   {
@ -151,9 +182,149 @@ public:
   {
      return m_pimpl->transform_primary(p1, p2);
   }
-   char_class_type lookup_classname(const char_type* p1, const char_type* p2) const;
-   string_type lookup_collatename(const char_type* p1, const char_type* p2) const;
-   bool isctype(char_type c, char_class_type f) const;
+   char_class_type lookup_classname(const char_type* p1, const char_type* p2) const
+   {  // Maps the class name [p1, p2) to a char_class_type mask; yields masks[0] (0) when nothing matches.
+      constexpr char_class_type mask_blank = char_class_type(1) << offset_blank;
+      constexpr char_class_type mask_space = char_class_type(1) << offset_space;
+      constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit;
+      constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore;
+      constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode;
+      constexpr char_class_type mask_any = char_class_type(1) << offset_any;
+      constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii;
+      constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal;
+      constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical;
+
+      static const char_class_type masks[] =   // indexed by get_default_class_id() + 1
+      {
+         0,
+         U_GC_L_MASK | U_GC_ND_MASK,
+         U_GC_L_MASK,
+         mask_blank,
+         U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK,
+         U_GC_ND_MASK,
+         U_GC_ND_MASK,
+         (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK),
+         mask_horizontal,
+         U_GC_LL_MASK,
+         U_GC_LL_MASK,
+         ~(U_GC_C_MASK),
+         U_GC_P_MASK,
+         char_class_type(U_GC_Z_MASK) | mask_space,
+         char_class_type(U_GC_Z_MASK) | mask_space,
+         U_GC_LU_MASK,
+         mask_unicode,
+         U_GC_LU_MASK,
+         mask_vertical,
+         char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
+         char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
+         char_class_type(U_GC_ND_MASK) | mask_xdigit,
+      };
+
+      int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);   // first try the name exactly as given
+      if (idx >= 0)
+         return masks[idx + 1];
+      char_class_type result = lookup_icu_mask(p1, p2);   // then the ICU property-name table
+      if (result != 0)
+         return result;
+
+      if (idx < 0)   // always true here (idx >= 0 returned above); kept as a guard
+      {  // retry with a normalised copy: lower-cased, with spaces, '-' and '_' removed
+         string_type s(p1, p2);
+         string_type::size_type i = 0;
+         while (i < s.size())
+         {
+            s[i] = static_cast<char_type>((::u_tolower)(s[i]));   // keep the full code point; a cast to char would truncate it
+            if (::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_'))
+               s.erase(s.begin() + i, s.begin() + i + 1);
+            else
+            {
+               // s[i] was already lower-cased above, so just advance.
+               ++i;
+            }
+         }
+         if (!s.empty())
+            idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
+         if (idx >= 0)
+            return masks[idx + 1];
+         if (!s.empty())
+            result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size());
+         if (result != 0)
+            return result;
+      }
+      BOOST_ASSERT(std::size_t(idx + 1) < sizeof(masks) / sizeof(masks[0]));   // NOTE(review): presumably idx == -1 here, selecting masks[0]
+      return masks[idx + 1];
+   }
+   string_type lookup_collatename(const char_type* p1, const char_type* p2) const
+   {  // Resolves the collating-element name [p1, p2) to the element it names; may return an empty result.
+      string_type result;
+#ifdef BOOST_NO_CXX98_BINDERS
+      if (std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2)
+#else
+      if (std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
+#endif
+      {  // entered only when no element of [p1, p2) is greater than 0x7f
+         std::string s(p1, p2);
+         // Try Unicode name:
+         UErrorCode err = U_ZERO_ERROR;
+         UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
+         if (U_SUCCESS(err))
+         {
+            result.push_back(c);
+            return result;
+         }
+         // Try Unicode-extended name:
+         err = U_ZERO_ERROR;   // reset before the second lookup
+         c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
+         if (U_SUCCESS(err))
+         {
+            result.push_back(c);
+            return result;
+         }
+         // try POSIX name:
+         s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
+         result.assign(s.begin(), s.end());
+      }
+      if (result.empty() && (p2 - p1 == 1))   // nothing found: a one-element name stands for itself
+         result.push_back(*p1);
+      return result;
+   }
+   bool isctype(char_type c, char_class_type f) const
+   {  // Returns true when c matches at least one class bit set in f.
+      constexpr char_class_type mask_blank = char_class_type(1) << offset_blank;
+      constexpr char_class_type mask_space = char_class_type(1) << offset_space;
+      constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit;
+      constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore;
+      constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode;
+      constexpr char_class_type mask_any = char_class_type(1) << offset_any;
+      constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii;
+      constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal;
+      constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical;
+
+      // check for standard categories first:
+      char_class_type m = char_class_type(static_cast<char_class_type>(1) << u_charType(c));   // single bit at position u_charType(c)
+      if ((m & f) != 0)
+         return true;
+      // now check for special cases:
+      if (((f & mask_blank) != 0) && u_isblank(c))
+         return true;
+      if (((f & mask_space) != 0) && u_isspace(c))
+         return true;
+      if (((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
+         return true;
+      if (((f & mask_unicode) != 0) && (c >= 0x100))
+         return true;
+      if (((f & mask_underscore) != 0) && (c == '_'))
+         return true;
+      if (((f & mask_any) != 0) && (c <= 0x10FFFF))
+         return true;
+      if (((f & mask_ascii) != 0) && (c <= 0x7F))
+         return true;
+      if (((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast<char_type>('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK)))
+         return true;
+      if (((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast<char_type>('\v')))
+         return true;
+      return false;   // no requested class matched
+   }
   boost::intmax_t toi(const char_type*& p1, const char_type* p2, int radix)const
   {
      return BOOST_REGEX_DETAIL_NS::global_toi(p1, p2, radix, *this);
@ -195,20 +366,271 @@ private:
      offset_vertical = U_CHAR_CATEGORY_COUNT+8
   };

-   //
-   // and now the masks:
-   //
-   static const char_class_type mask_blank;
-   static const char_class_type mask_space;
-   static const char_class_type mask_xdigit;
-   static const char_class_type mask_underscore;
-   static const char_class_type mask_unicode;
-   static const char_class_type mask_any;
-   static const char_class_type mask_ascii;
-   static const char_class_type mask_horizontal;
-   static const char_class_type mask_vertical;
+   static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
+   {  // Looks the name [p1, p2) up in the property-name table below; returns its mask, or 0 if absent.
+      constexpr char_class_type mask_blank = char_class_type(1) << offset_blank;
+      constexpr char_class_type mask_space = char_class_type(1) << offset_space;
+      constexpr char_class_type mask_xdigit = char_class_type(1) << offset_xdigit;
+      constexpr char_class_type mask_underscore = char_class_type(1) << offset_underscore;
+      constexpr char_class_type mask_unicode = char_class_type(1) << offset_unicode;
+      constexpr char_class_type mask_any = char_class_type(1) << offset_any;
+      constexpr char_class_type mask_ascii = char_class_type(1) << offset_ascii;
+      constexpr char_class_type mask_horizontal = char_class_type(1) << offset_horizontal;
+      constexpr char_class_type mask_vertical = char_class_type(1) << offset_vertical;

-   static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2);
+      static const ::UChar32 prop_name_table[] = {   // all names stored back to back, no terminators
+         /* any */  'a', 'n', 'y',
+         /* ascii */  'a', 's', 'c', 'i', 'i',
+         /* assigned */  'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
+         /* c* */  'c', '*',
+         /* cc */  'c', 'c',
+         /* cf */  'c', 'f',
+         /* closepunctuation */  'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* cn */  'c', 'n',
+         /* co */  'c', 'o',
+         /* connectorpunctuation */  'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* control */  'c', 'o', 'n', 't', 'r', 'o', 'l',
+         /* cs */  'c', 's',
+         /* currencysymbol */  'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l',
+         /* dashpunctuation */  'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* decimaldigitnumber */  'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r',
+         /* enclosingmark */  'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
+         /* finalpunctuation */  'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* format */  'f', 'o', 'r', 'm', 'a', 't',
+         /* initialpunctuation */  'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* l* */  'l', '*',
+         /* letter */  'l', 'e', 't', 't', 'e', 'r',
+         /* letternumber */  'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
+         /* lineseparator */  'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
+         /* ll */  'l', 'l',
+         /* lm */  'l', 'm',
+         /* lo */  'l', 'o',
+         /* lowercaseletter */  'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
+         /* lt */  'l', 't',
+         /* lu */  'l', 'u',
+         /* m* */  'm', '*',
+         /* mark */  'm', 'a', 'r', 'k',
+         /* mathsymbol */  'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l',
+         /* mc */  'm', 'c',
+         /* me */  'm', 'e',
+         /* mn */  'm', 'n',
+         /* modifierletter */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
+         /* modifiersymbol */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
+         /* n* */  'n', '*',
+         /* nd */  'n', 'd',
+         /* nl */  'n', 'l',
+         /* no */  'n', 'o',
+         /* nonspacingmark */  'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
+         /* notassigned */  'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd',
+         /* number */  'n', 'u', 'm', 'b', 'e', 'r',
+         /* openpunctuation */  'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* other */  'o', 't', 'h', 'e', 'r',
+         /* otherletter */  'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r',
+         /* othernumber */  'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r',
+         /* otherpunctuation */  'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* othersymbol */  'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l',
+         /* p* */  'p', '*',
+         /* paragraphseparator */  'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
+         /* pc */  'p', 'c',
+         /* pd */  'p', 'd',
+         /* pe */  'p', 'e',
+         /* pf */  'p', 'f',
+         /* pi */  'p', 'i',
+         /* po */  'p', 'o',
+         /* privateuse */  'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e',
+         /* ps */  'p', 's',
+         /* punctuation */  'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n',
+         /* s* */  's', '*',
+         /* sc */  's', 'c',
+         /* separator */  's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
+         /* sk */  's', 'k',
+         /* sm */  's', 'm',
+         /* so */  's', 'o',
+         /* spaceseparator */  's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r',
+         /* spacingcombiningmark */  's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k',
+         /* surrogate */  's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e',
+         /* symbol */  's', 'y', 'm', 'b', 'o', 'l',
+         /* titlecase */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e',
+         /* titlecaseletter */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
+         /* uppercaseletter */  'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r',
+         /* z* */  'z', '*',
+         /* zl */  'z', 'l',
+         /* zp */  'z', 'p',
+         /* zs */  'z', 's',
+      };
+
+      static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = {   // [begin, end) of each name inside prop_name_table
+         { prop_name_table + 0, prop_name_table + 3, }, // any
+         { prop_name_table + 3, prop_name_table + 8, }, // ascii
+         { prop_name_table + 8, prop_name_table + 16, }, // assigned
+         { prop_name_table + 16, prop_name_table + 18, }, // c*
+         { prop_name_table + 18, prop_name_table + 20, }, // cc
+         { prop_name_table + 20, prop_name_table + 22, }, // cf
+         { prop_name_table + 22, prop_name_table + 38, }, // closepunctuation
+         { prop_name_table + 38, prop_name_table + 40, }, // cn
+         { prop_name_table + 40, prop_name_table + 42, }, // co
+         { prop_name_table + 42, prop_name_table + 62, }, // connectorpunctuation
+         { prop_name_table + 62, prop_name_table + 69, }, // control
+         { prop_name_table + 69, prop_name_table + 71, }, // cs
+         { prop_name_table + 71, prop_name_table + 85, }, // currencysymbol
+         { prop_name_table + 85, prop_name_table + 100, }, // dashpunctuation
+         { prop_name_table + 100, prop_name_table + 118, }, // decimaldigitnumber
+         { prop_name_table + 118, prop_name_table + 131, }, // enclosingmark
+         { prop_name_table + 131, prop_name_table + 147, }, // finalpunctuation
+         { prop_name_table + 147, prop_name_table + 153, }, // format
+         { prop_name_table + 153, prop_name_table + 171, }, // initialpunctuation
+         { prop_name_table + 171, prop_name_table + 173, }, // l*
+         { prop_name_table + 173, prop_name_table + 179, }, // letter
+         { prop_name_table + 179, prop_name_table + 191, }, // letternumber
+         { prop_name_table + 191, prop_name_table + 204, }, // lineseparator
+         { prop_name_table + 204, prop_name_table + 206, }, // ll
+         { prop_name_table + 206, prop_name_table + 208, }, // lm
+         { prop_name_table + 208, prop_name_table + 210, }, // lo
+         { prop_name_table + 210, prop_name_table + 225, }, // lowercaseletter
+         { prop_name_table + 225, prop_name_table + 227, }, // lt
+         { prop_name_table + 227, prop_name_table + 229, }, // lu
+         { prop_name_table + 229, prop_name_table + 231, }, // m*
+         { prop_name_table + 231, prop_name_table + 235, }, // mark
+         { prop_name_table + 235, prop_name_table + 245, }, // mathsymbol
+         { prop_name_table + 245, prop_name_table + 247, }, // mc
+         { prop_name_table + 247, prop_name_table + 249, }, // me
+         { prop_name_table + 249, prop_name_table + 251, }, // mn
+         { prop_name_table + 251, prop_name_table + 265, }, // modifierletter
+         { prop_name_table + 265, prop_name_table + 279, }, // modifiersymbol
+         { prop_name_table + 279, prop_name_table + 281, }, // n*
+         { prop_name_table + 281, prop_name_table + 283, }, // nd
+         { prop_name_table + 283, prop_name_table + 285, }, // nl
+         { prop_name_table + 285, prop_name_table + 287, }, // no
+         { prop_name_table + 287, prop_name_table + 301, }, // nonspacingmark
+         { prop_name_table + 301, prop_name_table + 312, }, // notassigned
+         { prop_name_table + 312, prop_name_table + 318, }, // number
+         { prop_name_table + 318, prop_name_table + 333, }, // openpunctuation
+         { prop_name_table + 333, prop_name_table + 338, }, // other
+         { prop_name_table + 338, prop_name_table + 349, }, // otherletter
+         { prop_name_table + 349, prop_name_table + 360, }, // othernumber
+         { prop_name_table + 360, prop_name_table + 376, }, // otherpunctuation
+         { prop_name_table + 376, prop_name_table + 387, }, // othersymbol
+         { prop_name_table + 387, prop_name_table + 389, }, // p*
+         { prop_name_table + 389, prop_name_table + 407, }, // paragraphseparator
+         { prop_name_table + 407, prop_name_table + 409, }, // pc
+         { prop_name_table + 409, prop_name_table + 411, }, // pd
+         { prop_name_table + 411, prop_name_table + 413, }, // pe
+         { prop_name_table + 413, prop_name_table + 415, }, // pf
+         { prop_name_table + 415, prop_name_table + 417, }, // pi
+         { prop_name_table + 417, prop_name_table + 419, }, // po
+         { prop_name_table + 419, prop_name_table + 429, }, // privateuse
+         { prop_name_table + 429, prop_name_table + 431, }, // ps
+         { prop_name_table + 431, prop_name_table + 442, }, // punctuation
+         { prop_name_table + 442, prop_name_table + 444, }, // s*
+         { prop_name_table + 444, prop_name_table + 446, }, // sc
+         { prop_name_table + 446, prop_name_table + 455, }, // separator
+         { prop_name_table + 455, prop_name_table + 457, }, // sk
+         { prop_name_table + 457, prop_name_table + 459, }, // sm
+         { prop_name_table + 459, prop_name_table + 461, }, // so
+         { prop_name_table + 461, prop_name_table + 475, }, // spaceseparator
+         { prop_name_table + 475, prop_name_table + 495, }, // spacingcombiningmark
+         { prop_name_table + 495, prop_name_table + 504, }, // surrogate
+         { prop_name_table + 504, prop_name_table + 510, }, // symbol
+         { prop_name_table + 510, prop_name_table + 519, }, // titlecase
+         { prop_name_table + 519, prop_name_table + 534, }, // titlecaseletter
+         { prop_name_table + 534, prop_name_table + 549, }, // uppercaseletter
+         { prop_name_table + 549, prop_name_table + 551, }, // z*
+         { prop_name_table + 551, prop_name_table + 553, }, // zl
+         { prop_name_table + 553, prop_name_table + 555, }, // zp
+         { prop_name_table + 555, prop_name_table + 557, }, // zs
+      };
+
+      static const icu_regex_traits::char_class_type icu_class_map[] = {   // mask for the range_data entry at the same index
+         mask_any, // any
+         mask_ascii, // ascii
+         (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
+         U_GC_C_MASK, // c*
+         U_GC_CC_MASK, // cc
+         U_GC_CF_MASK, // cf
+         U_GC_PE_MASK, // closepunctuation
+         U_GC_CN_MASK, // cn
+         U_GC_CO_MASK, // co
+         U_GC_PC_MASK, // connectorpunctuation
+         U_GC_CC_MASK, // control
+         U_GC_CS_MASK, // cs
+         U_GC_SC_MASK, // currencysymbol
+         U_GC_PD_MASK, // dashpunctuation
+         U_GC_ND_MASK, // decimaldigitnumber
+         U_GC_ME_MASK, // enclosingmark
+         U_GC_PF_MASK, // finalpunctuation
+         U_GC_CF_MASK, // format
+         U_GC_PI_MASK, // initialpunctuation
+         U_GC_L_MASK, // l*
+         U_GC_L_MASK, // letter
+         U_GC_NL_MASK, // letternumber
+         U_GC_ZL_MASK, // lineseparator
+         U_GC_LL_MASK, // ll
+         U_GC_LM_MASK, // lm
+         U_GC_LO_MASK, // lo
+         U_GC_LL_MASK, // lowercaseletter
+         U_GC_LT_MASK, // lt
+         U_GC_LU_MASK, // lu
+         U_GC_M_MASK, // m*
+         U_GC_M_MASK, // mark
+         U_GC_SM_MASK, // mathsymbol
+         U_GC_MC_MASK, // mc
+         U_GC_ME_MASK, // me
+         U_GC_MN_MASK, // mn
+         U_GC_LM_MASK, // modifierletter
+         U_GC_SK_MASK, // modifiersymbol
+         U_GC_N_MASK, // n*
+         U_GC_ND_MASK, // nd
+         U_GC_NL_MASK, // nl
+         U_GC_NO_MASK, // no
+         U_GC_MN_MASK, // nonspacingmark
+         U_GC_CN_MASK, // notassigned
+         U_GC_N_MASK, // number
+         U_GC_PS_MASK, // openpunctuation
+         U_GC_C_MASK, // other
+         U_GC_LO_MASK, // otherletter
+         U_GC_NO_MASK, // othernumber
+         U_GC_PO_MASK, // otherpunctuation
+         U_GC_SO_MASK, // othersymbol
+         U_GC_P_MASK, // p*
+         U_GC_ZP_MASK, // paragraphseparator
+         U_GC_PC_MASK, // pc
+         U_GC_PD_MASK, // pd
+         U_GC_PE_MASK, // pe
+         U_GC_PF_MASK, // pf
+         U_GC_PI_MASK, // pi
+         U_GC_PO_MASK, // po
+         U_GC_CO_MASK, // privateuse
+         U_GC_PS_MASK, // ps
+         U_GC_P_MASK, // punctuation
+         U_GC_S_MASK, // s*
+         U_GC_SC_MASK, // sc
+         U_GC_Z_MASK, // separator
+         U_GC_SK_MASK, // sk
+         U_GC_SM_MASK, // sm
+         U_GC_SO_MASK, // so
+         U_GC_ZS_MASK, // spaceseparator
+         U_GC_MC_MASK, // spacingcombiningmark
+         U_GC_CS_MASK, // surrogate
+         U_GC_S_MASK, // symbol
+         U_GC_LT_MASK, // titlecase
+         U_GC_LT_MASK, // titlecaseletter
+         U_GC_LU_MASK, // uppercaseletter
+         U_GC_Z_MASK, // z*
+         U_GC_ZL_MASK, // zl
+         U_GC_ZP_MASK, // zp
+         U_GC_ZS_MASK, // zs
+      };
+
+
+      const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data;
+      const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data) / sizeof(range_data[0]));
+
+      BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, };   // the name being searched for
+      const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);   // NOTE(review): relies on range_data being ordered by the range's operator<
+      if ((p != ranges_end) && (t == *p))
+         return icu_class_map[p - ranges_begin];   // same index into the parallel mask table
+      return 0;   // name not in the table
+   }

   boost::shared_ptr< ::boost::BOOST_REGEX_DETAIL_NS::icu_regex_traits_implementation> m_pimpl;
 };
@ -227,7 +649,6 @@ typedef match_results<const ::UChar*> u16match;
 //
 namespace BOOST_REGEX_DETAIL_NS{

-#if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__)
 template <class InputIterator>
 inline u32regex do_make_u32regex(InputIterator i, 
                              InputIterator j, 
@ -256,65 +677,6 @@ inline u32regex do_make_u32regex(InputIterator i,
 {
   return u32regex(i, j, opt);
 }
-#else
-template <class InputIterator>
-inline u32regex do_make_u32regex(InputIterator i, 
-                              InputIterator j, 
-                              boost::regex_constants::syntax_option_type opt, 
-                              const boost::mpl::int_<1>*)
-{
-   typedef boost::u8_to_u32_iterator<InputIterator, UChar32> conv_type;
-   typedef std::vector<UChar32> vector_type;
-   vector_type v;
-   conv_type a(i, i, j), b(j, i, j);
-   while(a != b)
-   {
-      v.push_back(*a);
-      ++a;
-   }
-   if(v.size())
-      return u32regex(&*v.begin(), v.size(), opt);
-   return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
-}
-
-template <class InputIterator>
-inline u32regex do_make_u32regex(InputIterator i, 
-                              InputIterator j, 
-                              boost::regex_constants::syntax_option_type opt, 
-                              const boost::mpl::int_<2>*)
-{
-   typedef boost::u16_to_u32_iterator<InputIterator, UChar32> conv_type;
-   typedef std::vector<UChar32> vector_type;
-   vector_type v;
-   conv_type a(i, i, j), b(j, i, j);
-   while(a != b)
-   {
-      v.push_back(*a);
-      ++a;
-   }
-   if(v.size())
-      return u32regex(&*v.begin(), v.size(), opt);
-   return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
-}
-
-template <class InputIterator>
-inline u32regex do_make_u32regex(InputIterator i, 
-                              InputIterator j, 
-                              boost::regex_constants::syntax_option_type opt, 
-                              const boost::mpl::int_<4>*)
-{
-   typedef std::vector<UChar32> vector_type;
-   vector_type v;
-   while(i != j)
-   {
-      v.push_back((UChar32)(*i));
-      ++i;
-   }
-   if(v.size())
-      return u32regex(&*v.begin(), v.size(), opt);
-   return u32regex(static_cast<UChar32 const*>(0), static_cast<u32regex::size_type>(0), opt);
-}
-#endif
 }

 // BOOST_REGEX_UCHAR_IS_WCHAR_T
@ -853,14 +1215,7 @@ OutputIterator do_regex_replace(OutputIterator out,
 {
   // unfortunately we have to copy the format string in order to pass in onward:
   std::vector<UChar32> f;
-#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
   f.assign(fmt.first, fmt.second);
-#else
-   f.clear();
-   I2 pos = fmt.first;
-   while(pos != fmt.second)
-      f.push_back(*pos++);
-#endif
   
   regex_iterator<I1, UChar32, icu_regex_traits> i(in.first, in.second, e, flags);
   regex_iterator<I1, UChar32, icu_regex_traits> j;
--- a/src/icu.cpp
+++ b/src/icu.cpp
@ -1,511 +0,0 @@
-/*
- *
- * Copyright (c) 2004
- * John Maddock
- *
- * Use, modification and distribution are subject to the 
- * Boost Software License, Version 1.0. (See accompanying file 
- * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- *
- */
-
- /*
-  *   LOCATION:    see http://www.boost.org for most recent version.
-  *   FILE         icu.cpp
-  *   VERSION      see <boost/version.hpp>
-  *   DESCRIPTION: Unicode regular expressions on top of the ICU Library.
-  */
-#define BOOST_REGEX_SOURCE
-
-#include <boost/regex/config.hpp>
-#ifdef BOOST_HAS_ICU
-#define BOOST_REGEX_ICU_INSTANTIATE
-#include <boost/regex/icu.hpp>
-
-#ifdef BOOST_INTEL
-#pragma warning(disable:981 2259 383)
-#endif
-
-namespace boost{
-
-namespace BOOST_REGEX_DETAIL_NS{
-
-icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const
-{
-   // TODO make thread safe!!!! :
-   typedef u32_to_u16_iterator<const char_type*, ::UChar> itt;
-   itt i(p1), j(p2);
-#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
-   std::vector< ::UChar> t(i, j);
-#else
-   std::vector< ::UChar> t;
-   while(i != j)
-      t.push_back(*i++);
-#endif
-   ::uint8_t result[100];
-   ::int32_t len;
-   if(!t.empty())
-      len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result));
-   else
-      len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), result, sizeof(result));
-   if(std::size_t(len) > sizeof(result))
-   {
-      scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]);
-      if(!t.empty())
-         len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1);
-      else
-         len = pcoll->getSortKey(static_cast<UChar const*>(0), static_cast< ::int32_t>(0), presult.get(), len+1);
-      if((0 == presult[len-1]) && (len > 1))
-         --len;
-#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
-      return string_type(presult.get(), presult.get()+len);
-#else
-      string_type sresult;
-      ::uint8_t const* ia = presult.get();
-      ::uint8_t const* ib = presult.get()+len;
-      while(ia != ib)
-         sresult.push_back(*ia++);
-      return sresult;
-#endif
-   }
-   if((0 == result[len-1]) && (len > 1))
-      --len;
-#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
-   return string_type(result, result+len);
-#else
-   string_type sresult;
-   ::uint8_t const* ia = result;
-   ::uint8_t const* ib = result+len;
-   while(ia != ib)
-      sresult.push_back(*ia++);
-   return sresult;
-#endif
-}
-
-}
-
-icu_regex_traits::size_type icu_regex_traits::length(const char_type* p)
-{
-   size_type result = 0;
-   while(*p)
-   {
-      ++p;
-      ++result;
-   }
-   return result;
-}
-
-//
-// define our bitmasks:
-//
-const icu_regex_traits::char_class_type icu_regex_traits::mask_blank = icu_regex_traits::char_class_type(1) << offset_blank;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex_traits::char_class_type(1) << offset_space;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_horizontal = icu_regex_traits::char_class_type(1) << offset_horizontal;
-const icu_regex_traits::char_class_type icu_regex_traits::mask_vertical = icu_regex_traits::char_class_type(1) << offset_vertical;
-
-icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2)
-{
-   static const ::UChar32 prop_name_table[] = {
-      /* any */  'a', 'n', 'y', 
-      /* ascii */  'a', 's', 'c', 'i', 'i', 
-      /* assigned */  'a', 's', 's', 'i', 'g', 'n', 'e', 'd', 
-      /* c* */  'c', '*', 
-      /* cc */  'c', 'c', 
-      /* cf */  'c', 'f', 
-      /* closepunctuation */  'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* cn */  'c', 'n', 
-      /* co */  'c', 'o', 
-      /* connectorpunctuation */  'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* control */  'c', 'o', 'n', 't', 'r', 'o', 'l', 
-      /* cs */  'c', 's', 
-      /* currencysymbol */  'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', 
-      /* dashpunctuation */  'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* decimaldigitnumber */  'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', 
-      /* enclosingmark */  'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', 
-      /* finalpunctuation */  'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* format */  'f', 'o', 'r', 'm', 'a', 't', 
-      /* initialpunctuation */  'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* l* */  'l', '*', 
-      /* letter */  'l', 'e', 't', 't', 'e', 'r', 
-      /* letternumber */  'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', 
-      /* lineseparator */  'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
-      /* ll */  'l', 'l', 
-      /* lm */  'l', 'm', 
-      /* lo */  'l', 'o', 
-      /* lowercaseletter */  'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', 
-      /* lt */  'l', 't', 
-      /* lu */  'l', 'u', 
-      /* m* */  'm', '*', 
-      /* mark */  'm', 'a', 'r', 'k', 
-      /* mathsymbol */  'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', 
-      /* mc */  'm', 'c', 
-      /* me */  'm', 'e', 
-      /* mn */  'm', 'n', 
-      /* modifierletter */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', 
-      /* modifiersymbol */  'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', 
-      /* n* */  'n', '*', 
-      /* nd */  'n', 'd', 
-      /* nl */  'n', 'l', 
-      /* no */  'n', 'o', 
-      /* nonspacingmark */  'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', 
-      /* notassigned */  'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', 
-      /* number */  'n', 'u', 'm', 'b', 'e', 'r', 
-      /* openpunctuation */  'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* other */  'o', 't', 'h', 'e', 'r', 
-      /* otherletter */  'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', 
-      /* othernumber */  'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', 
-      /* otherpunctuation */  'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* othersymbol */  'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', 
-      /* p* */  'p', '*', 
-      /* paragraphseparator */  'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
-      /* pc */  'p', 'c', 
-      /* pd */  'p', 'd', 
-      /* pe */  'p', 'e', 
-      /* pf */  'p', 'f', 
-      /* pi */  'p', 'i', 
-      /* po */  'p', 'o', 
-      /* privateuse */  'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', 
-      /* ps */  'p', 's', 
-      /* punctuation */  'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', 
-      /* s* */  's', '*', 
-      /* sc */  's', 'c', 
-      /* separator */  's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
-      /* sk */  's', 'k', 
-      /* sm */  's', 'm', 
-      /* so */  's', 'o', 
-      /* spaceseparator */  's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', 
-      /* spacingcombiningmark */  's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', 
-      /* surrogate */  's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', 
-      /* symbol */  's', 'y', 'm', 'b', 'o', 'l', 
-      /* titlecase */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 
-      /* titlecaseletter */  't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', 
-      /* uppercaseletter */  'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', 
-      /* z* */  'z', '*', 
-      /* zl */  'z', 'l', 
-      /* zp */  'z', 'p', 
-      /* zs */  'z', 's', 
-   };
-
-   static const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> range_data[] = {
-      { prop_name_table+0, prop_name_table+3, }, // any
-      { prop_name_table+3, prop_name_table+8, }, // ascii
-      { prop_name_table+8, prop_name_table+16, }, // assigned
-      { prop_name_table+16, prop_name_table+18, }, // c*
-      { prop_name_table+18, prop_name_table+20, }, // cc
-      { prop_name_table+20, prop_name_table+22, }, // cf
-      { prop_name_table+22, prop_name_table+38, }, // closepunctuation
-      { prop_name_table+38, prop_name_table+40, }, // cn
-      { prop_name_table+40, prop_name_table+42, }, // co
-      { prop_name_table+42, prop_name_table+62, }, // connectorpunctuation
-      { prop_name_table+62, prop_name_table+69, }, // control
-      { prop_name_table+69, prop_name_table+71, }, // cs
-      { prop_name_table+71, prop_name_table+85, }, // currencysymbol
-      { prop_name_table+85, prop_name_table+100, }, // dashpunctuation
-      { prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber
-      { prop_name_table+118, prop_name_table+131, }, // enclosingmark
-      { prop_name_table+131, prop_name_table+147, }, // finalpunctuation
-      { prop_name_table+147, prop_name_table+153, }, // format
-      { prop_name_table+153, prop_name_table+171, }, // initialpunctuation
-      { prop_name_table+171, prop_name_table+173, }, // l*
-      { prop_name_table+173, prop_name_table+179, }, // letter
-      { prop_name_table+179, prop_name_table+191, }, // letternumber
-      { prop_name_table+191, prop_name_table+204, }, // lineseparator
-      { prop_name_table+204, prop_name_table+206, }, // ll
-      { prop_name_table+206, prop_name_table+208, }, // lm
-      { prop_name_table+208, prop_name_table+210, }, // lo
-      { prop_name_table+210, prop_name_table+225, }, // lowercaseletter
-      { prop_name_table+225, prop_name_table+227, }, // lt
-      { prop_name_table+227, prop_name_table+229, }, // lu
-      { prop_name_table+229, prop_name_table+231, }, // m*
-      { prop_name_table+231, prop_name_table+235, }, // mark
-      { prop_name_table+235, prop_name_table+245, }, // mathsymbol
-      { prop_name_table+245, prop_name_table+247, }, // mc
-      { prop_name_table+247, prop_name_table+249, }, // me
-      { prop_name_table+249, prop_name_table+251, }, // mn
-      { prop_name_table+251, prop_name_table+265, }, // modifierletter
-      { prop_name_table+265, prop_name_table+279, }, // modifiersymbol
-      { prop_name_table+279, prop_name_table+281, }, // n*
-      { prop_name_table+281, prop_name_table+283, }, // nd
-      { prop_name_table+283, prop_name_table+285, }, // nl
-      { prop_name_table+285, prop_name_table+287, }, // no
-      { prop_name_table+287, prop_name_table+301, }, // nonspacingmark
-      { prop_name_table+301, prop_name_table+312, }, // notassigned
-      { prop_name_table+312, prop_name_table+318, }, // number
-      { prop_name_table+318, prop_name_table+333, }, // openpunctuation
-      { prop_name_table+333, prop_name_table+338, }, // other
-      { prop_name_table+338, prop_name_table+349, }, // otherletter
-      { prop_name_table+349, prop_name_table+360, }, // othernumber
-      { prop_name_table+360, prop_name_table+376, }, // otherpunctuation
-      { prop_name_table+376, prop_name_table+387, }, // othersymbol
-      { prop_name_table+387, prop_name_table+389, }, // p*
-      { prop_name_table+389, prop_name_table+407, }, // paragraphseparator
-      { prop_name_table+407, prop_name_table+409, }, // pc
-      { prop_name_table+409, prop_name_table+411, }, // pd
-      { prop_name_table+411, prop_name_table+413, }, // pe
-      { prop_name_table+413, prop_name_table+415, }, // pf
-      { prop_name_table+415, prop_name_table+417, }, // pi
-      { prop_name_table+417, prop_name_table+419, }, // po
-      { prop_name_table+419, prop_name_table+429, }, // privateuse
-      { prop_name_table+429, prop_name_table+431, }, // ps
-      { prop_name_table+431, prop_name_table+442, }, // punctuation
-      { prop_name_table+442, prop_name_table+444, }, // s*
-      { prop_name_table+444, prop_name_table+446, }, // sc
-      { prop_name_table+446, prop_name_table+455, }, // separator
-      { prop_name_table+455, prop_name_table+457, }, // sk
-      { prop_name_table+457, prop_name_table+459, }, // sm
-      { prop_name_table+459, prop_name_table+461, }, // so
-      { prop_name_table+461, prop_name_table+475, }, // spaceseparator
-      { prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark
-      { prop_name_table+495, prop_name_table+504, }, // surrogate
-      { prop_name_table+504, prop_name_table+510, }, // symbol
-      { prop_name_table+510, prop_name_table+519, }, // titlecase
-      { prop_name_table+519, prop_name_table+534, }, // titlecaseletter
-      { prop_name_table+534, prop_name_table+549, }, // uppercaseletter
-      { prop_name_table+549, prop_name_table+551, }, // z*
-      { prop_name_table+551, prop_name_table+553, }, // zl
-      { prop_name_table+553, prop_name_table+555, }, // zp
-      { prop_name_table+555, prop_name_table+557, }, // zs
-   };
-
-   static const icu_regex_traits::char_class_type icu_class_map[] = {
-      icu_regex_traits::mask_any, // any
-      icu_regex_traits::mask_ascii, // ascii
-      (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned
-      U_GC_C_MASK, // c*
-      U_GC_CC_MASK, // cc
-      U_GC_CF_MASK, // cf
-      U_GC_PE_MASK, // closepunctuation
-      U_GC_CN_MASK, // cn
-      U_GC_CO_MASK, // co
-      U_GC_PC_MASK, // connectorpunctuation
-      U_GC_CC_MASK, // control
-      U_GC_CS_MASK, // cs
-      U_GC_SC_MASK, // currencysymbol
-      U_GC_PD_MASK, // dashpunctuation
-      U_GC_ND_MASK, // decimaldigitnumber
-      U_GC_ME_MASK, // enclosingmark
-      U_GC_PF_MASK, // finalpunctuation
-      U_GC_CF_MASK, // format
-      U_GC_PI_MASK, // initialpunctuation
-      U_GC_L_MASK, // l*
-      U_GC_L_MASK, // letter
-      U_GC_NL_MASK, // letternumber
-      U_GC_ZL_MASK, // lineseparator
-      U_GC_LL_MASK, // ll
-      U_GC_LM_MASK, // lm
-      U_GC_LO_MASK, // lo
-      U_GC_LL_MASK, // lowercaseletter
-      U_GC_LT_MASK, // lt
-      U_GC_LU_MASK, // lu
-      U_GC_M_MASK, // m*
-      U_GC_M_MASK, // mark
-      U_GC_SM_MASK, // mathsymbol
-      U_GC_MC_MASK, // mc
-      U_GC_ME_MASK, // me
-      U_GC_MN_MASK, // mn
-      U_GC_LM_MASK, // modifierletter
-      U_GC_SK_MASK, // modifiersymbol
-      U_GC_N_MASK, // n*
-      U_GC_ND_MASK, // nd
-      U_GC_NL_MASK, // nl
-      U_GC_NO_MASK, // no
-      U_GC_MN_MASK, // nonspacingmark
-      U_GC_CN_MASK, // notassigned
-      U_GC_N_MASK, // number
-      U_GC_PS_MASK, // openpunctuation
-      U_GC_C_MASK, // other
-      U_GC_LO_MASK, // otherletter
-      U_GC_NO_MASK, // othernumber
-      U_GC_PO_MASK, // otherpunctuation
-      U_GC_SO_MASK, // othersymbol
-      U_GC_P_MASK, // p*
-      U_GC_ZP_MASK, // paragraphseparator
-      U_GC_PC_MASK, // pc
-      U_GC_PD_MASK, // pd
-      U_GC_PE_MASK, // pe
-      U_GC_PF_MASK, // pf
-      U_GC_PI_MASK, // pi
-      U_GC_PO_MASK, // po
-      U_GC_CO_MASK, // privateuse
-      U_GC_PS_MASK, // ps
-      U_GC_P_MASK, // punctuation
-      U_GC_S_MASK, // s*
-      U_GC_SC_MASK, // sc
-      U_GC_Z_MASK, // separator
-      U_GC_SK_MASK, // sk
-      U_GC_SM_MASK, // sm
-      U_GC_SO_MASK, // so
-      U_GC_ZS_MASK, // spaceseparator
-      U_GC_MC_MASK, // spacingcombiningmark
-      U_GC_CS_MASK, // surrogate
-      U_GC_S_MASK, // symbol
-      U_GC_LT_MASK, // titlecase
-      U_GC_LT_MASK, // titlecaseletter
-      U_GC_LU_MASK, // uppercaseletter
-      U_GC_Z_MASK, // z*
-      U_GC_ZL_MASK, // zl
-      U_GC_ZP_MASK, // zp
-      U_GC_ZS_MASK, // zs
-   };
-
-
-   const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_begin = range_data;
-   const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0]));
-   
-   BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32> t = { p1, p2, };
-   const BOOST_REGEX_DETAIL_NS::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t);
-   if((p != ranges_end) && (t == *p))
-      return icu_class_map[p - ranges_begin];
-   return 0;
-}
-
-icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const
-{
-   static const char_class_type masks[] = 
-   {
-      0,
-      U_GC_L_MASK | U_GC_ND_MASK, 
-      U_GC_L_MASK,
-      mask_blank,
-      U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK,
-      U_GC_ND_MASK,
-      U_GC_ND_MASK,
-      (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK),
-      mask_horizontal,
-      U_GC_LL_MASK,
-      U_GC_LL_MASK,
-      ~(U_GC_C_MASK),
-      U_GC_P_MASK,
-      char_class_type(U_GC_Z_MASK) | mask_space,
-      char_class_type(U_GC_Z_MASK) | mask_space,
-      U_GC_LU_MASK,
-      mask_unicode,
-      U_GC_LU_MASK,
-      mask_vertical,
-      char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, 
-      char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, 
-      char_class_type(U_GC_ND_MASK) | mask_xdigit,
-   };
-
-   int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
-   if(idx >= 0)
-      return masks[idx+1];
-   char_class_type result = lookup_icu_mask(p1, p2);
-   if(result != 0)
-      return result;
-
-   if(idx < 0)
-   {
-      string_type s(p1, p2);
-      string_type::size_type i = 0;
-      while(i < s.size())
-      {
-         s[i] = static_cast<char>((::u_tolower)(s[i]));
-         if(::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_'))
-            s.erase(s.begin()+i, s.begin()+i+1);
-         else
-         {
-            s[i] = static_cast<char>((::u_tolower)(s[i]));
-            ++i;
-         }
-      }
-      if(!s.empty())
-         idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
-      if(idx >= 0)
-         return masks[idx+1];
-      if(!s.empty())
-         result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size());
-      if(result != 0)
-         return result;
-   }
-   BOOST_ASSERT(std::size_t(idx+1) < sizeof(masks) / sizeof(masks[0]));
-   return masks[idx+1];
-}
-
-icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_type* p1, const char_type* p2) const
-{
-   string_type result;
-#ifdef BOOST_NO_CXX98_BINDERS
-   if(std::find_if(p1, p2, std::bind(std::greater< ::UChar32>(), std::placeholders::_1, 0x7f)) == p2)
-#else
-   if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2)
-#endif
-   {
-#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
-      std::string s(p1, p2);
-#else
-      std::string s;
-      const char_type* p3 = p1;
-      while(p3 != p2)
-         s.append(1, *p3++);
-#endif
-      // Try Unicode name:
-      UErrorCode err = U_ZERO_ERROR;
-      UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err);
-      if(U_SUCCESS(err))
-      {
-         result.push_back(c);
-         return result;
-      }
-      // Try Unicode-extended name:
-      err = U_ZERO_ERROR;
-      c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err);
-      if(U_SUCCESS(err))
-      {
-         result.push_back(c);
-         return result;
-      }
-      // try POSIX name:
-      s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
-#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
-      result.assign(s.begin(), s.end());
-#else
-      result.clear();
-      std::string::const_iterator si, sj;
-      si = s.begin();
-      sj = s.end();
-      while(si != sj)
-         result.push_back(*si++);
-#endif
-   }
-   if(result.empty() && (p2-p1 == 1))
-      result.push_back(*p1);
-   return result;
-}
-
-bool icu_regex_traits::isctype(char_type c, char_class_type f) const
-{
-   // check for standard catagories first:
-   char_class_type m = char_class_type(static_cast<char_class_type>(1) << u_charType(c));
-   if((m & f) != 0) 
-      return true;
-   // now check for special cases:
-   if(((f & mask_blank) != 0) && u_isblank(c))
-      return true;
-   if(((f & mask_space) != 0) && u_isspace(c))
-      return true;
-   if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0))
-      return true;
-   if(((f & mask_unicode) != 0) && (c >= 0x100))
-      return true;
-   if(((f & mask_underscore) != 0) && (c == '_'))
-      return true;
-   if(((f & mask_any) != 0) && (c <= 0x10FFFF))
-      return true;
-   if(((f & mask_ascii) != 0) && (c <= 0x7F))
-      return true;
-   if(((f & mask_vertical) != 0) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == static_cast<char_type>('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK)))
-      return true;
-   if(((f & mask_horizontal) != 0) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && u_isspace(c) && (c != static_cast<char_type>('\v')))
-      return true;
-   return false;
-}
-
-}
-
-#endif // BOOST_HAS_ICU
--- a/test/Jamfile.v2
+++ b/test/Jamfile.v2
@ -61,7 +61,6 @@ test_operators.cpp
 ;

 lib boost_regex_recursive :    
-   ../src/icu.cpp
   ../src/posix_api.cpp
   ../src/regex.cpp
   ../src/regex_debug.cpp
--- a/test/captures/Jamfile.v2
+++ b/test/captures/Jamfile.v2
@ -8,7 +8,6 @@ project
    ;

 EX_SOURCES = 
-   icu.cpp
   posix_api.cpp
   regex.cpp
   regex_debug.cpp
--- a/test/noeh_test/Jamfile.v2
+++ b/test/noeh_test/Jamfile.v2
@ -24,7 +24,6 @@ project


 lib boost_regex_noeh :    
-   ../../src/icu.cpp
   ../../src/posix_api.cpp
   ../../src/regex.cpp
   ../../src/regex_debug.cpp
--- a/test/test_consolidated.cpp
+++ b/test/test_consolidated.cpp
@ -10,7 +10,6 @@
 */


-#include <libs/regex/src/icu.cpp>
 #include <libs/regex/src/posix_api.cpp>
 #include <libs/regex/src/regex.cpp>
 #include <libs/regex/src/regex_debug.cpp>