merged changes in regex5 branch

[SVN r26692]
2005-01-13 17:06:21 +00:00
parent de0ab9092a
commit 71a0e020e2
275 changed files with 37305 additions and 27154 deletions
--- a/example/Jamfile
+++ b/example/Jamfile
@ -5,6 +5,41 @@ subproject libs/regex/example ;
 # bring in the rules for testing
 import testing  ;

+# Test for MFC/ATL once (guarded by gMFC_CHECK): for each VC++ install located
+# via a *COMNTOOLS variable, look for cstringt.h in its atlmfc include directory
+# and, if found, add a toolset-specific <define>TEST_MFC=1 to REGEX_MFC_OPTS.
+if ! $(gMFC_CHECK)
+{
+   gMFC_CHECK = true ;
+   if $(VS71COMNTOOLS)
+   {
+      VS71COMNTOOLS = $(VS71COMNTOOLS:J=" ") ;
+      if [ GLOB $(VS71COMNTOOLS)..\\..\\VC7\\atlmfc\\include : cstringt.h ]
+      {
+         ECHO MFC/ATL regex wrappers will be tested when building with VC7.1 ;
+         REGEX_MFC_OPTS += "<vc-7_1><*><define>TEST_MFC=1" ;
+      }
+   }
+   if $(VSCOMNTOOLS)
+   {
+      VSCOMNTOOLS = $(VSCOMNTOOLS:J=" ") ;
+      if [ GLOB $(VSCOMNTOOLS)\\..\\..\\VC7\\atlmfc\\include : cstringt.h ]
+      {
+         ECHO MFC/ATL regex wrappers will be tested when building with VC7 ;
+         REGEX_MFC_OPTS += "<vc7><*><define>TEST_MFC=1" ;
+      }
+   }
+   if $(VS80COMNTOOLS)
+   {
+      VS80COMNTOOLS = $(VS80COMNTOOLS:J=" ") ;
+      if [ GLOB $(VS80COMNTOOLS)..\\..\\VC8\\atlmfc\\include : cstringt.h ]
+      {
+         ECHO MFC/ATL regex wrappers will be tested when building with VC8 ;
+         REGEX_MFC_OPTS += "<vc-8_0><*><define>TEST_MFC=1" ;
+      }
+   }
+}
+
 rule regex-test-run ( sources + : input * )
 {
        return [ 
@ -20,6 +55,7 @@ rule regex-test-run ( sources + : input * )
         :  # test-files
         :  # requirements
         <threading>multi
+         $(REGEX_MFC_OPTS)
         :  # test name
        ] ;
 }
@ -29,6 +65,8 @@ test-suite regex-examples :
 [ regex-test-run timer/regex_timer.cpp <template>../build/msvc-stlport-tricky : $(BOOST_ROOT)/libs/regex/example/timer/input_script.txt ]
 [ regex-test-run jgrep/jgrep.cpp jgrep/main.cpp : -n boost/ $(BOOST_ROOT)/boost/regex.hpp ]
 [ regex-test-run snippets/credit_card_example.cpp ]
+[ regex-test-run snippets/mfc_example.cpp ]
+[ regex-test-run snippets/icu_example.cpp ]
 [ regex-test-run snippets/partial_regex_grep.cpp : $(BOOST_ROOT)/libs/regex/index.htm ]
 [ regex-test-run snippets/partial_regex_match.cpp : 1234-5678-8765-4 ]
 [ regex-test-run snippets/regex_grep_example_1.cpp : $(BOOST_ROOT)/boost/rational.hpp ]
--- a/example/Jamfile.v2
+++ b/example/Jamfile.v2
@ -1,7 +1,7 @@
 # copyright John Maddock 2003

 project
-    : requirements <threading>multi
+    : requirements <threading>multi <link>shared:<define>BOOST_REGEX_DYN_LINK=1
    ;


@ -26,6 +26,8 @@ test-suite regex-examples :
 [ regex-test-run timer/regex_timer.cpp : $(BOOST_ROOT)/libs/regex/example/timer/input_script.txt ]
 [ regex-test-run jgrep/jgrep.cpp jgrep/main.cpp : -n boost/ $(BOOST_ROOT)/boost/regex.hpp ]
 [ regex-test-run snippets/credit_card_example.cpp ]
+[ regex-test-run snippets/mfc_example.cpp ]
+[ regex-test-run snippets/icu_example.cpp ]
 [ regex-test-run snippets/partial_regex_grep.cpp : $(BOOST_ROOT)/libs/regex/index.htm ]
 [ regex-test-run snippets/partial_regex_match.cpp : 1234-5678-8765-4 ]
 [ regex-test-run snippets/regex_grep_example_1.cpp : $(BOOST_ROOT)/boost/rational.hpp ]
--- a/example/iso8859_1_regex_traits/iso8859_1_regex_traits.cpp
+++ b/example/iso8859_1_regex_traits/iso8859_1_regex_traits.cpp
@ -1,317 +0,0 @@
-/*
- * (C) Copyright Christain Engstrom 2001.
- * Distributed under the Boost Software License, Version 1.0. (See
- * accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
- 
-#include "iso8859_1_regex_traits.hpp"
-
-bool iso8859_1_regex_traits::is_class(char c, boost::uint_fast32_t f) const
-{
-  static const boost::uint_fast32_t cntrl =      base::char_class_cntrl;
-  static const boost::uint_fast32_t space =      base::char_class_space;
-  static const boost::uint_fast32_t blank =      base::char_class_blank;
-  static const boost::uint_fast32_t digit =      base::char_class_digit;
-  static const boost::uint_fast32_t xdigit =     base::char_class_xdigit;
-  static const boost::uint_fast32_t underscore = base::char_class_underscore;
-  static const boost::uint_fast32_t punct =      base::char_class_punct;
-  static const boost::uint_fast32_t upper =      base::char_class_upper |
-                                                 base::char_class_alpha;
-  static const boost::uint_fast32_t lower =      base::char_class_lower |
-                                                 base::char_class_alpha;
-
-
-  static const boost::uint_fast32_t flag_table[UCHAR_MAX + 1] = {
-//                            Hex  Oct  Dec  Char
-  cntrl                  , //   0    0   0
-  cntrl                  , //   1    1   1
-  cntrl                  , //   2    2   2
-  cntrl                  , //   3    3   3
-  cntrl                  , //   4    4   4
-  cntrl                  , //   5    5   5
-  cntrl                  , //   6    6   6
-  cntrl                  , //   7    7   7
-  cntrl                  , //   8   10   8
-  cntrl | space | blank  , //   9   11   9  <HT>
-  cntrl | space          , //   a   12  10  <LF>
-  cntrl | space          , //   b   13  11  <VT>
-  cntrl | space          , //   c   14  12  <FF>
-  cntrl | space          , //   d   15  13  <CR>
-  cntrl                  , //   e   16  14
-  cntrl                  , //   f   17  15
-  cntrl                  , //  10   20  16
-  cntrl                  , //  11   21  17
-  cntrl                  , //  12   22  18
-  cntrl                  , //  13   23  19
-  cntrl                  , //  14   24  20
-  cntrl                  , //  15   25  21
-  cntrl                  , //  16   26  22
-  cntrl                  , //  17   27  23
-  cntrl                  , //  18   30  24
-  cntrl                  , //  19   31  25
-  cntrl                  , //  1a   32  26
-  cntrl                  , //  1b   33  27
-  cntrl                  , //  1c   34  28
-  cntrl                  , //  1d   35  29
-  cntrl                  , //  1e   36  30
-  cntrl                  , //  1f   37  31
-  space | blank          , //  20   40  32       |  Space
-  punct                  , //  21   41  33  !
-  punct                  , //  22   42  34  "
-  punct                  , //  23   43  35  #
-  punct                  , //  24   44  36  $
-  punct                  , //  25   45  37  %
-  punct                  , //  26   46  38  &
-  punct                  , //  27   47  39  '
-  punct                  , //  28   50  40  (
-  punct                  , //  29   51  41  )
-  punct                  , //  2a   52  42  *
-  punct                  , //  2b   53  43  +
-  punct                  , //  2c   54  44
-  punct                  , //  2d   55  45  -
-  punct                  , //  2e   56  46  .
-  punct                  , //  2f   57  47  /
-  digit | xdigit         , //  30   60  48  0
-  digit | xdigit         , //  31   61  49  1
-  digit | xdigit         , //  32   62  50  2
-  digit | xdigit         , //  33   63  51  3
-  digit | xdigit         , //  34   64  52  4
-  digit | xdigit         , //  35   65  53  5
-  digit | xdigit         , //  36   66  54  6
-  digit | xdigit         , //  37   67  55  7
-  digit | xdigit         , //  38   70  56  8
-  digit | xdigit         , //  39   71  57  9
-  punct                  , //  3a   72  58  :
-  punct                  , //  3b   73  59  ;
-  punct                  , //  3c   74  60  <
-  punct                  , //  3d   75  61  =
-  punct                  , //  3e   76  62  >
-  punct                  , //  3f   77  63  ?
-  punct                  , //  40  100  64  @
-  upper | xdigit         , //  41  101  65  A
-  upper | xdigit         , //  42  102  66  B
-  upper | xdigit         , //  43  103  67  C
-  upper | xdigit         , //  44  104  68  D
-  upper | xdigit         , //  45  105  69  E
-  upper | xdigit         , //  46  106  70  F
-  upper                  , //  47  107  71  G
-  upper                  , //  48  110  72  H
-  upper                  , //  49  111  73  I
-  upper                  , //  4a  112  74  J
-  upper                  , //  4b  113  75  K
-  upper                  , //  4c  114  76  L
-  upper                  , //  4d  115  77  M
-  upper                  , //  4e  116  78  N
-  upper                  , //  4f  117  79  O
-  upper                  , //  50  120  80  P
-  upper                  , //  51  121  81  Q
-  upper                  , //  52  122  82  R
-  upper                  , //  53  123  83  S
-  upper                  , //  54  124  84  T
-  upper                  , //  55  125  85  U
-  upper                  , //  56  126  86  V
-  upper                  , //  57  127  87  W
-  upper                  , //  58  130  88  X
-  upper                  , //  59  131  89  Y
-  upper                  , //  5a  132  90  Z
-  punct                  , //  5b  133  91  [    |  Left square bracket
-  punct                  , //  5c  134  92  \    |  Backslash
-  punct                  , //  5d  135  93  ]    |  Right square bracket
-  punct                  , //  5e  136  94  ^    |  Circumflex
-  punct | underscore     , //  5f  137  95  _    |  Underscore
-  punct                  , //  60  140  96  `
-  lower | xdigit         , //  61  141  97  a
-  lower | xdigit         , //  62  142  98  b
-  lower | xdigit         , //  63  143  99  c
-  lower | xdigit         , //  64  144 100  d
-  lower | xdigit         , //  65  145 101  e
-  lower | xdigit         , //  66  146 102  f
-  lower                  , //  67  147 103  g
-  lower                  , //  68  150 104  h
-  lower                  , //  69  151 105  i
-  lower                  , //  6a  152 106  j
-  lower                  , //  6b  153 107  k
-  lower                  , //  6c  154 108  l
-  lower                  , //  6d  155 109  m
-  lower                  , //  6e  156 110  n
-  lower                  , //  6f  157 111  o
-  lower                  , //  70  160 112  p
-  lower                  , //  71  161 113  q
-  lower                  , //  72  162 114  r
-  lower                  , //  73  163 115  s
-  lower                  , //  74  164 116  t
-  lower                  , //  75  165 117  u
-  lower                  , //  76  166 118  v
-  lower                  , //  77  167 119  w
-  lower                  , //  78  170 120  x
-  lower                  , //  79  171 121  y
-  lower                  , //  7a  172 122  z
-  punct                  , //  7b  173 123  {
-  punct                  , //  7c  174 124  |
-  punct                  , //  7d  175 125  }
-  punct                  , //  7e  176 126  ~
-  cntrl                  , //  7f  177 127
-  cntrl                  , //  80  200 128
-  cntrl                  , //  81  201 129
-  cntrl                  , //  82  202 130
-  cntrl                  , //  83  203 131
-  cntrl                  , //  84  204 132
-  cntrl                  , //  85  205 133
-  cntrl                  , //  86  206 134
-  cntrl                  , //  87  207 135
-  cntrl                  , //  88  210 136
-  cntrl                  , //  89  211 137
-  cntrl                  , //  8a  212 138
-  cntrl                  , //  8b  213 139
-  cntrl                  , //  8c  214 140
-  cntrl                  , //  8d  215 141
-  cntrl                  , //  8e  216 142
-  cntrl                  , //  8f  217 143
-  cntrl                  , //  90  220 144
-  cntrl                  , //  91  221 145
-  cntrl                  , //  92  222 146
-  cntrl                  , //  93  223 147
-  cntrl                  , //  94  224 148
-  cntrl                  , //  95  225 149
-  cntrl                  , //  96  226 150
-  cntrl                  , //  97  227 151
-  cntrl                  , //  98  230 152
-  cntrl                  , //  99  231 153
-  cntrl                  , //  9a  232 154
-  cntrl                  , //  9b  233 155
-  cntrl                  , //  9c  234 156
-  cntrl                  , //  9d  235 157
-  cntrl                  , //  9e  236 158
-  cntrl                  , //  9f  237 159
-  punct                  , //  a0  240 160  <20>    |  Non-breaking space
-  punct                  , //  a1  241 161  <20>    |  Inverted exclamation mark
-  punct                  , //  a2  242 162  <20>    |  Cent sign
-  punct                  , //  a3  243 163  <20>    |  Pound sign
-  punct                  , //  a4  244 164  <20>    |  Currency sign
-  punct                  , //  a5  245 165  <20>    |  Yen sign
-  punct                  , //  a6  246 166  <20>    |  Broken bar
-  punct                  , //  a7  247 167  <20>    |  Section sign
-  punct                  , //  a8  250 168  <20>    |  Diaeresis
-  punct                  , //  a9  251 169  <20>    |  Copyright sign
-  punct                  , //  aa  252 170  <20>    |  Feminine ordinal indicator
-  punct                  , //  ab  253 171  <20>    |  Left-pointing double angle
-  punct                  , //  ac  254 172  <20>    |  Not sign
-  punct                  , //  ad  255 173  <20>    |  Soft hyphen
-  punct                  , //  ae  256 174  <20>    |  Registered trademark sign
-  punct                  , //  af  257 175  <20>    |  Macron
-  punct                  , //  b0  260 176  <20>    |  Degree sign
-  punct                  , //  b1  261 177  <20>    |  Plus-minus sign
-  punct                  , //  b2  262 178  <20>    |  Superscript two
-  punct                  , //  b3  263 179  <20>    |  Superscript three
-  punct                  , //  b4  264 180  <20>    |  Acute accent
-  punct                  , //  b5  265 181  <20>    |  Micro sign
-  punct                  , //  b6  266 182  <20>    |  Pilcrow sign
-  punct                  , //  b7  267 183  <20>    |  Middle dot
-  punct                  , //  b8  270 184  <20>    |  Cedilla
-  punct                  , //  b9  271 185  <20>    |  Superscript one
-  punct                  , //  ba  272 186  <20>    |  Masculine ordinal indicator
-  punct                  , //  bb  273 187  <20>    |  Right-pointing double angle
-  punct                  , //  bc  274 188  <20>    |  Fraction one quarter
-  punct                  , //  bd  275 189  <20>    |  Fraction one half
-  punct                  , //  be  276 190  <20>    |  Fraction three quarters
-  punct                  , //  bf  277 191  <20>    |  Inverted question mark
-  upper                  , //  c0  300 192  <20>    |  "A" with grave accent
-  upper                  , //  c1  301 193  <20>    |  "A" with acute accent
-  upper                  , //  c2  302 194  <20>    |  "A" with circumflex
-  upper                  , //  c3  303 195  <20>    |  "A" with tilde
-  upper                  , //  c4  304 196  <20>    |  "A" with diaeresis
-  upper                  , //  c5  305 197  <20>    |  "A" with ring above
-  upper                  , //  c6  306 198  <20>    |  "AE" ligature
-  upper                  , //  c7  307 199  <20>    |  "C" with cedilla
-  upper                  , //  c8  310 200  <20>    |  "E" with grave accent
-  upper                  , //  c9  311 201  <20>    |  "E" with acute accent
-  upper                  , //  ca  312 202  <20>    |  "E" with circumflex
-  upper                  , //  cb  313 203  <20>    |  "E" with diaeresis
-  upper                  , //  cc  314 204  <20>    |  "I" with grave accent
-  upper                  , //  cd  315 205  <20>    |  "I" with acute accent
-  upper                  , //  ce  316 206  <20>    |  "I" with circumflex
-  upper                  , //  cf  317 207  <20>    |  "I" with diaeresis
-  upper                  , //  d0  320 208  <20>    |  Upper Icelandic D (Eth)
-  upper                  , //  d1  321 209  <20>    |  "N" with tilde
-  upper                  , //  d2  322 210  <20>    |  "O" with grave accent
-  upper                  , //  d3  323 211  <20>    |  "O" with acute accent
-  upper                  , //  d4  324 212  <20>    |  "O" with circumflex
-  upper                  , //  d5  325 213  <20>    |  "O" with tilde
-  upper                  , //  d6  326 214  <20>    |  "O" with diaeresis
-  punct                  , //  d7  327 215  <20>    |  Multiplication sign
-  upper                  , //  d8  330 216  <20>    |  "O" with stroke
-  upper                  , //  d9  331 217  <20>    |  "U" with grave accent
-  upper                  , //  da  332 218  <20>    |  "U" with acute accent
-  upper                  , //  db  333 219  <20>    |  "U" with circumflex
-  upper                  , //  dc  334 220  <20>    |  "U" with diaeresis
-  upper                  , //  dd  335 221  <20>    |  "Y" with acute accent
-  upper                  , //  de  336 222  <20>    |  Upper Icelandic T (Thorn)
-  lower                  , //  df  337 223  <20>    |  German sharp s
-  lower                  , //  e0  340 224  <20>    |  "a" with grave accent
-  lower                  , //  e1  341 225  <20>    |  "a" with acute accent
-  lower                  , //  e2  342 226  <20>    |  "a" with circumflex
-  lower                  , //  e3  343 227  <20>    |  "a" with tilde
-  lower                  , //  e4  344 228  <20>    |  "a" with diaeresis
-  lower                  , //  e5  345 229  <20>    |  "a" with ring above
-  lower                  , //  e6  346 230  <20>    |  Latin small ligature ae
-  lower                  , //  e7  347 231  <20>    |  "c" with cedilla
-  lower                  , //  e8  350 232  <20>    |  "e" with grave accent
-  lower                  , //  e9  351 233  <20>    |  "e" with acute accent
-  lower                  , //  ea  352 234  <20>    |  "e" with circumflex
-  lower                  , //  eb  353 235  <20>    |  "e" with diaeresis
-  lower                  , //  ec  354 236  <20>    |  "i" with grave accent
-  lower                  , //  ed  355 237  <20>    |  "i" with acute accent
-  lower                  , //  ee  356 238  <20>    |  "i" with circumflex
-  lower                  , //  ef  357 239  <20>    |  "i" with diaeresis
-  lower                  , //  f0  360 240  <20>    |  Lower Icelandic d (eth)
-  lower                  , //  f1  361 241  <20>    |  "n" with tilde
-  lower                  , //  f2  362 242  <20>    |  "o" with grave accent
-  lower                  , //  f3  363 243  <20>    |  "o" with acute accent
-  lower                  , //  f4  364 244  <20>    |  "o" with circumflex
-  lower                  , //  f5  365 245  <20>    |  "o" with tilde
-  lower                  , //  f6  366 246  <20>    |  "o" with diaeresis
-  punct                  , //  f7  367 247  <20>    |  Division sign
-  lower                  , //  f8  370 248  <20>    |  "o" with oblique bar
-  lower                  , //  f9  371 249  <20>    |  "u" with grave accent
-  lower                  , //  fa  372 250  <20>    |  "u" with acute accent
-  lower                  , //  fb  373 251  <20>    |  "u" with circumflex
-  lower                  , //  fc  374 252  <20>    |  "u" with diaeresis
-  lower                  , //  fd  375 253  <20>    |  "y" with acute accent
-  lower                  , //  fe  376 254  <20>    |  Lower Icelandic t (thorn)
-  lower                    //  ff  377 255  <20>    |  "y" with diaeresis
-  };
-
-  return (flag_table[unsigned char(c)] & f) != 0;
-}
-
-
-//============================================================================
-char iso8859_1_regex_traits::translate(char c, bool icase) const
-{
-//----------------------------------------------------------------------------
-//      "ABCDEFGHIJKLMNOPQRSTUVWXYZ" -> "abcdefghijklmnopqrstuvwxyz"
-//  "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>" -> "<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>"
-//----------------------------------------------------------------------------
-  static const char lower_case_map[UCHAR_MAX + 1] = {
-    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
-  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
-   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
-  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
-  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
-  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
-  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
-  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
-  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
-  240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
-  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
-  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
-};
-
-  return icase ? lower_case_map[unsigned char(c)] : c;
-}
-
--- a/example/iso8859_1_regex_traits/iso8859_1_regex_traits.hpp
+++ b/example/iso8859_1_regex_traits/iso8859_1_regex_traits.hpp
@ -1,24 +0,0 @@
-/*
- * (C) Copyright Christain Engstrom 2001.
- * Distributed under the Boost Software License, Version 1.0. (See
- * accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#ifndef BOOST_ISO8859_1_REGEX_TRAITS_HPP
-#define BOOST_ISO8859_1_REGEX_TRAITS_HPP
-  
-class iso8859_1_regex_traits : public boost::c_regex_traits<char> {
-private:
-  typedef boost::c_regex_traits<char> base;
-public:
-  iso8859_1_regex_traits() {}
-  ~iso8859_1_regex_traits() {}
-
-  bool is_class(char c, boost::uint_fast32_t f) const;
-  char translate(char c, bool icase) const;
-};
-
-#endif
-
-
--- a/example/iso8859_1_regex_traits/readme.txt
+++ b/example/iso8859_1_regex_traits/readme.txt
@ -1,49 +0,0 @@
-I have written a class iso8859_1_regex_traits, which I enclose.  Any
-comments will be very welcome.
-
-The class is derived from boost::c_regex_traits<char>, and just redefines
-the member function is_class so that it will handle the character classes
-for all the 8 bit characters in the ISO8859-1 (Latin 1) alphabet.  The
-enclosed file iso8859_1.txt lists the character classes for all the 256
-characters.
-
-For the 7 bit ASCII characters up to octal 177, the character classes are
-identical to the ones returned by boost::c_regex_traits<char> and
-boost::w32_regex_traits<char>.  (I have only tried this under Windows.)
-
-For the characters above octal 177, iso8859_1_regex_traits differs from
-boost::w32_regex_traits<char> in the following ways:
-
-1) The characters between octal 200 and octal 237 all belong to the
-character class cntrl only, whereas most of them belong to print graph punct
-in Windows.  This reflects a true difference between the ISO8859-1 and
-Windows character sets, so it should be pretty uncontroversial.
-
-2) The superscript digits <20><><EFBFBD> (octal 271, 262 and 263) belong to print graph
-punct only in iso8859_1_regex_traits.  In w32_regex_traits these characters
-are defined to belong to digit as well, which automatically makes them
-members of alnum and word too.  I consider this to be a mistake made by
-those who defined the Windows locale, as I can see few, if any, uses for a
-character class defined in this manner, and the definition precludes the use
-of the digit character class for many normal and useful purposes, such as
-checking if a string contains an integer number.  (Please note that this
-criticism is directed towards Microsoft itself rather than towards the regex
-package, since this definition is a part of Visual C++.)
-
-3) The non-breaking space character (octal 240) belongs to print graph punct
-in iso8859_1_regex_traits.  In w32_regex_traits it is instead defined to
-belong print blank space, i.e.: it is defined in exactly the same way as the
-ordinary octal 40 space character.  Again I consider this to be a mistake in
-the Windows locale, that renders the space character class fairly useless
-for any applications where non-breaking spaces might occur.  After all, the
-whole point in having both ordinary and non-breaking spaces in the character
-set is that programs should be able to treat them differently, but with the
-Windows locale, this is not possible.
-
-As stated above I look forward to any comments, both relating to the
-implementation as such and to the choices I have made in the definition of
-the character classes.
-
-Christian Engstr<74>m
-
-
--- a/example/jgrep/jgrep.cpp
+++ b/example/jgrep/jgrep.cpp
@ -53,6 +53,8 @@ public:
   ogrep_predicate(unsigned int& i, const char* p, iterator start, iterator end) : lines(i), filename(p), last_line(-1), end_of_storage(end), last_line_start(start) {}
   ogrep_predicate(const ogrep_predicate& o) : lines(o.lines), filename(o.filename), last_line(o.last_line), end_of_storage(o.end_of_storage), last_line_start(o.last_line_start) {}
   bool operator () (const boost::match_results<iterator, Allocator>& i);
+private:
+   void operator=(const ogrep_predicate&);
 };

 // ideally we'd ignor the allocator type and use a template member function
--- a/example/jgrep/jgrep.h
+++ b/example/jgrep/jgrep.h
@ -19,15 +19,7 @@

 #include <boost/regex.hpp>

-// case sensitive reg_expression determines our allocator type:
-typedef boost::reg_expression<char> re_type;
-typedef re_type::allocator_type allocator_type;
-
-// now declare static (global) data, including an allocator
-// instance which we'll pass to all instances that require an allocator.
-
-extern allocator_type a;
-
+typedef boost::basic_regex<char> re_type;
 extern re_type e;

 // flags for output:
--- a/example/jgrep/main.cpp
+++ b/example/jgrep/main.cpp
@ -15,40 +15,26 @@
  */


-#include <stdio.h>
-#include <stdlib.h>
-#include <boost/regex.hpp>
-#ifdef JM_OLD_IOSTREAM
-#include <iostream.h>
-#else
+#include <cstdio>
+#include <cstdlib>
 #include <iostream>
-using std::cout;
-using std::cin;
-using std::cerr;
-using std::endl;
-#endif
-#ifdef __BORLANDC__
-#  pragma hrdstop
-#endif
+#include <algorithm>

-#ifdef BOOST_REGEX_V3
-#include <boost/regex/v3/fileiter.hpp>
-#else
+#include <boost/regex.hpp>
 #include <boost/regex/v4/fileiter.hpp>
-#endif
 #include "jgrep.h"
 #ifndef BOOST_REGEX_NO_FILEITER

-#ifndef JM_ALGO_INCLUDED
-// HP and SGI STL's use <algo.h> instead
-// this will have been pulled in by <jm_cfg.h>
-// for std::distance
-#include <algorithm>
+#ifdef BOOST_NO_STDC_NAMESPACE
+namespace std{
+   using ::strcpy;
+   using ::strcat;
+   using ::sprintf;
+}
 #endif

-allocator_type a;

-re_type e(a);
+re_type e;
 //rei_type ei(a);

 // flags for output:
@ -65,7 +51,7 @@ bool verbose = false;

 void usage()
 {
-   cout <<
+   std::cout <<
 "jgrep version 0.95\n"
 "usage: jgrep [-options] expression file [files...]\n"
 "\n"
@ -84,7 +70,7 @@ void usage()
 "expression: a regular expression, or a literal string if -r- is specified\n"
 "\n"
 "files:  one or more files to search, the names can contain the wildcard\n"
-"        characters ? and *\n" << endl;
+"        characters ? and *\n" << std::endl;

 }

@ -123,9 +109,9 @@ void parse_switch(const char* flag)
                  verbose = false;
                  break;
               default:
-                  cout << "Undefined option -";
-                  cout.put(*flag);
-                  cout << endl;
+                  std::cout << "Undefined option -";
+                  std::cout.put(*flag);
+                  std::cout << std::endl;
            }
            // turn off prev character:
            break;
@ -159,9 +145,9 @@ void parse_switch(const char* flag)
         case '+':
            break;
         default:
-            cout << "Undefined option -";
-            cout.put(*flag);
-            cout << endl;
+            std::cout << "Undefined option -";
+            std::cout.put(*flag);
+            std::cout << std::endl;
      }
      ++flag;
   }
@ -172,7 +158,6 @@ using namespace boost;
 void HandleFile(const char* wild)
 {
   using namespace boost;
-   jm_trace("Handling file " << wild);
   file_iterator end;
   file_iterator start(wild);

@ -193,14 +178,12 @@ void HandleFile(const char* wild)
         std::strcat(buf, directory_iterator::separator());
         std::strcat(buf, "*");
      }
-      jm_trace("Enumerating directories: " << buf);
      directory_iterator dstart(buf);
      directory_iterator dend;

      // now get the file mask bit of "wild":
      const char* ptr = wild + rootlen;
      if(*ptr) ++ptr;
-      jm_trace("File mask part is: " << ptr);

      while(dstart != dend)
      {
@ -217,7 +200,6 @@ int done = 0;
 void HandleArg(const char* arg)
 {
   using namespace boost;
-   jm_trace("Handling argument: " << arg);
   if(*arg == '-')
   {
      parse_switch(arg);
@ -286,7 +268,7 @@ int main(int argc, char * argv[])

 int main(int argc, char * argv[])
 {
-   std::cout <<
+   std::cout <<
   "\n<note>\n"
   "This functionality is not available on with this compiler on this platform.\n"
   "</note>\n";
--- a/example/snippets/captures_example.cpp
+++ b/example/snippets/captures_example.cpp
@ -1,14 +1,21 @@
 /*
 *
- * Copyright (c) 2004
+ * Copyright (c) 2003-2004
 * Dr John Maddock
 *
- * Use, modification and distribution are subject to the
- * Boost Software License, Version 1.0. (See accompanying file
+ * Use, modification and distribution are subject to the 
+ * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */

+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         captures_example.cpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: Demonstrate the behaviour of captures.
+  */
+
 #include <boost/regex.hpp>
 #include <iostream>

--- a/example/snippets/icu_example.cpp
+++ b/example/snippets/icu_example.cpp
@ -0,0 +1,182 @@
+/*
+ *
+ * Copyright (c) 2004
+ * Dr John Maddock
+ *
+ * Use, modification and distribution are subject to the 
+ * Boost Software License, Version 1.0. (See accompanying file 
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         icu_example.cpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: examples of using Boost.Regex with ICU and Unicode strings.
+  */
+
+#include <boost/regex/config.hpp>
+
+#ifdef BOOST_HAS_ICU
+
+#include <boost/regex/icu.hpp>
+#include <iostream>
+#include <assert.h>
+
+//
+// Returns true when *password* meets our password requirements,
+// as defined by the regular expression *requirements*.
+//
+bool is_valid_password(const UnicodeString& password, const UnicodeString& requirements)
+{
+   return boost::u32regex_match(password, boost::make_u32regex(requirements)); // the pattern is compiled afresh on every call
+}
+
+//
+// Extract filename part of a path from a UTF-8 encoded std::string and return the result
+// as another std::string; throws std::runtime_error when the path does not match:
+//
+std::string get_filename(const std::string& path)
+{
+   boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
+   boost::smatch what;
+   if(boost::u32regex_match(path, what, r))
+   {
+      // extract $1 (the trailing run of non-backslash characters) as a std::string:
+      return what.str(1);
+   }
+   else
+   {
+      throw std::runtime_error("Invalid pathname");
+   }
+}
+
+UnicodeString extract_greek(const UnicodeString& text)
+{
+   // searches through some UTF-16 encoded text for a block encoded in Greek,
+   // this expression is imperfect, but the best we can do for now - searching
+   // for specific scripts is actually pretty hard to do right.
+   boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
+   boost::u16match what;
+   if(boost::u32regex_search(text, what, r))
+   {
+      // extract $0 as a UnicodeString:
+      return UnicodeString(what[0].first, what.length(0));
+   }
+   else
+   {
+      throw std::runtime_error("No Greek found!"); // no character in the range 0x370-0x3FF was found
+   }
+}
+
+void enumerate_currencies(const std::string& text)
+{
+   // enumerate and print all the currency symbols, along
+   // with any associated numeric values:
+   const char* re = 
+      "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"        // $1: optional leading currency symbol, then any format/control/separator characters
+      "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"      // $2: optional number: digits, optionally punctuation and more digits
+      "(?(1)"                                   // if $1 matched then nothing more is required ...
+         "|(?(2)"                               // ... otherwise, if $2 matched,
+            "[[:Cf:][:Cc:][:Z*:]]*"             //     allow format/control/separator characters
+         ")"
+         "[[:Sc:]]"                             // and require a trailing currency symbol
+      ")";
+   boost::u32regex r = boost::make_u32regex(re);
+   boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j; // j is default constructed: the end marker for the loop
+   while(i != j)
+   {
+      std::cout << (*i)[0] << std::endl; // $0: the whole of the current match
+      ++i;
+   }
+}
+
+void enumerate_currencies2(const std::string& text)
+{
+   // enumerate the currency matches using the token iterator interface,
+   // printing only sub-expression 1 of each match:
+   const char* re = 
+      "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
+      "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
+      "(?(1)"
+         "|(?(2)"
+            "[[:Cf:][:Cc:][:Z*:]]*"
+         ")"
+         "[[:Sc:]]"
+      ")";
+   boost::u32regex r = boost::make_u32regex(re);
+   boost::u32regex_token_iterator<std::string::const_iterator> 
+      i(boost::make_u32regex_token_iterator(text, r, 1)), j; // final argument 1 selects $1; j is the end marker
+   while(i != j)
+   {
+      std::cout << *i << std::endl;
+      ++i;
+   }
+}
+
+
+//
+// Take a credit card number as a string of digits, 
+// and reformat it as a human readable string with "-"
+// separating each group of four digits;
+// note that we're mixing a UTF-32 regex, with a UTF-16
+// string and a UTF-8 format specifier, and it still all 
+// just works:
+//
+const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
+const char* human_format = "$1-$2-$3-$4";
+
+UnicodeString human_readable_card_number(const UnicodeString& s)
+{
+   return boost::u32regex_replace(s, e, human_format);
+}
+
+
+int main()
+{
+   // password checks using u32regex_match:
+   UnicodeString pwd = "abcDEF---";
+   UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
+   bool b = is_valid_password(pwd, pwd_check);
+   assert(b);
+   pwd = "abcD-";
+   b = is_valid_password(pwd, pwd_check);
+   assert(!b);
+   // filename extraction with u32regex_match:
+   std::string file = "abc.hpp";
+   file = get_filename(file);
+   assert(file == "abc.hpp");
+   file = "c:\\a\\b\\c\\d.h";
+   file = get_filename(file);
+   assert(file == "d.h");
+
+   // Greek text extraction with u32regex_search:
+   UnicodeString text = L"Some where in \x0391\x039D\x0395\x0398\x0391 2004";
+   UnicodeString greek = extract_greek(text);
+   assert(greek == L"\x0391\x039D\x0395\x0398\x0391 2004");
+
+   // extract currency symbols with associated value, use iterator interface:
+   std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign (U+00A3) encoded in UTF-8
+   enumerate_currencies(text2);
+   enumerate_currencies2(text2);
+
+   UnicodeString credit_card_number = "1234567887654321";
+   credit_card_number = human_readable_card_number(credit_card_number);
+   assert(credit_card_number == "1234-5678-8765-4321");
+   return 0;
+}
+
+#else
+
+#include <iostream>
+
+int main()
+{
+   std::cout << "<NOTE>ICU support not enabled, feature unavailable</NOTE>"; // built when BOOST_HAS_ICU is not defined
+   return 0;
+}
+
+
+#endif
+
--- a/example/snippets/mfc_example.cpp
+++ b/example/snippets/mfc_example.cpp
@ -0,0 +1,162 @@
+/*
+ *
+ * Copyright (c) 2004
+ * Dr John Maddock
+ *
+ * Use, modification and distribution are subject to the 
+ * Boost Software License, Version 1.0. (See accompanying file 
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+  *   LOCATION:    see http://www.boost.org for most recent version.
+  *   FILE         mfc_example.cpp
+  *   VERSION      see <boost/version.hpp>
+  *   DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types.
+  */
+
+#ifdef TEST_MFC
+
+#include <boost/regex/mfc.hpp>
+#include <cstringt.h>
+#include <atlstr.h>
+#include <assert.h>
+#include <tchar.h>
+#include <iostream>
+
+#ifdef _UNICODE
+#define cout wcout
+#endif
+
+//
+// Returns true when *password* meets our password requirements,
+// as defined by the regular expression *requirements*.
+//
+bool is_valid_password(const CString& password, const CString& requirements)
+{
+   return boost::regex_match(password, boost::make_regex(requirements)); // the pattern is compiled afresh on every call
+}
+
+//
+// Extract filename part of a path from a CString and return the result
+// as another CString; throws std::runtime_error when the path does not match:
+//
+CString get_filename(const CString& path)
+{
+   boost::tregex r(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
+   boost::tmatch what;
+   if(boost::regex_match(path, what, r))
+   {
+      // extract $1 (the trailing run of non-backslash characters) as a CString:
+      return CString(what[1].first, what.length(1));
+   }
+   else
+   {
+      throw std::runtime_error("Invalid pathname");
+   }
+}
+
+CString extract_postcode(const CString& address)
+{
+   // searches through address for a UK postcode and returns the result,
+   // the expression used is by Phil A. on www.regxlib.com:
+   boost::tregex r(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
+   boost::tmatch what;
+   if(boost::regex_search(address, what, r))
+   {
+      // extract $0 as a CString:
+      return CString(what[0].first, what.length());
+   }
+   else
+   {
+      throw std::runtime_error("No postcode found");
+   }
+}
+
+void enumerate_links(const CString& html)
+{
+   // enumerate and print all the <a> links in some HTML text,
+   // the expression used is by Andew Lee on www.regxlib.com:
+   boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
+   boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j; // j is default constructed: the end marker for the loop
+   while(i != j)
+   {
+      std::cout << (*i)[1] << std::endl; // $1: the link target found between the quotes
+      ++i;
+   }
+}
+
+void enumerate_links2(const CString& html)
+{
+   // enumerate and print all the <a> links in some HTML text using the token iterator interface,
+   // the expression used is by Andew Lee on www.regxlib.com:
+   boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
+   boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j; // final argument 1 selects $1; j is the end marker
+   while(i != j)
+   {
+      std::cout << *i << std::endl;
+      ++i;
+   }
+}
+
+//
+// Take a credit card number as a string of digits, 
+// and reformat it as a human readable string with "-"
+// separating each group of four digits (the first group may have three):
+//
+const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
+const CString human_format = __T("$1-$2-$3-$4");
+
+CString human_readable_card_number(const CString& s)
+{
+   return boost::regex_replace(s, e, human_format);
+}
+
+
+int main()
+{
+   // password checks using regex_match:
+   CString pwd = "abcDEF---";
+   CString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
+   bool b = is_valid_password(pwd, pwd_check);
+   assert(b);
+   pwd = "abcD-";
+   b = is_valid_password(pwd, pwd_check);
+   assert(!b);
+
+   // filename extraction with regex_match:
+   CString file = "abc.hpp";
+   file = get_filename(file);
+   assert(file == "abc.hpp");
+   file = "c:\\a\\b\\c\\d.h";
+   file = get_filename(file);
+   assert(file == "d.h");
+
+   // postcode extraction with regex_search:
+   CString address = "Joe Bloke, 001 Somestreet, Somewhere,\nPL2 8AB";
+   CString postcode = extract_postcode(address);
+   assert(postcode == "PL2 8AB"); // compare (not assign) against the postcode embedded in address
+
+   // html link extraction with regex_iterator:
+   CString text = "<dt><a href=\"syntax_perl.html\">Perl Regular Expressions</a></dt><dt><a href=\"syntax_extended.html\">POSIX-Extended Regular Expressions</a></dt><dt><a href=\"syntax_basic.html\">POSIX-Basic Regular Expressions</a></dt>";
+   enumerate_links(text);
+   enumerate_links2(text);
+
+   CString credit_card_number = "1234567887654321";
+   credit_card_number = human_readable_card_number(credit_card_number);
+   assert(credit_card_number == "1234-5678-8765-4321");
+   return 0;
+}
+
+#else
+
+#include <iostream>
+
+int main()
+{
+   std::cout << "<NOTE>MFC support not enabled, feature unavailable</NOTE>"; // built when TEST_MFC is not defined
+   return 0;
+}
+
+#endif
--- a/example/timer/regex_timer.cpp
+++ b/example/timer/regex_timer.cpp
@ -33,9 +33,14 @@ using std::getline;

 #include <boost/config.hpp>
 #include <boost/regex.hpp>
+#include <boost/cregex.hpp>
 #include <boost/timer.hpp> 
 #include <boost/smart_ptr.hpp>

+#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
+#include <windows.h>
+#endif
+
 #if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi)
 // maybe no Koenig lookup, use using declaration instead:
 using namespace boost;
@ -367,7 +372,9 @@ int main(int argc, char**argv)
   return 0;
 }

-
+#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
+#pragma comment(lib, "user32.lib")
+#endif // ask the linker to pull in user32.lib for Win32-locale builds