Updated Unicode compatibility, and finished off the docs.

[SVN r26681]
2025-07-29 12:07:28 +02:00 · 2005-01-12 12:44:02 +00:00
parent f996efb1e1
commit 16a494ca8f
18 changed files with 707 additions and 28 deletions
--- a/doc/Attic/collating_names.html
+++ b/doc/Attic/collating_names.html
@ -24,10 +24,17 @@
      </P>
      <HR>
      <p></p>
-      <P>
-         The following are treated as valid digraphs when used as a collating name:</P>
+      <H3>Contents</H3>
+      <dl class="index">
+         <dt><A href="#digraphs">Digraphs</A></dt>
+         <dt><A href="#posix">POSIX Symbolic Names</A></dt>
+         <dt><A href="#unicode">Named Unicode Characters</A></dt>
+      </dl>
+      <H3><A name="digraphs"></A>Digraphs</H3>
+      <P>The following are treated as valid digraphs when used as a collating name:</P>
      <P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj", 
         "Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
+      <H3><A name="posix"></A>POSIX Symbolic Names</H3>
      <P>The following symbolic names are recognised as valid collating element names, 
         in addition to any single character:</P>
      <P>
@ -342,15 +349,18 @@
            </TR>
         </TABLE>
      </P>
+      <P>
+         <H3><A name="unicode"></A>Named Unicode Characters</H3>
+      <P>When using <A href="icu_strings.html">Unicode aware regular expressions</A> (with 
+         the <EM>u32regex</EM> type), all the normal symbolic names for Unicode 
+         characters (those given in UnicodeData.txt) are recognised.</P>
      <P>
         <HR>
+      </P>
      <P></P>
-      <p>Revised 
-         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --> 
-         24 Oct 2003 
-         <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
-      <p><i><EFBFBD> Copyright John Maddock&nbsp;1998- 
-            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->  2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
+      <p>Revised 12 Jan 2005 
+         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --></p>
+      <p><i>&copy; Copyright John Maddock&nbsp;2004-2005</i></p>
      <P><I>Use, modification and distribution are subject to the Boost Software License, 
            Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
            or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
--- a/doc/Attic/history.html
+++ b/doc/Attic/history.html
@ -40,7 +40,10 @@
         <LI>
            Added <A href="mfc_strings.html">MFC/ATL string wrappers</A>.
         <LI>
-            Added <A href="unicode.html">Unicode support; based on ICU</A>.</LI></UL>
+            Added <A href="unicode.html">Unicode support; based on ICU</A>.
+         <LI>
+            Changed newline support to recognise \f as a line separator (all character 
+            types), and \x85 as a line separator for wide characters / Unicode only.</LI></UL>
      <P>Boost 1.32.1.</P>
      <UL>
         <LI>
--- a/doc/Attic/standards.html
+++ b/doc/Attic/standards.html
@ -61,6 +61,166 @@
         string sort keys produced by the system; if you need this, and the default 
         implementation doesn't work on your platform, then you will need to supply a 
         custom traits class.</P>
+      <H3>Unicode</H3>
+      <P>The following comments refer to&nbsp;<A href="http://www.unicode.org/reports/tr18/">Unicode 
+            Technical
+            <SPAN>Standard 
+</SPAN>#18: Unicode Regular Expressions</A>&nbsp;version 9.</P>
+      <P>
+         <TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="0">
+            <TR>
+               <TD>#</TD>
+               <TD>Feature</TD>
+               <TD>Support</TD>
+            </TR>
+            <TR>
+               <TD>1.1</TD>
+               <TD>Hex Notation</TD>
+               <TD>Yes: use \x{DDDD} to refer to code point U+DDDD.</TD>
+            </TR>
+            <TR>
+               <TD>1.2</TD>
+               <TD>Character Properties</TD>
+               <TD>All the names listed under the&nbsp;<A href="http://www.unicode.org/reports/tr18/#Categories">General 
+                     Category Property</A> are supported.&nbsp; Script names and Other Names are 
+                  not currently supported.</TD>
+            </TR>
+            <TR>
+               <TD>1.3</TD>
+               <TD><A name="Subtraction_and_Intersection">Subtraction</A> and Intersection</TD>
+               <TD>
+                  <P>Indirectly supported by forward-lookahead:
+                  </P>
+                  <P>(?=[[:X:]])[[:Y:]]</P>
+                  <P>Gives the intersection of character properties X and Y.</P>
+                  <P>(?![[:X:]])[[:Y:]]</P>
+                  <P>Gives everything in Y that is not in X (subtraction).</P>
+               </TD>
+            </TR>
+            <TR>
+               <TD>1.4</TD>
+               <TD><A name="Simple_Word_Boundaries">Simple Word Boundaries</A></TD>
+               <TD>Conforming: non-spacing marks are included in the set of word characters.</TD>
+            </TR>
+            <TR>
+               <TD>1.5</TD>
+               <TD>Caseless Matching</TD>
+               <TD>Supported; note that at this level case transformations are 1:1: one-to-many 
+                  and many-to-many case folding operations are not supported (for example&nbsp;"&szlig;" to "SS").</TD>
+            </TR>
+            <TR>
+               <TD>1.6</TD>
+               <TD>Line Boundaries</TD>
+               <TD>Supported, except that "." matches only one character of "\r\n". Other than 
+                  that, line boundaries match correctly, including not matching in the middle of a 
+                  "\r\n" sequence.</TD>
+            </TR>
+            <TR>
+               <TD>1.7</TD>
+               <TD>Code Points</TD>
+               <TD>Supported: provided you use the <A href="icu_strings.html">u32* algorithms</A>, 
+                  then UTF-8, UTF-16 and UTF-32 are all treated as sequences of 32-bit code 
+                  points.</TD>
+            </TR>
+            <TR>
+               <TD>2.1</TD>
+               <TD>Canonical Equivalence</TD>
+               <TD>Not supported: it is up to the user of the library to convert all text into 
+                  the same canonical form as the regular expression.</TD>
+            </TR>
+            <TR>
+               <TD>2.2</TD>
+               <TD>Default Grapheme Clusters</TD>
+               <TD>Not supported.</TD>
+            </TR>
+            <TR>
+               <TD>2.3</TD>
+               <TD><!--StartFragment -->
+                  <P><A name="Default_Word_Boundaries">Default Word Boundaries</A></P>
+               </TD>
+               <TD>Not supported.</TD>
+            </TR>
+            <TR>
+               <TD>2.4</TD>
+               <TD><!--StartFragment -->
+                  <P><A name="Default_Loose_Matches">Default Loose Matches</A></P>
+               </TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>2.5</TD>
+               <TD>Name Properties</TD>
+               <TD>Supported: the expression "[[:name:]]" or \N{name} matches the named character 
+                  "name".</TD>
+            </TR>
+            <TR>
+               <TD>2.6</TD>
+               <TD>Wildcard properties</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.1</TD>
+               <TD>Tailored Punctuation.</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.2</TD>
+               <TD>Tailored Grapheme Clusters</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.3</TD>
+               <TD>Tailored Word Boundaries.</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.4</TD>
+               <TD>Tailored Loose Matches</TD>
+               <TD>Partial support: [[=c=]] matches characters with the same primary equivalence 
+                  class as "c".</TD>
+            </TR>
+            <TR>
+               <TD>3.5</TD>
+               <TD>Tailored Ranges</TD>
+               <TD>Supported: [a-b] matches any character that collates in the range a to b, when 
+                  the expression is constructed with the <A href="syntax_option_type.html">collate</A>
+                  flag set.</TD>
+            </TR>
+            <TR>
+               <TD>3.6</TD>
+               <TD>Context Matches</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.7</TD>
+               <TD>Incremental Matches</TD>
+               <TD>Supported: pass the flag <A href="match_flag_type.html">match_partial</A> to 
+                  the regex algorithms.</TD>
+            </TR>
+            <TR>
+               <TD>3.8</TD>
+               <TD>Unicode Set Sharing</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.9</TD>
+               <TD>Possible Match Sets</TD>
+               <TD>Not supported, however this information is used internally to optimise the 
+                  matching of regular expressions, and return quickly if no match is possible.</TD>
+            </TR>
+            <TR>
+               <TD>3.10</TD>
+               <TD>Folded Matching</TD>
+               <TD>Partial Support:&nbsp; It is possible to achieve a similar effect by using a 
+                  custom regular expression traits class.</TD>
+            </TR>
+            <TR>
+               <TD>3.11</TD>
+               <TD>Custom&nbsp;Submatch Evaluation</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+         </TABLE>
+      </P>
      <HR>
      <p>Revised&nbsp; 
         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --> 
@ -73,3 +233,4 @@
            or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
   </body>
 </html>
+
--- a/doc/Attic/syntax_extended.html
+++ b/doc/Attic/syntax_extended.html
@ -155,7 +155,7 @@ aaaa</PRE>
         <P>matches a NUL character.</P>
         <H5>Equivalence classes:</H5>
         <P>
-            An expression of the form[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]] matches any character or collating element 
            whose primary sort key is the same as that for collating element <EM>col</EM>, 
            as with colating elements the name <EM>col</EM> may be a <A href="collating_names.html">
               symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@ -242,6 +242,12 @@ aaaa</PRE>
                  <TD>An octal escape sequence - matches the single character whose code point is 
                     0ddd.</TD>
               </TR>
+               <TR>
+                  <TD>\N{Name}</TD>
+                  <TD>Matches the single character which has the <A href="collating_names.html">symbolic 
+                        name</A> <EM>Name</EM>.&nbsp; For example \N{newline} matches the single 
+                     character \n.</TD>
+               </TR>
            </TABLE>
         </P>
         <H5>"Single character" character&nbsp;classes:</H5>
@ -298,6 +304,41 @@ aaaa</PRE>
               </TR>
            </TABLE>
         </P>
+         <H5>Character Properties</H5>
+         <P dir="ltr">The character property names in the following table are all 
+            equivalent to the <A href="character_class_names.html">names used in character 
+               classes</A>.</P>
+         <P>
+            <TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
+               <TR>
+                  <TD><STRONG>Form</STRONG></TD>
+                  <TD><STRONG>Description</STRONG></TD>
+                  <TD><STRONG>Equivalent character set form</STRONG></TD>
+               </TR>
+               <TR>
+                  <TD>\pX</TD>
+                  <TD>Matches any character that has the property X.</TD>
+                  <TD>[[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\p{Name}</TD>
+                  <TD>Matches any character that has the property <EM>Name</EM>.</TD>
+                  <TD>[[:Name:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\PX</TD>
+                  <TD>Matches any character that does not have the property X.</TD>
+                  <TD>[^[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\P{Name}</TD>
+                  <TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
+                  <TD>[^[:Name:]]</TD>
+               </TR>
+            </TABLE>
+         </P>
         <H5>Word Boundaries</H5>
         <P>The following escape sequences match the boundaries of words:</P>
         <P>
--- a/doc/Attic/syntax_perl.html
+++ b/doc/Attic/syntax_perl.html
@ -169,7 +169,7 @@ aaaa</PRE>
         <P>matches a NUL character.</P>
         <H5>Equivalence classes:</H5>
         <P>
-            An expression of the form[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]] matches any character or collating element 
            whose primary sort key is the same as that for collating element <EM>col</EM>, 
            as with colating elements the name <EM>col</EM> may be a <A href="collating_names.html">
               symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@ -250,6 +250,12 @@ aaaa</PRE>
                  <TD>An octal escape sequence - matches the single character whose code point is 
                     0ddd.</TD>
               </TR>
+               <TR>
+                  <TD>\N{name}</TD>
+                  <TD>Matches the single character which has the <A href="collating_names.html">symbolic 
+                        name</A> <EM>name</EM>.&nbsp; For example \N{newline} matches the single 
+                     character \n.</TD>
+               </TR>
            </TABLE>
         </P>
         <H5>"Single character" character&nbsp;classes:</H5>
@ -306,6 +312,38 @@ aaaa</PRE>
               </TR>
            </TABLE>
         </P>
+         <H5>Character Properties</H5>
+         <P>The character property names in the following table are all equivalent to the <A href="character_class_names.html">
+               names used in character classes</A>.</P>
+         <P>
+            <TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
+               <TR>
+                  <TD><STRONG>Form</STRONG></TD>
+                  <TD><STRONG>Description</STRONG></TD>
+                  <TD><STRONG>Equivalent character set form</STRONG></TD>
+               </TR>
+               <TR>
+                  <TD>\pX</TD>
+                  <TD>Matches any character that has the property X.</TD>
+                  <TD>[[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\p{Name}</TD>
+                  <TD>Matches any character that has the property <EM>Name</EM>.</TD>
+                  <TD>[[:Name:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\PX</TD>
+                  <TD>Matches any character that does not have the property X.</TD>
+                  <TD>[^[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\P{Name}</TD>
+                  <TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
+                  <TD>[^[:Name:]]</TD>
+               </TR>
+            </TABLE>
+         </P>
         <H5>Word Boundaries</H5>
         <P>The following escape sequences match the boundaries of words:</P>
         <P>
--- a/doc/collating_names.html
+++ b/doc/collating_names.html
@ -24,10 +24,17 @@
      </P>
      <HR>
      <p></p>
-      <P>
-         The following are treated as valid digraphs when used as a collating name:</P>
+      <H3>Contents</H3>
+      <dl class="index">
+         <dt><A href="#digraphs">Digraphs</A></dt>
+         <dt><A href="#posix">POSIX Symbolic Names</A></dt>
+         <dt><A href="#unicode">Named Unicode Characters</A></dt>
+      </dl>
+      <H3><A name="digraphs"></A>Digraphs</H3>
+      <P>The following are treated as valid digraphs when used as a collating name:</P>
      <P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj", 
         "Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
+      <H3><A name="posix"></A>POSIX Symbolic Names</H3>
      <P>The following symbolic names are recognised as valid collating element names, 
         in addition to any single character:</P>
      <P>
@ -342,15 +349,18 @@
            </TR>
         </TABLE>
      </P>
+      <P>
+         <H3><A name="unicode"></A>Named Unicode Characters</H3>
+      <P>When using <A href="icu_strings.html">Unicode aware regular expressions</A> (with 
+         the <EM>u32regex</EM> type), all the normal symbolic names for Unicode 
+         characters (those given in UnicodeData.txt) are recognised.</P>
      <P>
         <HR>
+      </P>
      <P></P>
-      <p>Revised 
-         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --> 
-         24 Oct 2003 
-         <!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
-      <p><i><EFBFBD> Copyright John Maddock&nbsp;1998- 
-            <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan -->  2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
+      <p>Revised 12 Jan 2005 
+         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --></p>
+      <p><i>&copy; Copyright John Maddock&nbsp;2004-2005</i></p>
      <P><I>Use, modification and distribution are subject to the Boost Software License, 
            Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
            or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
--- a/doc/history.html
+++ b/doc/history.html
@ -40,7 +40,10 @@
         <LI>
            Added <A href="mfc_strings.html">MFC/ATL string wrappers</A>.
         <LI>
-            Added <A href="unicode.html">Unicode support; based on ICU</A>.</LI></UL>
+            Added <A href="unicode.html">Unicode support; based on ICU</A>.
+         <LI>
+            Changed newline support to recognise \f as a line separator (all character 
+            types), and \x85 as a line separator for wide characters / Unicode only.</LI></UL>
      <P>Boost 1.32.1.</P>
      <UL>
         <LI>
--- a/doc/standards.html
+++ b/doc/standards.html
@ -61,6 +61,166 @@
         string sort keys produced by the system; if you need this, and the default 
         implementation doesn't work on your platform, then you will need to supply a 
         custom traits class.</P>
+      <H3>Unicode</H3>
+      <P>The following comments refer to&nbsp;<A href="http://www.unicode.org/reports/tr18/">Unicode 
+            Technical
+            <SPAN>Standard 
+</SPAN>#18: Unicode Regular Expressions</A>&nbsp;version 9.</P>
+      <P>
+         <TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="0">
+            <TR>
+               <TD>#</TD>
+               <TD>Feature</TD>
+               <TD>Support</TD>
+            </TR>
+            <TR>
+               <TD>1.1</TD>
+               <TD>Hex Notation</TD>
+               <TD>Yes: use \x{DDDD} to refer to code point U+DDDD.</TD>
+            </TR>
+            <TR>
+               <TD>1.2</TD>
+               <TD>Character Properties</TD>
+               <TD>All the names listed under the&nbsp;<A href="http://www.unicode.org/reports/tr18/#Categories">General 
+                     Category Property</A> are supported.&nbsp; Script names and Other Names are 
+                  not currently supported.</TD>
+            </TR>
+            <TR>
+               <TD>1.3</TD>
+               <TD><A name="Subtraction_and_Intersection">Subtraction</A> and Intersection</TD>
+               <TD>
+                  <P>Indirectly supported by forward-lookahead:
+                  </P>
+                  <P>(?=[[:X:]])[[:Y:]]</P>
+                  <P>Gives the intersection of character properties X and Y.</P>
+                  <P>(?![[:X:]])[[:Y:]]</P>
+                  <P>Gives everything in Y that is not in X (subtraction).</P>
+               </TD>
+            </TR>
+            <TR>
+               <TD>1.4</TD>
+               <TD><A name="Simple_Word_Boundaries">Simple Word Boundaries</A></TD>
+               <TD>Conforming: non-spacing marks are included in the set of word characters.</TD>
+            </TR>
+            <TR>
+               <TD>1.5</TD>
+               <TD>Caseless Matching</TD>
+               <TD>Supported; note that at this level case transformations are 1:1: one-to-many 
+                  and many-to-many case folding operations are not supported (for example&nbsp;"&szlig;" to "SS").</TD>
+            </TR>
+            <TR>
+               <TD>1.6</TD>
+               <TD>Line Boundaries</TD>
+               <TD>Supported, except that "." matches only one character of "\r\n". Other than 
+                  that, line boundaries match correctly, including not matching in the middle of a 
+                  "\r\n" sequence.</TD>
+            </TR>
+            <TR>
+               <TD>1.7</TD>
+               <TD>Code Points</TD>
+               <TD>Supported: provided you use the <A href="icu_strings.html">u32* algorithms</A>, 
+                  then UTF-8, UTF-16 and UTF-32 are all treated as sequences of 32-bit code 
+                  points.</TD>
+            </TR>
+            <TR>
+               <TD>2.1</TD>
+               <TD>Canonical Equivalence</TD>
+               <TD>Not supported: it is up to the user of the library to convert all text into 
+                  the same canonical form as the regular expression.</TD>
+            </TR>
+            <TR>
+               <TD>2.2</TD>
+               <TD>Default Grapheme Clusters</TD>
+               <TD>Not supported.</TD>
+            </TR>
+            <TR>
+               <TD>2.3</TD>
+               <TD><!--StartFragment -->
+                  <P><A name="Default_Word_Boundaries">Default Word Boundaries</A></P>
+               </TD>
+               <TD>Not supported.</TD>
+            </TR>
+            <TR>
+               <TD>2.4</TD>
+               <TD><!--StartFragment -->
+                  <P><A name="Default_Loose_Matches">Default Loose Matches</A></P>
+               </TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>2.5</TD>
+               <TD>Name Properties</TD>
+               <TD>Supported: the expression "[[:name:]]" or \N{name} matches the named character 
+                  "name".</TD>
+            </TR>
+            <TR>
+               <TD>2.6</TD>
+               <TD>Wildcard properties</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.1</TD>
+               <TD>Tailored Punctuation.</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.2</TD>
+               <TD>Tailored Grapheme Clusters</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.3</TD>
+               <TD>Tailored Word Boundaries.</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.4</TD>
+               <TD>Tailored Loose Matches</TD>
+               <TD>Partial support: [[=c=]] matches characters with the same primary equivalence 
+                  class as "c".</TD>
+            </TR>
+            <TR>
+               <TD>3.5</TD>
+               <TD>Tailored Ranges</TD>
+               <TD>Supported: [a-b] matches any character that collates in the range a to b, when 
+                  the expression is constructed with the <A href="syntax_option_type.html">collate</A>
+                  flag set.</TD>
+            </TR>
+            <TR>
+               <TD>3.6</TD>
+               <TD>Context Matches</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.7</TD>
+               <TD>Incremental Matches</TD>
+               <TD>Supported: pass the flag <A href="match_flag_type.html">match_partial</A> to 
+                  the regex algorithms.</TD>
+            </TR>
+            <TR>
+               <TD>3.8</TD>
+               <TD>Unicode Set Sharing</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+            <TR>
+               <TD>3.9</TD>
+               <TD>Possible Match Sets</TD>
+               <TD>Not supported, however this information is used internally to optimise the 
+                  matching of regular expressions, and return quickly if no match is possible.</TD>
+            </TR>
+            <TR>
+               <TD>3.10</TD>
+               <TD>Folded Matching</TD>
+               <TD>Partial Support:&nbsp; It is possible to achieve a similar effect by using a 
+                  custom regular expression traits class.</TD>
+            </TR>
+            <TR>
+               <TD>3.11</TD>
+               <TD>Custom&nbsp;Submatch Evaluation</TD>
+               <TD>Not Supported.</TD>
+            </TR>
+         </TABLE>
+      </P>
      <HR>
      <p>Revised&nbsp; 
         <!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --> 
@ -73,3 +233,4 @@
            or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
   </body>
 </html>
+
--- a/doc/syntax_extended.html
+++ b/doc/syntax_extended.html
@ -155,7 +155,7 @@ aaaa</PRE>
         <P>matches a NUL character.</P>
         <H5>Equivalence classes:</H5>
         <P>
-            An expression of the form[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]] matches any character or collating element 
            whose primary sort key is the same as that for collating element <EM>col</EM>, 
            as with colating elements the name <EM>col</EM> may be a <A href="collating_names.html">
               symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@ -242,6 +242,12 @@ aaaa</PRE>
                  <TD>An octal escape sequence - matches the single character whose code point is 
                     0ddd.</TD>
               </TR>
+               <TR>
+                  <TD>\N{Name}</TD>
+                  <TD>Matches the single character which has the <A href="collating_names.html">symbolic 
+                        name</A> <EM>Name</EM>.&nbsp; For example \N{newline} matches the single 
+                     character \n.</TD>
+               </TR>
            </TABLE>
         </P>
         <H5>"Single character" character&nbsp;classes:</H5>
@ -298,6 +304,41 @@ aaaa</PRE>
               </TR>
            </TABLE>
         </P>
+         <H5>Character Properties</H5>
+         <P dir="ltr">The character property names in the following table are all 
+            equivalent to the <A href="character_class_names.html">names used in character 
+               classes</A>.</P>
+         <P>
+            <TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
+               <TR>
+                  <TD><STRONG>Form</STRONG></TD>
+                  <TD><STRONG>Description</STRONG></TD>
+                  <TD><STRONG>Equivalent character set form</STRONG></TD>
+               </TR>
+               <TR>
+                  <TD>\pX</TD>
+                  <TD>Matches any character that has the property X.</TD>
+                  <TD>[[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\p{Name}</TD>
+                  <TD>Matches any character that has the property <EM>Name</EM>.</TD>
+                  <TD>[[:Name:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\PX</TD>
+                  <TD>Matches any character that does not have the property X.</TD>
+                  <TD>[^[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\P{Name}</TD>
+                  <TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
+                  <TD>[^[:Name:]]</TD>
+               </TR>
+            </TABLE>
+         </P>
         <H5>Word Boundaries</H5>
         <P>The following escape sequences match the boundaries of words:</P>
         <P>
--- a/doc/syntax_perl.html
+++ b/doc/syntax_perl.html
@ -169,7 +169,7 @@ aaaa</PRE>
         <P>matches a NUL character.</P>
         <H5>Equivalence classes:</H5>
         <P>
-            An expression of the form[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]] matches any character or collating element 
            whose primary sort key is the same as that for collating element <EM>col</EM>, 
            as with colating elements the name <EM>col</EM> may be a <A href="collating_names.html">
               symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@ -250,6 +250,12 @@ aaaa</PRE>
                  <TD>An octal escape sequence - matches the single character whose code point is 
                     0ddd.</TD>
               </TR>
+               <TR>
+                  <TD>\N{name}</TD>
+                  <TD>Matches the single character which has the <A href="collating_names.html">symbolic 
+                        name</A> <EM>name</EM>.&nbsp; For example \N{newline} matches the single 
+                     character \n.</TD>
+               </TR>
            </TABLE>
         </P>
         <H5>"Single character" character&nbsp;classes:</H5>
@ -306,6 +312,38 @@ aaaa</PRE>
               </TR>
            </TABLE>
         </P>
+         <H5>Character Properties</H5>
+         <P>The character property names in the following table are all equivalent to the <A href="character_class_names.html">
+               names used in character classes</A>.</P>
+         <P>
+            <TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
+               <TR>
+                  <TD><STRONG>Form</STRONG></TD>
+                  <TD><STRONG>Description</STRONG></TD>
+                  <TD><STRONG>Equivalent character set form</STRONG></TD>
+               </TR>
+               <TR>
+                  <TD>\pX</TD>
+                  <TD>Matches any character that has the property X.</TD>
+                  <TD>[[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\p{Name}</TD>
+                  <TD>Matches any character that has the property <EM>Name</EM>.</TD>
+                  <TD>[[:Name:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\PX</TD>
+                  <TD>Matches any character that does not have the property X.</TD>
+                  <TD>[^[:X:]]</TD>
+               </TR>
+               <TR>
+                  <TD>\P{Name}</TD>
+                  <TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
+                  <TD>[^[:Name:]]</TD>
+               </TR>
+            </TABLE>
+         </P>
         <H5>Word Boundaries</H5>
         <P>The following escape sequences match the boundaries of words:</P>
         <P>
--- a/include/boost/regex/v4/basic_regex_creator.hpp
+++ b/include/boost/regex/v4/basic_regex_creator.hpp
@ -842,6 +842,8 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
            l_map[0] |= mask_init;
            l_map['\n'] |= mask;
            l_map['\r'] |= mask;
+            l_map['\f'] |= mask;
+            l_map[0x85] |= mask;
         }
         // now figure out if we can match a NULL string at this point:
         if(pnull)
--- a/include/boost/regex/v4/basic_regex_parser.hpp
+++ b/include/boost/regex/v4/basic_regex_parser.hpp
@ -2012,7 +2012,7 @@ bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_st
      m_alt_jumps.pop_back();
      this->m_pdata->m_data.align();
      re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
-      BOOST_ASSERT(jmp->type = syntax_element_jump);
+      BOOST_ASSERT(jmp->type == syntax_element_jump);
      jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
   }
   return true;
--- a/include/boost/regex/v4/regex_traits_defaults.hpp
+++ b/include/boost/regex/v4/regex_traits_defaults.hpp
@ -105,12 +105,18 @@ inline bool is_combining<wchar_t>(wchar_t c)
 template <class charT>
 inline bool is_separator(charT c)
 {
-   return BOOST_REGEX_MAKE_BOOL((c == static_cast<charT>('\n')) || (c == static_cast<charT>('\r')) || (static_cast<int>(c) == 0x2028) || (static_cast<int>(c) == 0x2029));
+   return BOOST_REGEX_MAKE_BOOL( // true when c is one of the line-separator characters tested below
+      (c == static_cast<charT>('\n')) 
+      || (c == static_cast<charT>('\r')) 
+      || (c == static_cast<charT>('\f')) 
+      || (static_cast<boost::uint32_t>(c) == 0x2028u) // U+2028; compared at 32 bits so a wide c such as U+12028 is not truncated into a match
+      || (static_cast<boost::uint32_t>(c) == 0x2029u) // U+2029
+      || (static_cast<boost::uint32_t>(c) == 0x85u)); // U+0085
 }
 template <>
 inline bool is_separator<char>(char c)
 {
-   return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r'));
+   return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); // narrow char: only '\n', '\r' and '\f' are tested
 }

 //
--- a/src/icu.cpp
+++ b/src/icu.cpp
@ -347,8 +347,8 @@ icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_
      U_GC_LU_MASK,
      mask_unicode,
      U_GC_LU_MASK,
-      char_class_type(U_GC_L_MASK | U_GC_ND_MASK) | mask_underscore, 
-      char_class_type(U_GC_L_MASK | U_GC_ND_MASK) | mask_underscore, 
+      char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, 
+      char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, 
      char_class_type(U_GC_ND_MASK) | mask_xdigit,
   };

--- a/test/regress/test_icu.cpp
+++ b/test/regress/test_icu.cpp
@ -86,6 +86,64 @@ void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<1> const*)
   }
 }

+void test_icu_grep(const boost::u32regex& r, const std::vector< ::UChar32>& search_text) // iterate over every match of r in search_text, checking each match and its prefix/suffix
+{
+   typedef std::vector< ::UChar32>::const_iterator const_iterator;
+   typedef boost::u32regex_iterator<const_iterator> test_iterator;
+   boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options();
+   const int* answer_table = test_info<wchar_t>::answer_table(); // walked one answer set per match; each set ends with -2 (see the skip loop below)
+   test_iterator start(search_text.begin(), search_text.end(), r, opts), end;
+   test_iterator copy(start); // advanced in lock-step with start so that == and != can be exercised
+   const_iterator last_end = search_text.begin(); // end of the previous match; the next prefix is expected to begin here
+   while(start != end)
+   {
+      if(start != copy)
+      {
+         BOOST_REGEX_TEST_ERROR("Failed iterator != comparison.", wchar_t);
+      }
+      if(!(start == copy))
+      {
+         BOOST_REGEX_TEST_ERROR("Failed iterator == comparison.", wchar_t);
+      }
+      test_result(*start, search_text.begin(), answer_table);
+      // test $` and $' :
+      if(start->prefix().first != last_end)
+      {
+         BOOST_REGEX_TEST_ERROR("Incorrect position for start of $`", wchar_t);
+      }
+      if(start->prefix().second != (*start)[0].first)
+      {
+         BOOST_REGEX_TEST_ERROR("Incorrect position for end of $`", wchar_t);
+      }
+      if(start->prefix().matched != (start->prefix().first != start->prefix().second)) // matched must be true exactly when the prefix is non-empty
+      {
+         BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $`", wchar_t);
+      }
+      if(start->suffix().first != (*start)[0].second)
+      {
+         BOOST_REGEX_TEST_ERROR("Incorrect position for start of $'", wchar_t);
+      }
+      if(start->suffix().second != search_text.end())
+      {
+         BOOST_REGEX_TEST_ERROR("Incorrect position for end of $'", wchar_t);
+      }
+      if(start->suffix().matched != (start->suffix().first != start->suffix().second)) // matched must be true exactly when the suffix is non-empty
+      {
+         BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $'", wchar_t);
+      }
+      last_end = (*start)[0].second;
+      ++start;
+      ++copy;
+      // move on the answer table to next set of answers;
+      if(*answer_table != -2) // already sitting on a -2 terminator: leave the pointer where it is
+         while(*answer_table++ != -2){}
+   }
+   if(answer_table[0] >= 0) // a non-negative entry here means the table still lists a match the iterator never produced
+   {
+      // we should have had a match but didn't:
+      BOOST_REGEX_TEST_ERROR("Expected match was not found.", wchar_t);
+   }
+}

 void test_icu(const wchar_t&, const test_regex_search_tag& )
 {
@ -204,6 +262,10 @@ void test_icu(const wchar_t&, const test_regex_search_tag& )
            }
         }
      }
+      //
+      // finally try a grep:
+      //
+      test_icu_grep(r, search_text);
   }
   catch(const boost::bad_expression& e)
   {
--- a/test/regress/test_sets.cpp
+++ b/test/regress/test_sets.cpp
@ -305,5 +305,65 @@ void test_sets2()
   TEST_INVALID_REGEX("\\N{}", perl);
   TEST_INVALID_REGEX("\\N{invalid-name}", perl);
   TEST_INVALID_REGEX("\\N{zero", perl);
+
+   // and repeat with POSIX-extended syntax:
+   TEST_REGEX_SEARCH("\\pl+", extended, "ABabcAB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\Pl+", extended, "abABCab", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\pu+", extended, "abABCab", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\Pu+", extended, "ABabcAB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\pd+", extended, "AB012AB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\PD+", extended, "01abc01", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\ps+", extended, "AB   AB", match_default, make_array(2, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\PS+", extended, "  abc  ", match_default, make_array(2, 5, -2, -2));
+
+   TEST_REGEX_SEARCH("\\p{alnum}+", extended, "-%@a0X_-", match_default, make_array(3, 6, -2, -2));
+   TEST_REGEX_SEARCH("\\p{alpha}+", extended, " -%@aX_0-", match_default, make_array(4, 6, -2, -2));
+   TEST_REGEX_SEARCH("\\p{blank}+", extended, "a  \tb", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{cntrl}+", extended, " a\n\tb", match_default, make_array(2, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{digit}+", extended, "a019b", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{graph}+", extended, " a%b ", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{lower}+", extended, "AabC", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\p{print}+", extended, "AabC", match_default, make_array(0, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{punct}+", extended, " %-&\t", match_default, make_array(1, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\p{space}+", extended, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\p{upper}+", extended, "aBCd", match_default, make_array(1, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\p{xdigit}+", extended, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
+   TEST_REGEX_SEARCH("\\P{alnum}+", extended, "-%@a", match_default, make_array(0, 3, -2, -2));
+   TEST_REGEX_SEARCH("\\P{alpha}+", extended, " -%@a", match_default, make_array(0, 4, -2, -2));
+   TEST_REGEX_SEARCH("\\P{blank}+", extended, "a  ", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{cntrl}+", extended, " a\n", match_default, make_array(0, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\P{digit}+", extended, "a0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{graph}+", extended, " a", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{lower}+", extended, "Aa", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{print}+", extended, "Absc", match_default, make_array(-2, -2));
+   TEST_REGEX_SEARCH("\\P{punct}+", extended, " %", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{space}+", extended, "a ", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{upper}+", extended, "aB", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\P{xdigit}+", extended, "pf", match_default, make_array(0, 1, -2, -2));
+
+   TEST_INVALID_REGEX("\\p{invalid class}", extended);
+   TEST_INVALID_REGEX("\\p{upper", extended);
+   TEST_INVALID_REGEX("\\p{", extended);
+   TEST_INVALID_REGEX("\\p", extended);
+   TEST_INVALID_REGEX("\\P{invalid class}", extended);
+   TEST_INVALID_REGEX("\\P{upper", extended);
+   TEST_INVALID_REGEX("\\P{", extended);
+   TEST_INVALID_REGEX("\\P", extended);
+
+   // try named characters:
+   TEST_REGEX_SEARCH("\\N{zero}", extended, "0", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{one}", extended, "1", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{two}", extended, "2", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{three}", extended, "3", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{a}", extended, "bac", match_default, make_array(1, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\N{\xf0}", extended, "b\xf0x", match_default, make_array(1, 2, -2, -2));
+   TEST_REGEX_SEARCH("\\N{right-curly-bracket}", extended, "}", match_default, make_array(0, 1, -2, -2));
+   TEST_REGEX_SEARCH("\\N{NUL}", extended, "\0", match_default, make_array(0, 1, -2, -2));
+
+   TEST_INVALID_REGEX("\\N", extended);
+   TEST_INVALID_REGEX("\\N{", extended);
+   TEST_INVALID_REGEX("\\N{}", extended);
+   TEST_INVALID_REGEX("\\N{invalid-name}", extended);
+   TEST_INVALID_REGEX("\\N{zero", extended);
 }

--- a/test/regress/test_unicode.cpp
+++ b/test/regress/test_unicode.cpp
@ -147,6 +147,14 @@ void test_unicode()
   TEST_REGEX_SEARCH_U(L"[\\N{MODIFIER LETTER LOW ACUTE ACCENT}]", perl, L"\x02CF", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH_U(L"[\\N{SUPERSCRIPT ONE}]", perl, L"\x00B9", match_default, make_array(0, 1, -2, -2));
   TEST_REGEX_SEARCH_U(L"\\N{CJK UNIFIED IDEOGRAPH-7FED}", perl, L"\x7FED", match_default, make_array(0, 1, -2, -2));
+   
+   TEST_REGEX_SEARCH_U(L"\\w+", perl, L" e\x301" L"coute ", match_default, make_array(1, 8, -2, -2));
+   TEST_REGEX_SEARCH_U(L"^", perl, L" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ", 
+      match_default | match_not_bol, make_array(2, 2, -2, 4, 4, -2, 7, 7, -2, 9, 9, -2, 11, 11, -2, 13, 13, -2, 15, 15, -2, -2));
+   TEST_REGEX_SEARCH_U(L"$", perl, L" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ", 
+      match_default | match_not_eol, make_array(1, 1, -2, 3, 3, -2, 5, 5, -2, 8, 8, -2, 10, 10, -2, 12, 12, -2, 14, 14, -2, -2));
+   TEST_REGEX_SEARCH_U(L".", perl, L" \x2028\x2029\x000D\x000A\x000A\x000C\x000D\x0085 ", 
+      match_default | match_not_dot_newline, make_array(0, 1, -2, 9, 10, -2, -2));
 }

 #else
--- a/tools/generate/tables.cpp
+++ b/tools/generate/tables.cpp
@ -18,6 +18,7 @@
  */

 #include <map>
+#include <vector>
 #include <string>
 #include <iostream>
 #include <algorithm>
@ -27,9 +28,16 @@
 std::string g_char_type;
 std::string g_data_type;
 std::map<std::string, std::string> g_table;
+std::map<std::string, std::pair<std::string, std::string> > g_help_table; // keyed by the data string passed to add(); .first holds a key of at most 2 chars, .second a longer key

 void add(std::string key, std::string data)
 {
+   g_table[key] = data;
+   if(key.size() <= 2)
+      g_help_table[data].first = key;
+   else
+      g_help_table[data].second = key;
+
   std::string::size_type i = 0;
   while(i < key.size())
   {
@ -41,7 +49,6 @@ void add(std::string key, std::string data)
         ++i;
      }
   }
-   g_table[key] = data;
 }

 #define ADD(x, y) add(BOOST_STRINGIZE(x), BOOST_STRINGIZE(y))
@ -88,6 +95,33 @@ void generate_code()
   g_table.clear();
 }

+void generate_html() // print the contents of g_help_table to std::cout as an HTML table of short/long names
+{
+   // start by producing a sorted list:
+   std::vector<std::pair<std::string, std::string> > v;
+   std::map<std::string, std::pair<std::string, std::string> >::const_iterator i, j;
+   i = g_help_table.begin();
+   j = g_help_table.end();
+   while(i != j)
+   {
+      v.push_back(i->second); // only the (short, long) name pair is kept; the map key is not printed
+      ++i;
+   }
+   std::sort(v.begin(), v.end()); // std::pair ordering: by short name first, then by long name
+
+   std::vector<std::pair<std::string, std::string> >::const_iterator h, k;
+   h = v.begin();
+   k = v.end();
+
+   std::cout << "<table width=\"100%\"><tr><td><b>Short Name</b></td><td><b>Long Name</b></td></tr>\n";
+   while(h != k)
+   {
+      std::cout << "<tr><td>" << (h->first.size() ? h->first : std::string(" ")) << "</td><td>" << h->second << "</td></tr>\n"; // an empty short name is written as a single space
+      ++h;
+   }
+   std::cout << "</table>\n\n";
+}
+
 int main()
 {
   g_char_type = "::UChar32";
@ -178,5 +212,6 @@ int main()
   ADD(Titlecase, U_GC_LT_MASK); 

   generate_code();
+   generate_html();
   return 0;
 }