From 91b21e78ffa883f45cb0cc42be9fd59ea642cba1 Mon Sep 17 00:00:00 2001
From: John Maddock <john@johnmaddock.co.uk>
Date: Sat, 16 Apr 2005 16:06:45 +0000
Subject: [PATCH] Added better documentation on POSIX-conformance.

[SVN r28278]
---
 doc/Attic/history.html            |  3 +++
 doc/Attic/syntax_basic.html       |  6 ++++--
 doc/Attic/syntax_extended.html    | 18 ++++++++++------
 doc/Attic/syntax_option_type.html | 36 ++++++++++++++++++++++++++-----
 doc/history.html                  |  3 +++
 doc/syntax_basic.html             |  6 ++++--
 doc/syntax_extended.html          | 18 ++++++++++------
 doc/syntax_option_type.html       | 36 ++++++++++++++++++++++++++-----
 8 files changed, 98 insertions(+), 28 deletions(-)
diff --git a/doc/Attic/history.html b/doc/Attic/history.html
index 6f3618e5..c3a4907b 100644
--- a/doc/Attic/history.html
+++ b/doc/Attic/history.html
@@ -29,6 +29,9 @@
          <LI>
          Completely rewritten expression parsing code, and traits class support; now 
          conforms to the standardization proposal.
+         <LI>
+         POSIX-extended and POSIX-basic regular expressions now enforce the letter of 
+         the POSIX standard much more closely than before.
          <LI>
             Added <A href="syntax_perl.html#Perl">support for (?imsx-imsx) constructs</A>.
          <LI>
diff --git a/doc/Attic/syntax_basic.html b/doc/Attic/syntax_basic.html
index cce1434b..f781948c 100644
--- a/doc/Attic/syntax_basic.html
+++ b/doc/Attic/syntax_basic.html
@@ -128,13 +128,16 @@ aaaa</PRE>
             point of a range, for example: [[.ae.]-c] matches the character sequence "ae", 
             plus any single character in the rangle "ae"-c, assuming that "ae" is treated 
             as a single collating element in the current locale.</P>
+         <P>Collating elements may be used in place of escapes (which are not normally 
+            allowed inside character sets), for example [[.^.]abc] would match any one 
+            of the characters 'abc^'.</P>
          <P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
                symbolic name</A>, for example:</P>
          <P>[[.NUL.]]</P>
          <P>matches a NUL character.</P>
          <H5>Equivalence classes:</H5>
          <P>
-            An expression of the form[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]], matches any character or collating element 
             whose primary sort key is the same as that for collating element <EM>col</EM>, 
             as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
                symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@@ -233,4 +236,3 @@ aaaa</PRE>
       </I>
    </body>
 </html>
-
diff --git a/doc/Attic/syntax_extended.html b/doc/Attic/syntax_extended.html
index bfba568a..d9253166 100644
--- a/doc/Attic/syntax_extended.html
+++ b/doc/Attic/syntax_extended.html
@@ -130,10 +130,11 @@ aaaa</PRE>
          <P>For example [a-c] will match any single character in the range 'a' to 
             'c'.&nbsp; By default, for POSIX-Extended regular expressions, a character <EM>x</EM>
             is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that 
-            range;&nbsp;this results in locale specific behavior.&nbsp; This behavior can 
-            be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">collate</A></EM>
-            option flag - in which case whether a character appears within a range is 
-            determined by comparing the code points of the characters only</P>
+            range;&nbsp;<EM><STRONG>this results in locale specific behavior</STRONG></EM>.&nbsp; 
+            This behavior can be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">
+                  collate</A></EM> option flag - in which case whether a character appears 
+            within a range is determined by comparing the code points of the characters 
+            only.</P>
          <H5>Negation:</H5>
          <P>If the bracket-expression begins with the ^ character, then it matches the 
             complement of the characters it contains, for example [^a-c] matches any 
@@ -149,13 +150,16 @@ aaaa</PRE>
             point of a range, for example: [[.ae.]-c] matches the character sequence "ae", 
             plus any single character in the range "ae"-c, assuming that "ae" is treated as 
             a single collating element in the current locale.</P>
+         <P>Collating elements may be used in place of escapes (which are not normally 
+            allowed inside character sets), for example [[.^.]abc] would match any one 
+            of the characters 'abc^'.</P>
          <P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
                symbolic name</A>, for example:</P>
          <P>[[.NUL.]]</P>
          <P>matches a NUL character.</P>
          <H5>Equivalence classes:</H5>
          <P>
-            An expression of theform[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]], matches any character or collating element 
             whose primary sort key is the same as that for collating element <EM>col</EM>, 
             as with colating elements the name <EM>col</EM> may be a <A href="collating_names.html">
                symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@@ -177,9 +181,9 @@ aaaa</PRE>
          <LI>
          The effect of any ordinary character being preceded by an escape is undefined.
          <LI>
-            An escape inside a character class declaration shall match itself (in other 
+            An escape inside a character class declaration shall match itself: in other 
             words the escape character is not "special" inside a character class 
-            declaration).</LI></UL>
+            declaration; so [\^] will match either a literal '\' or a '^'.</LI></UL>
       <P>However, that's rather restrictive, so the following standard-compatible 
          extensions are also supported by Boost.Regex:</P>
       <BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
diff --git a/doc/Attic/syntax_option_type.html b/doc/Attic/syntax_option_type.html
index 7ea438d7..e346af32 100644
--- a/doc/Attic/syntax_option_type.html
+++ b/doc/Attic/syntax_option_type.html
@@ -153,7 +153,7 @@ static const syntax_option_type collate;
                      speed with which regular expressions are matched, and less to the speed with 
                      which regular expression objects are constructed. Otherwise it has no 
                      detectable effect on the program output.&nbsp; This currently has no effect for 
-                     boost.regex.</P>
+                     Boost.Regex.</P>
                </TD>
             </TR>
             <TR>
@@ -250,8 +250,9 @@ static const syntax_option_type collate;
                   <P>That is to say: the same as POSIX extended syntax, but with escape sequences in 
                      character classes permitted.</P>
                   <P>In addition some perl-style escape sequences are supported (actually the awk 
-                     syntax requires \a \b \t \v \f \n and \r to be recognised,&nbsp;but other 
-                     escape sequences invoke undefined behavior according to the POSIX standard).</P>
+                     syntax only requires \a \b \t \v \f \n and \r to be recognised;&nbsp;all other 
+                     Perl-style escape sequences invoke undefined behavior according to the POSIX 
+                     standard, but are in fact recognised by Boost.Regex).</P>
                </TD>
             </TR>
          </TABLE>
@@ -297,7 +298,10 @@ static const syntax_option_type collate;
                <TD>collate</TD>
                <TD>Yes</TD>
                <TD>
-                  <P>Specifies that character ranges of the form "[a-b]" should be locale sensitive.</P>
+                  <P>Specifies that character ranges of the form "[a-b]" should be locale 
+                     sensitive.&nbsp; <STRONG>This bit is</STRONG> <STRONG>on by default</STRONG> for 
+                     POSIX-Extended regular expressions, but can be unset to force ranges to be 
+                     compared by code point only.</P>
                </TD>
             </TR>
             <TR>
@@ -307,6 +311,21 @@ static const syntax_option_type collate;
                   operator |.&nbsp; Allows newline separated lists to be used as a list of 
                   alternatives.</TD>
             </TR>
+            <TR>
+               <TD>no_escape_in_lists</TD>
+               <TD>No</TD>
+               <TD>When set this makes the escape character ordinary inside lists, so that [\b] 
+                  would match either '\' or 'b'. <STRONG>This bit is on by default</STRONG> for 
+                  POSIX-Extended regular expressions, but can be unset to force escapes to be 
+                  recognised inside lists.</TD>
+            </TR>
+            <TR>
+               <TD>no_bk_refs</TD>
+               <TD>No</TD>
+               <TD>When set then backreferences are disabled.&nbsp; <STRONG>This bit is</STRONG> <STRONG>
+                     on by default</STRONG> for POSIX-Extended regular expressions, but can be 
+                  unset to turn support for backreferences on.</TD>
+            </TR>
          </TABLE>
       </P>
       <H4><A name="basic"></A>Options for POSIX Basic Regular Expressions:</H4>
@@ -415,6 +434,14 @@ static const syntax_option_type collate;
                <TD>No</TD>
                <TD>When set then character classes such as [[:alnum:]] are not allowed.</TD>
             </TR>
+            <TR>
+               <TD>no_escape_in_lists</TD>
+               <TD>No</TD>
+               <TD>When set this makes the escape character ordinary inside lists, so that [\b] 
+                  would match either '\' or 'b'. <STRONG>This bit is on by default</STRONG> for 
+                  POSIX-basic regular expressions, but can be unset to force escapes to be 
+                  recognised inside lists.</TD>
+            </TR>
             <TR>
                <TD>no_intervals</TD>
                <TD>No</TD>
@@ -492,4 +519,3 @@ static const syntax_option_type collate;
             or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
    </body>
 </html>
-
diff --git a/doc/history.html b/doc/history.html
index 6f3618e5..c3a4907b 100644
--- a/doc/history.html
+++ b/doc/history.html
@@ -29,6 +29,9 @@
          <LI>
          Completely rewritten expression parsing code, and traits class support; now 
          conforms to the standardization proposal.
+         <LI>
+         POSIX-extended and POSIX-basic regular expressions now enforce the letter of 
+         the POSIX standard much more closely than before.
          <LI>
             Added <A href="syntax_perl.html#Perl">support for (?imsx-imsx) constructs</A>.
          <LI>
diff --git a/doc/syntax_basic.html b/doc/syntax_basic.html
index cce1434b..f781948c 100644
--- a/doc/syntax_basic.html
+++ b/doc/syntax_basic.html
@@ -128,13 +128,16 @@ aaaa</PRE>
             point of a range, for example: [[.ae.]-c] matches the character sequence "ae", 
             plus any single character in the rangle "ae"-c, assuming that "ae" is treated 
             as a single collating element in the current locale.</P>
+         <P>Collating elements may be used in place of escapes (which are not normally 
+            allowed inside character sets), for example [[.^.]abc] would match any one 
+            of the characters 'abc^'.</P>
          <P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
                symbolic name</A>, for example:</P>
          <P>[[.NUL.]]</P>
          <P>matches a NUL character.</P>
          <H5>Equivalence classes:</H5>
          <P>
-            An expression of the form[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]], matches any character or collating element 
             whose primary sort key is the same as that for collating element <EM>col</EM>, 
             as with collating elements the name <EM>col</EM> may be a <A href="collating_names.html">
                symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@@ -233,4 +236,3 @@ aaaa</PRE>
       </I>
    </body>
 </html>
-
diff --git a/doc/syntax_extended.html b/doc/syntax_extended.html
index bfba568a..d9253166 100644
--- a/doc/syntax_extended.html
+++ b/doc/syntax_extended.html
@@ -130,10 +130,11 @@ aaaa</PRE>
          <P>For example [a-c] will match any single character in the range 'a' to 
             'c'.&nbsp; By default, for POSIX-Extended regular expressions, a character <EM>x</EM>
             is within the range <EM>y</EM> to <EM>z</EM>, if it collates within that 
-            range;&nbsp;this results in locale specific behavior.&nbsp; This behavior can 
-            be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">collate</A></EM>
-            option flag - in which case whether a character appears within a range is 
-            determined by comparing the code points of the characters only</P>
+            range;&nbsp;<EM><STRONG>this results in locale specific behavior</STRONG></EM>.&nbsp; 
+            This behavior can be turned off by unsetting the <EM><A href="syntax_option_type.html#extended">
+                  collate</A></EM> option flag - in which case whether a character appears 
+            within a range is determined by comparing the code points of the characters 
+            only.</P>
          <H5>Negation:</H5>
          <P>If the bracket-expression begins with the ^ character, then it matches the 
             complement of the characters it contains, for example [^a-c] matches any 
@@ -149,13 +150,16 @@ aaaa</PRE>
             point of a range, for example: [[.ae.]-c] matches the character sequence "ae", 
             plus any single character in the range "ae"-c, assuming that "ae" is treated as 
             a single collating element in the current locale.</P>
+         <P>Collating elements may be used in place of escapes (which are not normally 
+            allowed inside character sets), for example [[.^.]abc] would match any one 
+            of the characters 'abc^'.</P>
          <P>As an extension, a collating element may also be specified via its <A href="collating_names.html">
                symbolic name</A>, for example:</P>
          <P>[[.NUL.]]</P>
          <P>matches a NUL character.</P>
          <H5>Equivalence classes:</H5>
          <P>
-            An expression of theform[[=col=]], matches any character or collating element 
+            An expression of the form [[=col=]], matches any character or collating element 
             whose primary sort key is the same as that for collating element <EM>col</EM>, 
             as with colating elements the name <EM>col</EM> may be a <A href="collating_names.html">
                symbolic name</A>.&nbsp; A primary sort key is one that ignores case, 
@@ -177,9 +181,9 @@ aaaa</PRE>
          <LI>
          The effect of any ordinary character being preceded by an escape is undefined.
          <LI>
-            An escape inside a character class declaration shall match itself (in other 
+            An escape inside a character class declaration shall match itself: in other 
             words the escape character is not "special" inside a character class 
-            declaration).</LI></UL>
+            declaration; so [\^] will match either a literal '\' or a '^'.</LI></UL>
       <P>However, that's rather restrictive, so the following standard-compatible 
          extensions are also supported by Boost.Regex:</P>
       <BLOCKQUOTE dir="ltr" style="MARGIN-RIGHT: 0px">
diff --git a/doc/syntax_option_type.html b/doc/syntax_option_type.html
index 7ea438d7..e346af32 100644
--- a/doc/syntax_option_type.html
+++ b/doc/syntax_option_type.html
@@ -153,7 +153,7 @@ static const syntax_option_type collate;
                      speed with which regular expressions are matched, and less to the speed with 
                      which regular expression objects are constructed. Otherwise it has no 
                      detectable effect on the program output.&nbsp; This currently has no effect for 
-                     boost.regex.</P>
+                     Boost.Regex.</P>
                </TD>
             </TR>
             <TR>
@@ -250,8 +250,9 @@ static const syntax_option_type collate;
                   <P>That is to say: the same as POSIX extended syntax, but with escape sequences in 
                      character classes permitted.</P>
                   <P>In addition some perl-style escape sequences are supported (actually the awk 
-                     syntax requires \a \b \t \v \f \n and \r to be recognised,&nbsp;but other 
-                     escape sequences invoke undefined behavior according to the POSIX standard).</P>
+                     syntax only requires \a \b \t \v \f \n and \r to be recognised;&nbsp;all other 
+                     Perl-style escape sequences invoke undefined behavior according to the POSIX 
+                     standard, but are in fact recognised by Boost.Regex).</P>
                </TD>
             </TR>
          </TABLE>
@@ -297,7 +298,10 @@ static const syntax_option_type collate;
                <TD>collate</TD>
                <TD>Yes</TD>
                <TD>
-                  <P>Specifies that character ranges of the form "[a-b]" should be locale sensitive.</P>
+                  <P>Specifies that character ranges of the form "[a-b]" should be locale 
+                     sensitive.&nbsp; <STRONG>This bit is</STRONG> <STRONG>on by default</STRONG> for 
+                     POSIX-Extended regular expressions, but can be unset to force ranges to be 
+                     compared by code point only.</P>
                </TD>
             </TR>
             <TR>
@@ -307,6 +311,21 @@ static const syntax_option_type collate;
                   operator |.&nbsp; Allows newline separated lists to be used as a list of 
                   alternatives.</TD>
             </TR>
+            <TR>
+               <TD>no_escape_in_lists</TD>
+               <TD>No</TD>
+               <TD>When set this makes the escape character ordinary inside lists, so that [\b] 
+                  would match either '\' or 'b'. <STRONG>This bit is on by default</STRONG> for 
+                  POSIX-Extended regular expressions, but can be unset to force escapes to be 
+                  recognised inside lists.</TD>
+            </TR>
+            <TR>
+               <TD>no_bk_refs</TD>
+               <TD>No</TD>
+               <TD>When set then backreferences are disabled.&nbsp; <STRONG>This bit is</STRONG> <STRONG>
+                     on by default</STRONG> for POSIX-Extended regular expressions, but can be 
+                  unset to turn support for backreferences on.</TD>
+            </TR>
          </TABLE>
       </P>
       <H4><A name="basic"></A>Options for POSIX Basic Regular Expressions:</H4>
@@ -415,6 +434,14 @@ static const syntax_option_type collate;
                <TD>No</TD>
                <TD>When set then character classes such as [[:alnum:]] are not allowed.</TD>
             </TR>
+            <TR>
+               <TD>no_escape_in_lists</TD>
+               <TD>No</TD>
+               <TD>When set this makes the escape character ordinary inside lists, so that [\b] 
+                  would match either '\' or 'b'. <STRONG>This bit is on by default</STRONG> for 
+                  POSIX-basic regular expressions, but can be unset to force escapes to be 
+                  recognised inside lists.</TD>
+            </TR>
             <TR>
                <TD>no_intervals</TD>
                <TD>No</TD>
@@ -492,4 +519,3 @@ static const syntax_option_type collate;
             or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
    </body>
 </html>
-