mirror of
https://github.com/boostorg/regex.git
synced 2025-07-29 12:07:28 +02:00
Updated Unicode compatibility, and finished off the docs.
[SVN r26681]
This commit is contained in:
@ -24,10 +24,17 @@
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>
|
||||
The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#digraphs">Digraphs</A></dt>
|
||||
<dt><A href="#posix">POSIX Symbolic Names</A></dt>
|
||||
<dt><A href="#unicode">Unicode Symbolic Names</A></dt>
|
||||
</dl>
|
||||
<H3><A name="digraphs"></A>Digraphs</H3>
|
||||
<P>The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj",
|
||||
"Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
|
||||
<H3><A name="posix"></A>POSIX Symbolic Names</H3>
|
||||
<P>The following symbolic names are recognised as valid collating element names,
|
||||
in addition to any single character:</P>
|
||||
<P>
|
||||
@ -342,15 +349,18 @@
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<H3><A name="unicode"></A>Named Unicode Characters</H3>
|
||||
<P>When using <A href="icu_strings.html">Unicode aware regular expressions</A> (with
|
||||
the <EM>u32regex </EM>type), all the normal symbolic names for Unicode
|
||||
characters (those given in UnicodeData.txt) are recognised.</P>
|
||||
<P>
|
||||
<HR>
|
||||
</P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<p>Revised 12 Jan 2005
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004-2005</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -40,7 +40,10 @@
|
||||
<LI>
|
||||
Added <A href="mfc_strings.html">MFC/ATL string wrappers</A>.
|
||||
<LI>
|
||||
Added <A href="unicode.html">Unicode support; based on ICU</A>.</LI></UL>
|
||||
Added <A href="unicode.html">Unicode support; based on ICU</A>.
|
||||
<LI>
|
||||
Changed newline support to recognise \f as a line separator (all character
|
||||
types), and \x85 as a line separator for wide characters / Unicode only.</LI></UL>
|
||||
<P>Boost 1.32.1.</P>
|
||||
<UL>
|
||||
<LI>
|
||||
|
@ -61,6 +61,166 @@
|
||||
string sort keys produced by the system; if you need this, and the default
|
||||
implementation doesn't work on your platform, then you will need to supply a
|
||||
custom traits class.</P>
|
||||
<H3>Unicode</H3>
|
||||
<P>The following comments refer to <A href="http://www.unicode.org/reports/tr18/">Unicode
|
||||
Technical
|
||||
<SPAN>Standard
|
||||
</SPAN>#18: Unicode Regular Expressions</A> version 9.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD>#</TD>
|
||||
<TD>Feature</TD>
|
||||
<TD>Support</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.1</TD>
|
||||
<TD>Hex Notation</TD>
|
||||
<TD>Yes: use \x{DDDD} to refer to code point U+DDDD.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.2</TD>
|
||||
<TD>Character Properties</TD>
|
||||
<TD>All the names listed under the <A href="http://www.unicode.org/reports/tr18/#Categories">General
|
||||
Category Property</A> are supported. Script names and Other Names are
|
||||
not currently supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.3</TD>
|
||||
<TD><A name="Subtraction_and_Intersection">Subtraction</A> and Intersection</TD>
|
||||
<TD>
|
||||
<P>Indirectly supported by forward-lookahead:
|
||||
</P>
|
||||
<P>(?=[[:X:]])[[:Y:]]</P>
|
||||
<P>Gives the intersection of character properties X and Y.</P>
|
||||
<P>(?![[:X:]])[[:Y:]]</P>
|
||||
<P>Gives everything in Y that is not in X (subtraction).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.4</TD>
|
||||
<TD><A name="Simple_Word_Boundaries">Simple Word Boundaries</A></TD>
|
||||
<TD>Conforming: non-spacing marks are included in the set of word characters.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.5</TD>
|
||||
<TD>Caseless Matching</TD>
|
||||
<TD>Supported, note that at this level, case transformations are 1:1, many to many
|
||||
case folding operations are not supported (for example "&szlig;" to "SS").</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.6</TD>
|
||||
<TD>Line Boundaries</TD>
|
||||
<TD>Supported, except that "." matches only one character of "\r\n". Other than
|
||||
that line boundaries match correctly, including not matching in the middle of a
|
||||
"\r\n" sequence.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.7</TD>
|
||||
<TD>Code Points</TD>
|
||||
<TD>Supported: provided you use the <A href="icu_strings.html">u32* algorithms</A>,
|
||||
then UTF-8, UTF-16 and UTF-32 are all treated as sequences of 32-bit code
|
||||
points.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.1</TD>
|
||||
<TD>Canonical Equivalence</TD>
|
||||
<TD>Not supported: it is up to the user of the library to convert all text into
|
||||
the same canonical form as the regular expression.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.2</TD>
|
||||
<TD>Default Grapheme Clusters</TD>
|
||||
<TD>Not supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.3</TD>
|
||||
<TD><!--StartFragment -->
|
||||
<P><A name="Default_Word_Boundaries">Default Word Boundaries</A></P>
|
||||
</TD>
|
||||
<TD>Not supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.4</TD>
|
||||
<TD><!--StartFragment -->
|
||||
<P><A name="Default_Loose_Matches">Default Loose Matches</A></P>
|
||||
</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.5</TD>
|
||||
<TD>Name Properties</TD>
|
||||
<TD>Supported: the expression "[[:name:]]" or \N{name} matches the named character
|
||||
"name".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.6</TD>
|
||||
<TD>Wildcard properties</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.1</TD>
|
||||
<TD>Tailored Punctuation.</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.2</TD>
|
||||
<TD>Tailored Grapheme Clusters</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.3</TD>
|
||||
<TD>Tailored Word Boundaries.</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.4</TD>
|
||||
<TD>Tailored Loose Matches</TD>
|
||||
<TD>Partial support: [[=c=]] matches characters with the same primary equivalence
|
||||
class as "c".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.5</TD>
|
||||
<TD>Tailored Ranges</TD>
|
||||
<TD>Supported: [a-b] matches any character that collates in the range a to b, when
|
||||
the expression is constructed with the <A href="syntax_option_type.html">collate</A>
|
||||
flag set.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.6</TD>
|
||||
<TD>Context Matches</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.7</TD>
|
||||
<TD>Incremental Matches</TD>
|
||||
<TD>Supported: pass the flag <A href="match_flag_type.html">match_partial</A> to
|
||||
the regex algorithms.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.8</TD>
|
||||
<TD>Unicode Set Sharing</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.9</TD>
|
||||
<TD>Possible Match Sets</TD>
|
||||
<TD>Not supported, however this information is used internally to optimise the
|
||||
matching of regular expressions, and return quickly if no match is possible.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.10</TD>
|
||||
<TD>Folded Matching</TD>
|
||||
<TD>Partial Support: It is possible to achieve a similar effect by using a
|
||||
custom regular expression traits class.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.11</TD>
|
||||
<TD>Custom Submatch Evaluation</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
@ -73,3 +233,4 @@
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -155,7 +155,7 @@ aaaa</PRE>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form[[=col=]], matches any character or collating element
|
||||
An expression of the form [[=col=]] matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements, the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
@ -242,6 +242,12 @@ aaaa</PRE>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\N{Name}</TD>
|
||||
<TD>Matches the single character which has the <A href="collating_names.html">symbolic
|
||||
name</A> <EM>name. </EM>For example \N{newline} matches the single
|
||||
character \n.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
@ -298,6 +304,41 @@ aaaa</PRE>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Character Properties</H5>
|
||||
<P dir="ltr">The character property names in the following table are all
|
||||
equivalent to the <A href="character_class_names.html">names used in character
|
||||
classes</A>.</P>
|
||||
<H5>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Form</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
<TD><STRONG>Equivalent character set form</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\pX</TD>
|
||||
<TD>Matches any character that has the property X.</TD>
|
||||
<TD>[[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\p{Name}</TD>
|
||||
<TD>Matches any character that has the property <EM>Name</EM>.</TD>
|
||||
<TD>[[:Name:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\PX</TD>
|
||||
<TD>Matches any character that does not have the property X.</TD>
|
||||
<TD>[^[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\P{Name}</TD>
|
||||
<TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
|
||||
<TD>[^[:Name:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</H5>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
|
@ -169,7 +169,7 @@ aaaa</PRE>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form[[=col=]], matches any character or collating element
|
||||
An expression of the form [[=col=]] matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements, the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
@ -250,6 +250,12 @@ aaaa</PRE>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\N{name}</TD>
|
||||
<TD>Matches the single character which has the <A href="collating_names.html">symbolic
|
||||
name</A> <EM>name. </EM>For example \N{newline} matches the single
|
||||
character \n.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
@ -306,6 +312,38 @@ aaaa</PRE>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Character Properties</H5>
|
||||
<P>The character property names in the following table are all equivalent to the <A href="character_class_names.html">
|
||||
names used in character classes</A>.</P>
|
||||
<P>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Form</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
<TD><STRONG>Equivalent character set form</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\pX</TD>
|
||||
<TD>Matches any character that has the property X.</TD>
|
||||
<TD>[[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\p{Name}</TD>
|
||||
<TD>Matches any character that has the property <EM>Name</EM>.</TD>
|
||||
<TD>[[:Name:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\PX</TD>
|
||||
<TD>Matches any character that does not have the property X.</TD>
|
||||
<TD>[^[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\P{Name}</TD>
|
||||
<TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
|
||||
<TD>[^[:Name:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
|
@ -24,10 +24,17 @@
|
||||
</P>
|
||||
<HR>
|
||||
<p></p>
|
||||
<P>
|
||||
The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<H3>Contents</H3>
|
||||
<dl class="index">
|
||||
<dt><A href="#digraphs">Digraphs</A></dt>
|
||||
<dt><A href="#posix">POSIX Symbolic Names</A></dt>
|
||||
<dt><A href="#unicode">Unicode Symbolic Names</A></dt>
|
||||
</dl>
|
||||
<H3><A name="digraphs"></A>Digraphs</H3>
|
||||
<P>The following are treated as valid digraphs when used as a collating name:</P>
|
||||
<P>"ae", "Ae", "AE", "ch", "Ch", "CH", "ll", "Ll", "LL", "ss", "Ss", "SS", "nj",
|
||||
"Nj", "NJ", "dz", "Dz", "DZ", "lj", "Lj", "LJ".</P>
|
||||
<H3><A name="posix"></A>POSIX Symbolic Names</H3>
|
||||
<P>The following symbolic names are recognised as valid collating element names,
|
||||
in addition to any single character:</P>
|
||||
<P>
|
||||
@ -342,15 +349,18 @@
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<P>
|
||||
<H3><A name="unicode"></A>Named Unicode Characters</H3>
|
||||
<P>When using <A href="icu_strings.html">Unicode aware regular expressions</A> (with
|
||||
the <EM>u32regex </EM>type), all the normal symbolic names for Unicode
|
||||
characters (those given in UnicodeData.txt) are recognised.</P>
|
||||
<P>
|
||||
<HR>
|
||||
</P>
|
||||
<P></P>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
24 Oct 2003
|
||||
<!--webbot bot="Timestamp" endspan i-checksum="39359" --></p>
|
||||
<p><i>&copy; Copyright John Maddock 1998-
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%Y" startspan --> 2003<!--webbot bot="Timestamp" endspan i-checksum="39359" --></i></p>
|
||||
<p>Revised 12 Jan 2005
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan --></p>
|
||||
<p><i>&copy; Copyright John Maddock 2004-2005</i></p>
|
||||
<P><I>Use, modification and distribution are subject to the Boost Software License,
|
||||
Version 1.0. (See accompanying file <A href="../../../LICENSE_1_0.txt">LICENSE_1_0.txt</A>
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
|
@ -40,7 +40,10 @@
|
||||
<LI>
|
||||
Added <A href="mfc_strings.html">MFC/ATL string wrappers</A>.
|
||||
<LI>
|
||||
Added <A href="unicode.html">Unicode support; based on ICU</A>.</LI></UL>
|
||||
Added <A href="unicode.html">Unicode support; based on ICU</A>.
|
||||
<LI>
|
||||
Changed newline support to recognise \f as a line separator (all character
|
||||
types), and \x85 as a line separator for wide characters / Unicode only.</LI></UL>
|
||||
<P>Boost 1.32.1.</P>
|
||||
<UL>
|
||||
<LI>
|
||||
|
@ -61,6 +61,166 @@
|
||||
string sort keys produced by the system; if you need this, and the default
|
||||
implementation doesn't work on your platform, then you will need to supply a
|
||||
custom traits class.</P>
|
||||
<H3>Unicode</H3>
|
||||
<P>The following comments refer to <A href="http://www.unicode.org/reports/tr18/">Unicode
|
||||
Technical
|
||||
<SPAN>Standard
|
||||
</SPAN>#18: Unicode Regular Expressions</A> version 9.</P>
|
||||
<P>
|
||||
<TABLE id="Table3" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD>#</TD>
|
||||
<TD>Feature</TD>
|
||||
<TD>Support</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.1</TD>
|
||||
<TD>Hex Notation</TD>
|
||||
<TD>Yes: use \x{DDDD} to refer to code point U+DDDD.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.2</TD>
|
||||
<TD>Character Properties</TD>
|
||||
<TD>All the names listed under the <A href="http://www.unicode.org/reports/tr18/#Categories">General
|
||||
Category Property</A> are supported. Script names and Other Names are
|
||||
not currently supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.3</TD>
|
||||
<TD><A name="Subtraction_and_Intersection">Subtraction</A> and Intersection</TD>
|
||||
<TD>
|
||||
<P>Indirectly supported by forward-lookahead:
|
||||
</P>
|
||||
<P>(?=[[:X:]])[[:Y:]]</P>
|
||||
<P>Gives the intersection of character properties X and Y.</P>
|
||||
<P>(?![[:X:]])[[:Y:]]</P>
|
||||
<P>Gives everything in Y that is not in X (subtraction).</P>
|
||||
</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.4</TD>
|
||||
<TD><A name="Simple_Word_Boundaries">Simple Word Boundaries</A></TD>
|
||||
<TD>Conforming: non-spacing marks are included in the set of word characters.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.5</TD>
|
||||
<TD>Caseless Matching</TD>
|
||||
<TD>Supported, note that at this level, case transformations are 1:1, many to many
|
||||
case folding operations are not supported (for example "&szlig;" to "SS").</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.6</TD>
|
||||
<TD>Line Boundaries</TD>
|
||||
<TD>Supported, except that "." matches only one character of "\r\n". Other than
|
||||
that line boundaries match correctly, including not matching in the middle of a
|
||||
"\r\n" sequence.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>1.7</TD>
|
||||
<TD>Code Points</TD>
|
||||
<TD>Supported: provided you use the <A href="icu_strings.html">u32* algorithms</A>,
|
||||
then UTF-8, UTF-16 and UTF-32 are all treated as sequences of 32-bit code
|
||||
points.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.1</TD>
|
||||
<TD>Canonical Equivalence</TD>
|
||||
<TD>Not supported: it is up to the user of the library to convert all text into
|
||||
the same canonical form as the regular expression.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.2</TD>
|
||||
<TD>Default Grapheme Clusters</TD>
|
||||
<TD>Not supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.3</TD>
|
||||
<TD><!--StartFragment -->
|
||||
<P><A name="Default_Word_Boundaries">Default Word Boundaries</A></P>
|
||||
</TD>
|
||||
<TD>Not supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.4</TD>
|
||||
<TD><!--StartFragment -->
|
||||
<P><A name="Default_Loose_Matches">Default Loose Matches</A></P>
|
||||
</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.5</TD>
|
||||
<TD>Name Properties</TD>
|
||||
<TD>Supported: the expression "[[:name:]]" or \N{name} matches the named character
|
||||
"name".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>2.6</TD>
|
||||
<TD>Wildcard properties</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.1</TD>
|
||||
<TD>Tailored Punctuation.</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.2</TD>
|
||||
<TD>Tailored Grapheme Clusters</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.3</TD>
|
||||
<TD>Tailored Word Boundaries.</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.4</TD>
|
||||
<TD>Tailored Loose Matches</TD>
|
||||
<TD>Partial support: [[=c=]] matches characters with the same primary equivalence
|
||||
class as "c".</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.5</TD>
|
||||
<TD>Tailored Ranges</TD>
|
||||
<TD>Supported: [a-b] matches any character that collates in the range a to b, when
|
||||
the expression is constructed with the <A href="syntax_option_type.html">collate</A>
|
||||
flag set.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.6</TD>
|
||||
<TD>Context Matches</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.7</TD>
|
||||
<TD>Incremental Matches</TD>
|
||||
<TD>Supported: pass the flag <A href="match_flag_type.html">match_partial</A> to
|
||||
the regex algorithms.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.8</TD>
|
||||
<TD>Unicode Set Sharing</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.9</TD>
|
||||
<TD>Possible Match Sets</TD>
|
||||
<TD>Not supported, however this information is used internally to optimise the
|
||||
matching of regular expressions, and return quickly if no match is possible.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.10</TD>
|
||||
<TD>Folded Matching</TD>
|
||||
<TD>Partial Support: It is possible to achieve a similar effect by using a
|
||||
custom regular expression traits class.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>3.11</TD>
|
||||
<TD>Custom Submatch Evaluation</TD>
|
||||
<TD>Not Supported.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<HR>
|
||||
<p>Revised
|
||||
<!--webbot bot="Timestamp" S-Type="EDITED" S-Format="%d %B, %Y" startspan -->
|
||||
@ -73,3 +233,4 @@
|
||||
or copy at <A href="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</A>)</I></P>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
@ -155,7 +155,7 @@ aaaa</PRE>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form[[=col=]], matches any character or collating element
|
||||
An expression of the form [[=col=]] matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements, the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
@ -242,6 +242,12 @@ aaaa</PRE>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\N{Name}</TD>
|
||||
<TD>Matches the single character which has the <A href="collating_names.html">symbolic
|
||||
name</A> <EM>name. </EM>For example \N{newline} matches the single
|
||||
character \n.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
@ -298,6 +304,41 @@ aaaa</PRE>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Character Properties</H5>
|
||||
<P dir="ltr">The character property names in the following table are all
|
||||
equivalent to the <A href="character_class_names.html">names used in character
|
||||
classes</A>.</P>
|
||||
<H5>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Form</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
<TD><STRONG>Equivalent character set form</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\pX</TD>
|
||||
<TD>Matches any character that has the property X.</TD>
|
||||
<TD>[[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\p{Name}</TD>
|
||||
<TD>Matches any character that has the property <EM>Name</EM>.</TD>
|
||||
<TD>[[:Name:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\PX</TD>
|
||||
<TD>Matches any character that does not have the property X.</TD>
|
||||
<TD>[^[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\P{Name}</TD>
|
||||
<TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
|
||||
<TD>[^[:Name:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</H5>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
|
@ -169,7 +169,7 @@ aaaa</PRE>
|
||||
<P>matches a NUL character.</P>
|
||||
<H5>Equivalence classes:</H5>
|
||||
<P>
|
||||
An expression of the form[[=col=]], matches any character or collating element
|
||||
An expression of the form [[=col=]] matches any character or collating element
|
||||
whose primary sort key is the same as that for collating element <EM>col</EM>,
|
||||
as with collating elements, the name <EM>col</EM> may be a <A href="collating_names.html">
|
||||
symbolic name</A>. A primary sort key is one that ignores case,
|
||||
@ -250,6 +250,12 @@ aaaa</PRE>
|
||||
<TD>An octal escape sequence - matches the single character whose code point is
|
||||
0ddd.</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\N{name}</TD>
|
||||
<TD>Matches the single character which has the <A href="collating_names.html">symbolic
|
||||
name</A> <EM>name. </EM>For example \N{newline} matches the single
|
||||
character \n.</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>"Single character" character classes:</H5>
|
||||
@ -306,6 +312,38 @@ aaaa</PRE>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Character Properties</H5>
|
||||
<P>The character property names in the following table are all equivalent to the <A href="character_class_names.html">
|
||||
names used in character classes</A>.</P>
|
||||
<P>
|
||||
<TABLE id="Table9" cellSpacing="1" cellPadding="1" width="100%" border="0">
|
||||
<TR>
|
||||
<TD><STRONG>Form</STRONG></TD>
|
||||
<TD><STRONG>Description</STRONG></TD>
|
||||
<TD><STRONG>Equivalent character set form</STRONG></TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\pX</TD>
|
||||
<TD>Matches any character that has the property X.</TD>
|
||||
<TD>[[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\p{Name}</TD>
|
||||
<TD>Matches any character that has the property <EM>Name</EM>.</TD>
|
||||
<TD>[[:Name:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\PX</TD>
|
||||
<TD>Matches any character that does not have the property X.</TD>
|
||||
<TD>[^[:X:]]</TD>
|
||||
</TR>
|
||||
<TR>
|
||||
<TD>\P{Name}</TD>
|
||||
<TD>Matches any character that does not have the property <EM>Name</EM>.</TD>
|
||||
<TD>[^[:Name:]]</TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
</P>
|
||||
<H5>Word Boundaries</H5>
|
||||
<P>The following escape sequences match the boundaries of words:</P>
|
||||
<P>
|
||||
|
@ -842,6 +842,8 @@ void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state,
|
||||
l_map[0] |= mask_init;
|
||||
l_map['\n'] |= mask;
|
||||
l_map['\r'] |= mask;
|
||||
l_map['\f'] |= mask;
|
||||
l_map[0x85] |= mask;
|
||||
}
|
||||
// now figure out if we can match a NULL string at this point:
|
||||
if(pnull)
|
||||
|
@ -2012,7 +2012,7 @@ bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_st
|
||||
m_alt_jumps.pop_back();
|
||||
this->m_pdata->m_data.align();
|
||||
re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
|
||||
BOOST_ASSERT(jmp->type = syntax_element_jump);
|
||||
BOOST_ASSERT(jmp->type == syntax_element_jump);
|
||||
jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
|
||||
}
|
||||
return true;
|
||||
|
@ -105,12 +105,18 @@ inline bool is_combining<wchar_t>(wchar_t c)
|
||||
//
// is_separator:
// Returns true when c is one of the line-separator characters:
// '\n', '\r', '\f', U+2028, U+2029 or U+0085.
//
template <class charT>
inline bool is_separator(charT c)
{
   // Compare the numeric separators in a type that is at least 32 bits wide.
   // Truncating to 16 bits would make wider code points whose low 16 bits
   // happen to equal 0x2028 / 0x2029 / 0x85 (for example U+12028) look like
   // separators.  The character value is converted rather than the constant,
   // because narrowing the constant to an 8-bit charT would alias 0x2028
   // onto an ordinary character.
   const unsigned long code = static_cast<unsigned long>(c);
   return BOOST_REGEX_MAKE_BOOL(
      (c == static_cast<charT>('\n'))
      || (c == static_cast<charT>('\r'))
      || (c == static_cast<charT>('\f'))
      || (code == 0x2028ul)
      || (code == 0x2029ul)
      || (code == 0x85ul));
}
|
||||
template <>
|
||||
inline bool is_separator<char>(char c)
|
||||
{
|
||||
return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r'));
|
||||
return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f'));
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -347,8 +347,8 @@ icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_
|
||||
U_GC_LU_MASK,
|
||||
mask_unicode,
|
||||
U_GC_LU_MASK,
|
||||
char_class_type(U_GC_L_MASK | U_GC_ND_MASK) | mask_underscore,
|
||||
char_class_type(U_GC_L_MASK | U_GC_ND_MASK) | mask_underscore,
|
||||
char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
|
||||
char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore,
|
||||
char_class_type(U_GC_ND_MASK) | mask_xdigit,
|
||||
};
|
||||
|
||||
|
@ -86,6 +86,64 @@ void compare_result(const MR1& w1, const MR2& w2, boost::mpl::int_<1> const*)
|
||||
}
|
||||
}
|
||||
|
||||
//
// test_icu_grep:
// Iterates over every match of r in search_text with a u32regex_iterator,
// checking each match against the current test's answer table, exercising
// the iterator's == and != operators, and validating the prefix ($`) and
// suffix ($') sub-matches of each result.
//
void test_icu_grep(const boost::u32regex& r, const std::vector< ::UChar32>& search_text)
{
   typedef std::vector< ::UChar32>::const_iterator const_iterator;
   typedef boost::u32regex_iterator<const_iterator> test_iterator;

   boost::regex_constants::match_flag_type opts = test_info<wchar_t>::match_options();
   const int* answer_table = test_info<wchar_t>::answer_table();

   const_iterator text_begin = search_text.begin();
   const_iterator text_end = search_text.end();

   test_iterator pos(text_begin, text_end, r, opts);
   test_iterator finish;
   // A second iterator advanced in lock-step with pos, used to check
   // the comparison operators.
   test_iterator shadow(pos);
   // Where the previous match ended; the next prefix must start here.
   const_iterator previous_end = text_begin;

   while(pos != finish)
   {
      // both comparison operators must agree that the copies are equal:
      if(pos != shadow)
      {
         BOOST_REGEX_TEST_ERROR("Failed iterator != comparison.", wchar_t);
      }
      if(!(pos == shadow))
      {
         BOOST_REGEX_TEST_ERROR("Failed iterator == comparison.", wchar_t);
      }

      test_result(*pos, text_begin, answer_table);

      // check $` (the prefix):
      if(pos->prefix().first != previous_end)
      {
         BOOST_REGEX_TEST_ERROR("Incorrect position for start of $`", wchar_t);
      }
      if(pos->prefix().second != (*pos)[0].first)
      {
         BOOST_REGEX_TEST_ERROR("Incorrect position for end of $`", wchar_t);
      }
      if(pos->prefix().matched != (pos->prefix().first != pos->prefix().second))
      {
         BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $`", wchar_t);
      }

      // check $' (the suffix):
      if(pos->suffix().first != (*pos)[0].second)
      {
         BOOST_REGEX_TEST_ERROR("Incorrect position for start of $'", wchar_t);
      }
      if(pos->suffix().second != text_end)
      {
         BOOST_REGEX_TEST_ERROR("Incorrect position for end of $'", wchar_t);
      }
      if(pos->suffix().matched != (pos->suffix().first != pos->suffix().second))
      {
         BOOST_REGEX_TEST_ERROR("Incorrect position for matched member of $'", wchar_t);
      }

      previous_end = (*pos)[0].second;
      ++pos;
      ++shadow;

      // step the answer table on to the next set of answers
      // (each set is terminated by -2):
      if(*answer_table != -2)
      {
         while(*answer_table++ != -2)
         {
         }
      }
   }

   if(answer_table[0] >= 0)
   {
      // the table still lists a match that the iterator never produced:
      BOOST_REGEX_TEST_ERROR("Expected match was not found.", wchar_t);
   }
}
|
||||
|
||||
void test_icu(const wchar_t&, const test_regex_search_tag& )
|
||||
{
|
||||
@ -204,6 +262,10 @@ void test_icu(const wchar_t&, const test_regex_search_tag& )
|
||||
}
|
||||
}
|
||||
}
|
||||
//
|
||||
// finally try a grep:
|
||||
//
|
||||
test_icu_grep(r, search_text);
|
||||
}
|
||||
catch(const boost::bad_expression& e)
|
||||
{
|
||||
|
@ -305,5 +305,65 @@ void test_sets2()
|
||||
TEST_INVALID_REGEX("\\N{}", perl);
|
||||
TEST_INVALID_REGEX("\\N{invalid-name}", perl);
|
||||
TEST_INVALID_REGEX("\\N{zero", perl);
|
||||
|
||||
// and repeat with POSIX-extended syntax:
|
||||
TEST_REGEX_SEARCH("\\pl+", extended, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\Pl+", extended, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\pu+", extended, "abABCab", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\Pu+", extended, "ABabcAB", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\pd+", extended, "AB012AB", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\PD+", extended, "01abc01", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\ps+", extended, "AB AB", match_default, make_array(2, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\PS+", extended, " abc ", match_default, make_array(2, 5, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH("\\p{alnum}+", extended, "-%@a0X_-", match_default, make_array(3, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{alpha}+", extended, " -%@aX_0-", match_default, make_array(4, 6, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{blank}+", extended, "a \tb", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{cntrl}+", extended, " a\n\tb", match_default, make_array(2, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{digit}+", extended, "a019b", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{graph}+", extended, " a%b ", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{lower}+", extended, "AabC", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{print}+", extended, "AabC", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{punct}+", extended, " %-&\t", match_default, make_array(1, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{space}+", extended, "a \n\t\rb", match_default, make_array(1, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{upper}+", extended, "aBCd", match_default, make_array(1, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\p{xdigit}+", extended, "p0f3Cx", match_default, make_array(1, 5, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{alnum}+", extended, "-%@a", match_default, make_array(0, 3, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{alpha}+", extended, " -%@a", match_default, make_array(0, 4, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{blank}+", extended, "a ", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{cntrl}+", extended, " a\n", match_default, make_array(0, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{digit}+", extended, "a0", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{graph}+", extended, " a", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{lower}+", extended, "Aa", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{print}+", extended, "Absc", match_default, make_array(-2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{punct}+", extended, " %", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{space}+", extended, "a ", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{upper}+", extended, "aB", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\P{xdigit}+", extended, "pf", match_default, make_array(0, 1, -2, -2));
|
||||
|
||||
TEST_INVALID_REGEX("\\p{invalid class}", extended);
|
||||
TEST_INVALID_REGEX("\\p{upper", extended);
|
||||
TEST_INVALID_REGEX("\\p{", extended);
|
||||
TEST_INVALID_REGEX("\\p", extended);
|
||||
TEST_INVALID_REGEX("\\P{invalid class}", extended);
|
||||
TEST_INVALID_REGEX("\\P{upper", extended);
|
||||
TEST_INVALID_REGEX("\\P{", extended);
|
||||
TEST_INVALID_REGEX("\\P", extended);
|
||||
|
||||
// try named characters:
|
||||
TEST_REGEX_SEARCH("\\N{zero}", extended, "0", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{one}", extended, "1", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{two}", extended, "2", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{three}", extended, "3", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{a}", extended, "bac", match_default, make_array(1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{\xf0}", extended, "b\xf0x", match_default, make_array(1, 2, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{right-curly-bracket}", extended, "}", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH("\\N{NUL}", extended, "\0", match_default, make_array(0, 1, -2, -2));
|
||||
|
||||
TEST_INVALID_REGEX("\\N", extended);
|
||||
TEST_INVALID_REGEX("\\N{", extended);
|
||||
TEST_INVALID_REGEX("\\N{}", extended);
|
||||
TEST_INVALID_REGEX("\\N{invalid-name}", extended);
|
||||
TEST_INVALID_REGEX("\\N{zero", extended);
|
||||
}
|
||||
|
||||
|
@ -147,6 +147,14 @@ void test_unicode()
|
||||
TEST_REGEX_SEARCH_U(L"[\\N{MODIFIER LETTER LOW ACUTE ACCENT}]", perl, L"\x02CF", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH_U(L"[\\N{SUPERSCRIPT ONE}]", perl, L"\x00B9", match_default, make_array(0, 1, -2, -2));
|
||||
TEST_REGEX_SEARCH_U(L"\\N{CJK UNIFIED IDEOGRAPH-7FED}", perl, L"\x7FED", match_default, make_array(0, 1, -2, -2));
|
||||
|
||||
TEST_REGEX_SEARCH_U(L"\\w+", perl, L" e\x301" L"coute ", match_default, make_array(1, 8, -2, -2));
|
||||
TEST_REGEX_SEARCH_U(L"^", perl, L" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ",
|
||||
match_default | match_not_bol, make_array(2, 2, -2, 4, 4, -2, 7, 7, -2, 9, 9, -2, 11, 11, -2, 13, 13, -2, 15, 15, -2, -2));
|
||||
TEST_REGEX_SEARCH_U(L"$", perl, L" \x2028 \x2029 \x000D\x000A \x000A \x000C \x000D \x0085 ",
|
||||
match_default | match_not_eol, make_array(1, 1, -2, 3, 3, -2, 5, 5, -2, 8, 8, -2, 10, 10, -2, 12, 12, -2, 14, 14, -2, -2));
|
||||
TEST_REGEX_SEARCH_U(L".", perl, L" \x2028\x2029\x000D\x000A\x000A\x000C\x000D\x0085 ",
|
||||
match_default | match_not_dot_newline, make_array(0, 1, -2, 9, 10, -2, -2));
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -18,6 +18,7 @@
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
@ -27,9 +28,16 @@
|
||||
std::string g_char_type;
|
||||
std::string g_data_type;
|
||||
std::map<std::string, std::string> g_table;
|
||||
std::map<std::string, std::pair<std::string, std::string> > g_help_table;
|
||||
|
||||
void add(std::string key, std::string data)
|
||||
{
|
||||
g_table[key] = data;
|
||||
if(key.size() <= 2)
|
||||
g_help_table[data].first = key;
|
||||
else
|
||||
g_help_table[data].second = key;
|
||||
|
||||
std::string::size_type i = 0;
|
||||
while(i < key.size())
|
||||
{
|
||||
@ -41,7 +49,6 @@ void add(std::string key, std::string data)
|
||||
++i;
|
||||
}
|
||||
}
|
||||
g_table[key] = data;
|
||||
}
|
||||
|
||||
#define ADD(x, y) add(BOOST_STRINGIZE(x), BOOST_STRINGIZE(y))
|
||||
@ -88,6 +95,33 @@ void generate_code()
|
||||
g_table.clear();
|
||||
}
|
||||
|
||||
void generate_html()
|
||||
{
|
||||
// start by producing a sorted list:
|
||||
std::vector<std::pair<std::string, std::string> > v;
|
||||
std::map<std::string, std::pair<std::string, std::string> >::const_iterator i, j;
|
||||
i = g_help_table.begin();
|
||||
j = g_help_table.end();
|
||||
while(i != j)
|
||||
{
|
||||
v.push_back(i->second);
|
||||
++i;
|
||||
}
|
||||
std::sort(v.begin(), v.end());
|
||||
|
||||
std::vector<std::pair<std::string, std::string> >::const_iterator h, k;
|
||||
h = v.begin();
|
||||
k = v.end();
|
||||
|
||||
std::cout << "<table width=\"100%\"><tr><td><b>Short Name</b></td><td><b>Long Name</b></td></tr>\n";
|
||||
while(h != k)
|
||||
{
|
||||
std::cout << "<tr><td>" << (h->first.size() ? h->first : std::string(" ")) << "</td><td>" << h->second << "</td></tr>\n";
|
||||
++h;
|
||||
}
|
||||
std::cout << "</table>\n\n";
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
g_char_type = "::UChar32";
|
||||
@ -178,5 +212,6 @@ int main()
|
||||
ADD(Titlecase, U_GC_LT_MASK);
|
||||
|
||||
generate_code();
|
||||
generate_html();
|
||||
return 0;
|
||||
}
|
||||
|
Reference in New Issue
Block a user