merged changes in regex5 branch

[SVN r26692]
This commit is contained in:
John Maddock
2005-01-13 17:06:21 +00:00
parent de0ab9092a
commit 71a0e020e2
275 changed files with 37305 additions and 27154 deletions

View File

@ -5,6 +5,41 @@ subproject libs/regex/example ;
# bring in the rules for testing
import testing ;
#
# test for MFC by looking inside VC++ include directories:
#
# Probe for the MFC/ATL headers once only: gMFC_CHECK is set on first entry,
# so the body is skipped if this file is read again with the variable set.
# For each Visual Studio "COMNTOOLS" variable that is set, look for
# cstringt.h in the corresponding atlmfc include directory and, when found,
# append a toolset-specific TEST_MFC=1 define to REGEX_MFC_OPTS.
if ! $(gMFC_CHECK)
{
gMFC_CHECK = true ;
# Visual C++ 7.1
if $(VS71COMNTOOLS)
{
# Re-join the list elements with single spaces
# (presumably the path contains spaces and was split into a list - TODO confirm).
VS71COMNTOOLS = $(VS71COMNTOOLS:J=" ") ;
# NOTE(review): no separator between the variable and "..", unlike the
# VSCOMNTOOLS case below; assumes the variable ends in a backslash - confirm.
if [ GLOB $(VS71COMNTOOLS)..\\..\\VC7\\atlmfc\\include : cstringt.h ]
{
ECHO MFC/ATL regex wrappers will be tested when building with VC7.1 ;
REGEX_MFC_OPTS += "<vc-7_1><*><define>TEST_MFC=1" ;
}
}
# Visual C++ 7.0
if $(VSCOMNTOOLS)
{
# Re-join the list elements with single spaces (see above).
VSCOMNTOOLS = $(VSCOMNTOOLS:J=" ") ;
if [ GLOB $(VSCOMNTOOLS)\\..\\..\\VC7\\atlmfc\\include : cstringt.h ]
{
ECHO MFC/ATL regex wrappers will be tested when building with VC7 ;
REGEX_MFC_OPTS += "<vc7><*><define>TEST_MFC=1" ;
}
}
# Visual C++ 8.0
if $(VS80COMNTOOLS)
{
# Re-join the list elements with single spaces (see above).
VS80COMNTOOLS = $(VS80COMNTOOLS:J=" ") ;
# NOTE(review): the directory name "VC8" should be checked against a real
# VC8 installation layout - confirm.
if [ GLOB $(VS80COMNTOOLS)..\\..\\VC8\\atlmfc\\include : cstringt.h ]
{
ECHO MFC/ATL regex wrappers will be tested when building with VC8 ;
REGEX_MFC_OPTS += "<vc-8_0><*><define>TEST_MFC=1" ;
}
}
}
rule regex-test-run ( sources + : input * )
{
return [
@ -20,6 +55,7 @@ rule regex-test-run ( sources + : input * )
: # test-files
: # requirements
<threading>multi
$(REGEX_MFC_OPTS)
: # test name
] ;
}
@ -29,6 +65,8 @@ test-suite regex-examples :
[ regex-test-run timer/regex_timer.cpp <template>../build/msvc-stlport-tricky : $(BOOST_ROOT)/libs/regex/example/timer/input_script.txt ]
[ regex-test-run jgrep/jgrep.cpp jgrep/main.cpp : -n boost/ $(BOOST_ROOT)/boost/regex.hpp ]
[ regex-test-run snippets/credit_card_example.cpp ]
[ regex-test-run snippets/mfc_example.cpp ]
[ regex-test-run snippets/icu_example.cpp ]
[ regex-test-run snippets/partial_regex_grep.cpp : $(BOOST_ROOT)/libs/regex/index.htm ]
[ regex-test-run snippets/partial_regex_match.cpp : 1234-5678-8765-4 ]
[ regex-test-run snippets/regex_grep_example_1.cpp : $(BOOST_ROOT)/boost/rational.hpp ]

View File

@ -1,7 +1,7 @@
# copyright John Maddock 2003
project
: requirements <threading>multi
: requirements <threading>multi <link>shared:<define>BOOST_REGEX_DYN_LINK=1
;
@ -26,6 +26,8 @@ test-suite regex-examples :
[ regex-test-run timer/regex_timer.cpp : $(BOOST_ROOT)/libs/regex/example/timer/input_script.txt ]
[ regex-test-run jgrep/jgrep.cpp jgrep/main.cpp : -n boost/ $(BOOST_ROOT)/boost/regex.hpp ]
[ regex-test-run snippets/credit_card_example.cpp ]
[ regex-test-run snippets/mfc_example.cpp ]
[ regex-test-run snippets/icu_example.cpp ]
[ regex-test-run snippets/partial_regex_grep.cpp : $(BOOST_ROOT)/libs/regex/index.htm ]
[ regex-test-run snippets/partial_regex_match.cpp : 1234-5678-8765-4 ]
[ regex-test-run snippets/regex_grep_example_1.cpp : $(BOOST_ROOT)/boost/rational.hpp ]

View File

@ -1,317 +0,0 @@
/*
* (C) Copyright Christian Engstrom 2001.
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#include "iso8859_1_regex_traits.hpp"
bool iso8859_1_regex_traits::is_class(char c, boost::uint_fast32_t f) const
{
static const boost::uint_fast32_t cntrl = base::char_class_cntrl;
static const boost::uint_fast32_t space = base::char_class_space;
static const boost::uint_fast32_t blank = base::char_class_blank;
static const boost::uint_fast32_t digit = base::char_class_digit;
static const boost::uint_fast32_t xdigit = base::char_class_xdigit;
static const boost::uint_fast32_t underscore = base::char_class_underscore;
static const boost::uint_fast32_t punct = base::char_class_punct;
static const boost::uint_fast32_t upper = base::char_class_upper |
base::char_class_alpha;
static const boost::uint_fast32_t lower = base::char_class_lower |
base::char_class_alpha;
static const boost::uint_fast32_t flag_table[UCHAR_MAX + 1] = {
// Hex Oct Dec Char
cntrl , // 0 0 0
cntrl , // 1 1 1
cntrl , // 2 2 2
cntrl , // 3 3 3
cntrl , // 4 4 4
cntrl , // 5 5 5
cntrl , // 6 6 6
cntrl , // 7 7 7
cntrl , // 8 10 8
cntrl | space | blank , // 9 11 9 <HT>
cntrl | space , // a 12 10 <LF>
cntrl | space , // b 13 11 <VT>
cntrl | space , // c 14 12 <FF>
cntrl | space , // d 15 13 <CR>
cntrl , // e 16 14
cntrl , // f 17 15
cntrl , // 10 20 16
cntrl , // 11 21 17
cntrl , // 12 22 18
cntrl , // 13 23 19
cntrl , // 14 24 20
cntrl , // 15 25 21
cntrl , // 16 26 22
cntrl , // 17 27 23
cntrl , // 18 30 24
cntrl , // 19 31 25
cntrl , // 1a 32 26
cntrl , // 1b 33 27
cntrl , // 1c 34 28
cntrl , // 1d 35 29
cntrl , // 1e 36 30
cntrl , // 1f 37 31
space | blank , // 20 40 32 | Space
punct , // 21 41 33 !
punct , // 22 42 34 "
punct , // 23 43 35 #
punct , // 24 44 36 $
punct , // 25 45 37 %
punct , // 26 46 38 &
punct , // 27 47 39 '
punct , // 28 50 40 (
punct , // 29 51 41 )
punct , // 2a 52 42 *
punct , // 2b 53 43 +
punct , // 2c 54 44
punct , // 2d 55 45 -
punct , // 2e 56 46 .
punct , // 2f 57 47 /
digit | xdigit , // 30 60 48 0
digit | xdigit , // 31 61 49 1
digit | xdigit , // 32 62 50 2
digit | xdigit , // 33 63 51 3
digit | xdigit , // 34 64 52 4
digit | xdigit , // 35 65 53 5
digit | xdigit , // 36 66 54 6
digit | xdigit , // 37 67 55 7
digit | xdigit , // 38 70 56 8
digit | xdigit , // 39 71 57 9
punct , // 3a 72 58 :
punct , // 3b 73 59 ;
punct , // 3c 74 60 <
punct , // 3d 75 61 =
punct , // 3e 76 62 >
punct , // 3f 77 63 ?
punct , // 40 100 64 @
upper | xdigit , // 41 101 65 A
upper | xdigit , // 42 102 66 B
upper | xdigit , // 43 103 67 C
upper | xdigit , // 44 104 68 D
upper | xdigit , // 45 105 69 E
upper | xdigit , // 46 106 70 F
upper , // 47 107 71 G
upper , // 48 110 72 H
upper , // 49 111 73 I
upper , // 4a 112 74 J
upper , // 4b 113 75 K
upper , // 4c 114 76 L
upper , // 4d 115 77 M
upper , // 4e 116 78 N
upper , // 4f 117 79 O
upper , // 50 120 80 P
upper , // 51 121 81 Q
upper , // 52 122 82 R
upper , // 53 123 83 S
upper , // 54 124 84 T
upper , // 55 125 85 U
upper , // 56 126 86 V
upper , // 57 127 87 W
upper , // 58 130 88 X
upper , // 59 131 89 Y
upper , // 5a 132 90 Z
punct , // 5b 133 91 [ | Left square bracket
punct , // 5c 134 92 \ | Backslash
punct , // 5d 135 93 ] | Right square bracket
punct , // 5e 136 94 ^ | Circumflex
punct | underscore , // 5f 137 95 _ | Underscore
punct , // 60 140 96 `
lower | xdigit , // 61 141 97 a
lower | xdigit , // 62 142 98 b
lower | xdigit , // 63 143 99 c
lower | xdigit , // 64 144 100 d
lower | xdigit , // 65 145 101 e
lower | xdigit , // 66 146 102 f
lower , // 67 147 103 g
lower , // 68 150 104 h
lower , // 69 151 105 i
lower , // 6a 152 106 j
lower , // 6b 153 107 k
lower , // 6c 154 108 l
lower , // 6d 155 109 m
lower , // 6e 156 110 n
lower , // 6f 157 111 o
lower , // 70 160 112 p
lower , // 71 161 113 q
lower , // 72 162 114 r
lower , // 73 163 115 s
lower , // 74 164 116 t
lower , // 75 165 117 u
lower , // 76 166 118 v
lower , // 77 167 119 w
lower , // 78 170 120 x
lower , // 79 171 121 y
lower , // 7a 172 122 z
punct , // 7b 173 123 {
punct , // 7c 174 124 |
punct , // 7d 175 125 }
punct , // 7e 176 126 ~
cntrl , // 7f 177 127
cntrl , // 80 200 128
cntrl , // 81 201 129
cntrl , // 82 202 130
cntrl , // 83 203 131
cntrl , // 84 204 132
cntrl , // 85 205 133
cntrl , // 86 206 134
cntrl , // 87 207 135
cntrl , // 88 210 136
cntrl , // 89 211 137
cntrl , // 8a 212 138
cntrl , // 8b 213 139
cntrl , // 8c 214 140
cntrl , // 8d 215 141
cntrl , // 8e 216 142
cntrl , // 8f 217 143
cntrl , // 90 220 144
cntrl , // 91 221 145
cntrl , // 92 222 146
cntrl , // 93 223 147
cntrl , // 94 224 148
cntrl , // 95 225 149
cntrl , // 96 226 150
cntrl , // 97 227 151
cntrl , // 98 230 152
cntrl , // 99 231 153
cntrl , // 9a 232 154
cntrl , // 9b 233 155
cntrl , // 9c 234 156
cntrl , // 9d 235 157
cntrl , // 9e 236 158
cntrl , // 9f 237 159
punct , // a0 240 160 <20> | Non-breaking space
punct , // a1 241 161 <20> | Inverted exclamation mark
punct , // a2 242 162 <20> | Cent sign
punct , // a3 243 163 <20> | Pound sign
punct , // a4 244 164 <20> | Currency sign
punct , // a5 245 165 <20> | Yen sign
punct , // a6 246 166 <20> | Broken bar
punct , // a7 247 167 <20> | Section sign
punct , // a8 250 168 <20> | Diaeresis
punct , // a9 251 169 <20> | Copyright sign
punct , // aa 252 170 <20> | Feminine ordinal indicator
punct , // ab 253 171 <20> | Left-pointing double angle
punct , // ac 254 172 <20> | Not sign
punct , // ad 255 173 <20> | Soft hyphen
punct , // ae 256 174 <20> | Registered trademark sign
punct , // af 257 175 <20> | Macron
punct , // b0 260 176 <20> | Degree sign
punct , // b1 261 177 <20> | Plus-minus sign
punct , // b2 262 178 <20> | Superscript two
punct , // b3 263 179 <20> | Superscript three
punct , // b4 264 180 <20> | Acute accent
punct , // b5 265 181 <20> | Micro sign
punct , // b6 266 182 <20> | Pilcrow sign
punct , // b7 267 183 <20> | Middle dot
punct , // b8 270 184 <20> | Cedilla
punct , // b9 271 185 <20> | Superscript one
punct , // ba 272 186 <20> | Masculine ordinal indicator
punct , // bb 273 187 <20> | Right-pointing double angle
punct , // bc 274 188 <20> | Fraction one quarter
punct , // bd 275 189 <20> | Fraction one half
punct , // be 276 190 <20> | Fraction three quarters
punct , // bf 277 191 <20> | Inverted question mark
upper , // c0 300 192 <20> | "A" with grave accent
upper , // c1 301 193 <20> | "A" with acute accent
upper , // c2 302 194 <20> | "A" with circumflex
upper , // c3 303 195 <20> | "A" with tilde
upper , // c4 304 196 <20> | "A" with diaeresis
upper , // c5 305 197 <20> | "A" with ring above
upper , // c6 306 198 <20> | "AE" ligature
upper , // c7 307 199 <20> | "C" with cedilla
upper , // c8 310 200 <20> | "E" with grave accent
upper , // c9 311 201 <20> | "E" with acute accent
upper , // ca 312 202 <20> | "E" with circumflex
upper , // cb 313 203 <20> | "E" with diaeresis
upper , // cc 314 204 <20> | "I" with grave accent
upper , // cd 315 205 <20> | "I" with acute accent
upper , // ce 316 206 <20> | "I" with circumflex
upper , // cf 317 207 <20> | "I" with diaeresis
upper , // d0 320 208 <20> | Upper Icelandic D (Eth)
upper , // d1 321 209 <20> | "N" with tilde
upper , // d2 322 210 <20> | "O" with grave accent
upper , // d3 323 211 <20> | "O" with acute accent
upper , // d4 324 212 <20> | "O" with circumflex
upper , // d5 325 213 <20> | "O" with tilde
upper , // d6 326 214 <20> | "O" with diaeresis
punct , // d7 327 215 <20> | Multiplication sign
upper , // d8 330 216 <20> | "O" with stroke
upper , // d9 331 217 <20> | "U" with grave accent
upper , // da 332 218 <20> | "U" with acute accent
upper , // db 333 219 <20> | "U" with circumflex
upper , // dc 334 220 <20> | "U" with diaeresis
upper , // dd 335 221 <20> | "Y" with acute accent
upper , // de 336 222 <20> | Upper Icelandic T (Thorn)
lower , // df 337 223 <20> | German sharp s
lower , // e0 340 224 <20> | "a" with grave accent
lower , // e1 341 225 <20> | "a" with acute accent
lower , // e2 342 226 <20> | "a" with circumflex
lower , // e3 343 227 <20> | "a" with tilde
lower , // e4 344 228 <20> | "a" with diaeresis
lower , // e5 345 229 <20> | "a" with ring above
lower , // e6 346 230 <20> | Latin small ligature ae
lower , // e7 347 231 <20> | "c" with cedilla
lower , // e8 350 232 <20> | "e" with grave accent
lower , // e9 351 233 <20> | "e" with acute accent
lower , // ea 352 234 <20> | "e" with circumflex
lower , // eb 353 235 <20> | "e" with diaeresis
lower , // ec 354 236 <20> | "i" with grave accent
lower , // ed 355 237 <20> | "i" with acute accent
lower , // ee 356 238 <20> | "i" with circumflex
lower , // ef 357 239 <20> | "i" with diaeresis
lower , // f0 360 240 <20> | Lower Icelandic d (eth)
lower , // f1 361 241 <20> | "n" with tilde
lower , // f2 362 242 <20> | "o" with grave accent
lower , // f3 363 243 <20> | "o" with acute accent
lower , // f4 364 244 <20> | "o" with circumflex
lower , // f5 365 245 <20> | "o" with tilde
lower , // f6 366 246 <20> | "o" with diaeresis
punct , // f7 367 247 <20> | Division sign
lower , // f8 370 248 <20> | "o" with oblique bar
lower , // f9 371 249 <20> | "u" with grave accent
lower , // fa 372 250 <20> | "u" with acute accent
lower , // fb 373 251 <20> | "u" with circumflex
lower , // fc 374 252 <20> | "u" with diaeresis
lower , // fd 375 253 <20> | "y" with acute accent
lower , // fe 376 254 <20> | Lower Icelandic t (thorn)
lower // ff 377 255 <20> | "y" with diaeresis
};
return (flag_table[unsigned char(c)] & f) != 0;
}
//============================================================================
//
// Map a character to the form used for comparisons.
//
// c:     the character to translate.
// icase: when true, c is folded to lower case through an ISO 8859-1
//        table; when false, c is returned unchanged.
//
char iso8859_1_regex_traits::translate(char c, bool icase) const
{
   //----------------------------------------------------------------------------
   // 'A'..'Z' (65..90)                  -> 'a'..'z' (97..122)
   // 0xC0..0xD6 and 0xD8..0xDE          -> 0xE0..0xF6 and 0xF8..0xFE
   // (0xD7, the multiplication sign, and everything else map to themselves)
   //----------------------------------------------------------------------------
   // The table holds unsigned char: values above 127 do not fit in a
   // signed char, so a plain char table would need narrowing conversions.
   static const unsigned char lower_case_map[UCHAR_MAX + 1] = {
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
       32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
       64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
      112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
       96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
      112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
      128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
      144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
      160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
      176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
      224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
      240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
      224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
      240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
   };

   if(!icase)
      return c;
   // "unsigned char(c)" is not a valid functional cast (multi-word type
   // name), so use named casts for both the index and the result.
   return static_cast<char>(lower_case_map[static_cast<unsigned char>(c)]);
}

View File

@ -1,24 +0,0 @@
/*
* (C) Copyright Christian Engstrom 2001.
* Distributed under the Boost Software License, Version 1.0. (See
* accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
#ifndef BOOST_ISO8859_1_REGEX_TRAITS_HPP
#define BOOST_ISO8859_1_REGEX_TRAITS_HPP
//
// Regex traits class derived from boost::c_regex_traits<char> which
// declares its own is_class and translate members.
//
class iso8859_1_regex_traits : public boost::c_regex_traits<char>
{
   // shorthand for the parent traits class (private by default):
   typedef boost::c_regex_traits<char> base;

public:
   iso8859_1_regex_traits() {}
   ~iso8859_1_regex_traits() {}

   // true if c belongs to any character class in the mask f:
   bool is_class(char c, boost::uint_fast32_t f) const;

   // the comparison form of c, taking the icase flag into account:
   char translate(char c, bool icase) const;
};
#endif

View File

@ -1,49 +0,0 @@
I have written a class iso8859_1_regex_traits, which I enclose. Any
comments will be very welcome.
The class is derived from boost::c_regex_traits<char>, and just redefines
the member function is_class so that it will handle the character classes
for all the 8 bit characters in the ISO8859-1 (Latin 1) alphabet. The
enclosed file iso8859_1.txt lists the character classes for all the 256
characters.
For the 7 bit ASCII characters up to octal 177, the character classes are
identical to the ones returned by boost::c_regex_traits<char> and
boost::w32_regex_traits<char>. (I have only tried this under Windows.)
For the characters above octal 177, iso8859_1_regex_traits differs from
boost::w32_regex_traits<char> in the following ways:
1) The characters between octal 200 and octal 237 all belong to the
character class cntrl only, whereas most of them belong to print graph punct
in Windows. This reflects a true difference between the ISO8859-1 and
Windows character sets, so it should be pretty uncontroversial.
2) The superscript digits ¹²³ (octal 271, 262 and 263) belong to print graph
punct only in iso8859_1_regex_traits. In w32_regex_traits these characters
are defined to belong to digit as well, which automatically makes them
members of alnum and word too. I consider this to be a mistake made by
those who defined the Windows locale, as I can see few, if any, uses for a
character class defined in this manner, and the definition precludes the use
of the digit character class for many normal and useful purposes, such as
checking if a string contains an integer number. (Please note that this
criticism is directed towards Microsoft itself rather than towards the regex
package, since this definition is a part of Visual C++.)
3) The non-breaking space character (octal 240) belongs to print graph punct
in iso8859_1_regex_traits. In w32_regex_traits it is instead defined to
belong to print blank space, i.e. it is defined in exactly the same way as the
ordinary octal 40 space character. Again I consider this to be a mistake in
the Windows locale, that renders the space character class fairly useless
for any applications where non-breaking spaces might occur. After all, the
whole point in having both ordinary and non-breaking spaces in the character
set is that programs should be able to treat them differently, but with the
Windows locale, this is not possible.
As stated above I look forward to any comments, both relating to the
implementation as such and to the choices I have made in the definition of
the character classes.
Christian Engström

View File

@ -53,6 +53,8 @@ public:
ogrep_predicate(unsigned int& i, const char* p, iterator start, iterator end) : lines(i), filename(p), last_line(-1), end_of_storage(end), last_line_start(start) {}
ogrep_predicate(const ogrep_predicate& o) : lines(o.lines), filename(o.filename), last_line(o.last_line), end_of_storage(o.end_of_storage), last_line_start(o.last_line_start) {}
bool operator () (const boost::match_results<iterator, Allocator>& i);
private:
void operator=(const ogrep_predicate&);
};
// ideally we'd ignore the allocator type and use a template member function

View File

@ -19,15 +19,7 @@
#include <boost/regex.hpp>
// case sensitive reg_expression determines our allocator type:
typedef boost::reg_expression<char> re_type;
typedef re_type::allocator_type allocator_type;
// now declare static (global) data, including an allocator
// instance which we'll pass to all instances that require an allocator.
extern allocator_type a;
typedef boost::basic_regex<char> re_type;
extern re_type e;
// flags for output:

View File

@ -15,40 +15,26 @@
*/
#include <stdio.h>
#include <stdlib.h>
#include <boost/regex.hpp>
#ifdef JM_OLD_IOSTREAM
#include <iostream.h>
#else
#include <cstdio>
#include <cstdlib>
#include <iostream>
using std::cout;
using std::cin;
using std::cerr;
using std::endl;
#endif
#ifdef __BORLANDC__
# pragma hrdstop
#endif
#include <algorithm>
#ifdef BOOST_REGEX_V3
#include <boost/regex/v3/fileiter.hpp>
#else
#include <boost/regex.hpp>
#include <boost/regex/v4/fileiter.hpp>
#endif
#include "jgrep.h"
#ifndef BOOST_REGEX_NO_FILEITER
#ifndef JM_ALGO_INCLUDED
// HP and SGI STL's use <algo.h> instead
// this will have been pulled in by <jm_cfg.h>
// for std::distance
#include <algorithm>
#ifdef BOOST_NO_STDC_NAMESPACE
namespace std{
using ::strcpy;
using ::strcat;
using ::sprintf;
}
#endif
allocator_type a;
re_type e(a);
re_type e;
//rei_type ei(a);
// flags for output:
@ -65,7 +51,7 @@ bool verbose = false;
void usage()
{
cout <<
std::cout <<
"jgrep version 0.95\n"
"usage: jgrep [-options] expression file [files...]\n"
"\n"
@ -84,7 +70,7 @@ void usage()
"expression: a regular expression, or a literal string if -r- is specified\n"
"\n"
"files: one or more files to search, the names can contain the wildcard\n"
" characters ? and *\n" << endl;
" characters ? and *\n" << std::endl;
}
@ -123,9 +109,9 @@ void parse_switch(const char* flag)
verbose = false;
break;
default:
cout << "Undefined option -";
cout.put(*flag);
cout << endl;
std::cout << "Undefined option -";
std::cout.put(*flag);
std::cout << std::endl;
}
// turn off prev character:
break;
@ -159,9 +145,9 @@ void parse_switch(const char* flag)
case '+':
break;
default:
cout << "Undefined option -";
cout.put(*flag);
cout << endl;
std::cout << "Undefined option -";
std::cout.put(*flag);
std::cout << std::endl;
}
++flag;
}
@ -172,7 +158,6 @@ using namespace boost;
void HandleFile(const char* wild)
{
using namespace boost;
jm_trace("Handling file " << wild);
file_iterator end;
file_iterator start(wild);
@ -193,14 +178,12 @@ void HandleFile(const char* wild)
std::strcat(buf, directory_iterator::separator());
std::strcat(buf, "*");
}
jm_trace("Enumerating directories: " << buf);
directory_iterator dstart(buf);
directory_iterator dend;
// now get the file mask bit of "wild":
const char* ptr = wild + rootlen;
if(*ptr) ++ptr;
jm_trace("File mask part is: " << ptr);
while(dstart != dend)
{
@ -217,7 +200,6 @@ int done = 0;
void HandleArg(const char* arg)
{
using namespace boost;
jm_trace("Handling argument: " << arg);
if(*arg == '-')
{
parse_switch(arg);
@ -286,7 +268,7 @@ int main(int argc, char * argv[])
int main(int argc, char * argv[])
{
std::cout <<
std::std::cout <<
"\n<note>\n"
"This functionality is not available on with this compiler on this platform.\n"
"</note>\n";

View File

@ -1,14 +1,21 @@
/*
*
* Copyright (c) 2004
* Copyright (c) 2003-2004
* Dr John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE captures_example.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: Demonstrate the behaviour of captures.
*/
#include <boost/regex.hpp>
#include <iostream>

View File

@ -0,0 +1,182 @@
/*
*
* Copyright (c) 2004
* Dr John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE icu_example.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: examples of using Boost.Regex with ICU (Unicode) string types.
*/
#include <boost/regex/config.hpp>
#ifdef BOOST_HAS_ICU
#include <boost/regex/icu.hpp>
#include <iostream>
#include <assert.h>
//
// Find out if *password* meets our password requirements,
// as defined by the regular expression *requirements*.
//
bool is_valid_password(const UnicodeString& password, const UnicodeString& requirements)
{
return boost::u32regex_match(password, boost::make_u32regex(requirements));
}
//
// Extract filename part of a path from a UTF-8 encoded std::string and return the result
// as another std::string:
//
std::string get_filename(const std::string& path)
{
boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
boost::smatch what;
if(boost::u32regex_match(path, what, r))
{
// extract $1 as a CString:
return what.str(1);
}
else
{
throw std::runtime_error("Invalid pathname");
}
}
//
// Search some UTF-16 encoded text for the first run that starts with a
// character in the range U+0370..U+03FF and continues with non-letters
// or further characters from that range, and return it.  The expression
// is imperfect, but the best we can do for now - searching for specific
// scripts is actually pretty hard to do right.
// Throws std::runtime_error("No Greek found!") if nothing matches.
//
UnicodeString extract_greek(const UnicodeString& text)
{
   const boost::u32regex greek_run =
      boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
   boost::u16match found;
   if(!boost::u32regex_search(text, found, greek_run))
   {
      throw std::runtime_error("No Greek found!");
   }
   // $0 is the whole match, rebuild it as a UnicodeString:
   return UnicodeString(found[0].first, found.length(0));
}
void enumerate_currencies(const std::string& text)
{
// enumerate and print all the currency symbols, along
// with any associated numeric values:
const char* re =
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
"(?(1)"
"|(?(2)"
"[[:Cf:][:Cc:][:Z*:]]*"
")"
"[[:Sc:]]"
")";
boost::u32regex r = boost::make_u32regex(re);
boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
while(i != j)
{
std::cout << (*i)[0] << std::endl;
++i;
}
}
void enumerate_currencies2(const std::string& text)
{
// enumerate and print all the currency symbols, along
// with any associated numeric values:
const char* re =
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
"(?(1)"
"|(?(2)"
"[[:Cf:][:Cc:][:Z*:]]*"
")"
"[[:Sc:]]"
")";
boost::u32regex r = boost::make_u32regex(re);
boost::u32regex_token_iterator<std::string::const_iterator>
i(boost::make_u32regex_token_iterator(text, r, 1)), j;
while(i != j)
{
std::cout << *i << std::endl;
++i;
}
}
//
// Take a credit card number as a string of digits, and reformat it
// as a human readable string with "-" separating each group of
// digits.  Note that we're mixing a UTF-32 regex with a UTF-16
// string and a UTF-8 format specifier, and it still all just works:
//
const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
const char* human_format = "$1-$2-$3-$4";

UnicodeString human_readable_card_number(const UnicodeString& s)
{
   // substitute the four captured digit groups into the format string:
   UnicodeString formatted = boost::u32regex_replace(s, e, human_format);
   return formatted;
}
int main()
{
// password checks using u32regex_match:
UnicodeString pwd = "abcDEF---";
UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
bool b = is_valid_password(pwd, pwd_check);
assert(b);
pwd = "abcD-";
b = is_valid_password(pwd, pwd_check);
assert(!b);
// filename extraction with u32regex_match:
std::string file = "abc.hpp";
file = get_filename(file);
assert(file == "abc.hpp");
file = "c:\\a\\b\\c\\d.h";
file = get_filename(file);
assert(file == "d.h");
// Greek text extraction with u32regex_search:
UnicodeString text = L"Some where in \x0391\x039D\x0395\x0398\x0391 2004";
UnicodeString greek = extract_greek(text);
assert(greek == L"\x0391\x039D\x0395\x0398\x0391 2004");
// extract currency symbols with associated value, use iterator interface:
std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the <20> sign encoded in UTF-8
enumerate_currencies(text2);
enumerate_currencies2(text2);
UnicodeString credit_card_number = "1234567887654321";
credit_card_number = human_readable_card_number(credit_card_number);
assert(credit_card_number == "1234-5678-8765-4321");
return 0;
}
#else
#include <iostream>
// Fallback entry point used when BOOST_HAS_ICU is not defined:
// just report that the feature is unavailable.
int main()
{
   static const char notice[] = "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
   std::cout << notice;
   return 0;
}
#endif

View File

@ -0,0 +1,162 @@
/*
*
* Copyright (c) 2004
* Dr John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE mfc_example.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types.
*/
#ifdef TEST_MFC
#include <boost/regex/mfc.hpp>
#include <cstringt.h>
#include <atlstr.h>
#include <assert.h>
#include <tchar.h>
#include <iostream>
#ifdef _UNICODE
#define cout wcout
#endif
//
// Find out if *password* meets our password requirements,
// as defined by the regular expression *requirements*.
//
bool is_valid_password(const CString& password, const CString& requirements)
{
return boost::regex_match(password, boost::make_regex(requirements));
}
//
// Given a path held in a CString, return the part after the last
// backslash (or the whole string if there is none) as another CString.
// Throws std::runtime_error("Invalid pathname") when the path does not
// match the expression.
//
CString get_filename(const CString& path)
{
   boost::tregex splitter(__T("(?:\\A|.*\\\\)([^\\\\]+)"));
   boost::tmatch parts;
   if(!boost::regex_match(path, parts, splitter))
   {
      throw std::runtime_error("Invalid pathname");
   }
   // $1 holds the filename, rebuild it as a CString:
   return CString(parts[1].first, parts.length(1));
}
//
// Search through *address* for a UK postcode and return it; the
// expression used is by Phil A. on www.regxlib.com.
// Throws std::runtime_error("No postcode found") if nothing matches.
//
CString extract_postcode(const CString& address)
{
   boost::tregex postcode_re(__T("^(([A-Z]{1,2}[0-9]{1,2})|([A-Z]{1,2}[0-9][A-Z]))\\s?([0-9][A-Z]{2})$"));
   boost::tmatch found;
   if(!boost::regex_search(address, found, postcode_re))
   {
      throw std::runtime_error("No postcode found");
   }
   // $0 is the whole match, rebuild it as a CString:
   return CString(found[0].first, found.length());
}
void enumerate_links(const CString& html)
{
// enumerate and print all the <a> links in some HTML text,
// the expression used is by Andew Lee on www.regxlib.com:
boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
boost::tregex_iterator i(boost::make_regex_iterator(html, r)), j;
while(i != j)
{
std::cout << (*i)[1] << std::endl;
++i;
}
}
void enumerate_links2(const CString& html)
{
// enumerate and print all the <a> links in some HTML text,
// the expression used is by Andew Lee on www.regxlib.com:
boost::tregex r(__T("href=[\"\']((http:\\/\\/|\\.\\/|\\/)?\\w+(\\.\\w+)*(\\/\\w+(\\.\\w+)?)*(\\/|\\?\\w*=\\w*(&\\w*=\\w*)*)?)[\"\']"));
boost::tregex_token_iterator i(boost::make_regex_token_iterator(html, r, 1)), j;
while(i != j)
{
std::cout << *i << std::endl;
++i;
}
}
//
// Take a credit card number as a string of digits, and reformat it
// as a human readable string with "-" separating each group of digits:
//
const boost::tregex e(__T("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"));
const CString human_format = __T("$1-$2-$3-$4");

CString human_readable_card_number(const CString& s)
{
   // substitute the four captured digit groups into the format string:
   CString formatted = boost::regex_replace(s, e, human_format);
   return formatted;
}
//
// Exercise each of the examples above, checking the results with assert.
//
int main()
{
   // password checks using regex_match:
   CString pwd = "abcDEF---";
   CString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
   bool b = is_valid_password(pwd, pwd_check);
   assert(b);
   pwd = "abcD-";
   b = is_valid_password(pwd, pwd_check);
   assert(!b);

   // filename extraction with regex_match:
   CString file = "abc.hpp";
   file = get_filename(file);
   assert(file == "abc.hpp");
   file = "c:\\a\\b\\c\\d.h";
   file = get_filename(file);
   assert(file == "d.h");

   // postcode extraction with regex_search:
   CString address = "Joe Bloke, 001 Somestreet, Somewhere,\nPL2 8AB";
   CString postcode = extract_postcode(address);
   // compare (==) rather than assign (=), and check against the postcode
   // that actually appears on the last line of the address above:
   assert(postcode == "PL2 8AB");

   // html link extraction with regex_iterator and regex_token_iterator:
   CString text = "<dt><a href=\"syntax_perl.html\">Perl Regular Expressions</a></dt><dt><a href=\"syntax_extended.html\">POSIX-Extended Regular Expressions</a></dt><dt><a href=\"syntax_basic.html\">POSIX-Basic Regular Expressions</a></dt>";
   enumerate_links(text);
   enumerate_links2(text);

   // credit card reformatting with regex_replace:
   CString credit_card_number = "1234567887654321";
   credit_card_number = human_readable_card_number(credit_card_number);
   assert(credit_card_number == "1234-5678-8765-4321");
   return 0;
}
#else
#include <iostream>
// Fallback entry point used when TEST_MFC is not defined:
// just report that the feature is unavailable.
int main()
{
   static const char notice[] = "<NOTE>MFC support not enabled, feature unavailable</NOTE>";
   std::cout << notice;
   return 0;
}
#endif

View File

@ -33,9 +33,14 @@ using std::getline;
#include <boost/config.hpp>
#include <boost/regex.hpp>
#include <boost/cregex.hpp>
#include <boost/timer.hpp>
#include <boost/smart_ptr.hpp>
#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
#include <windows.h>
#endif
#if (defined(_MSC_VER) && (_MSC_VER <= 1300)) || defined(__sgi)
// maybe no Koenig lookup, use using declaration instead:
using namespace boost;
@ -367,7 +372,9 @@ int main(int argc, char**argv)
return 0;
}
#if defined(_WIN32) && defined(BOOST_REGEX_USE_WIN32_LOCALE)
// Request linking against user32.lib.  This must be "#pragma comment":
// "#pragma message" only prints text during compilation and does not
// add a library to the link.
#pragma comment(lib, "user32.lib")
#endif