include/boost/regex/v5/c_regex_traits.hpp

80.2% Lines (134/167) 100.0% List of functions (20/22)
c_regex_traits.hpp
f(x) Functions (22)
Function Calls Lines Blocks
boost::c_regex_traits<char>::c_regex_traits() :65 45053x 100.0% 100.0% boost::c_regex_traits<char>::length(char const*) :72 480x 100.0% 100.0% boost::c_regex_traits<char>::translate(char) const :77 223247316x 100.0% 100.0% boost::c_regex_traits<char>::translate_nocase(char) const :81 2505216x 100.0% 100.0% <unknown function 83> :83 boost::c_regex_traits<wchar_t>::c_regex_traits() :110 45374x 100.0% 100.0% boost::c_regex_traits<wchar_t>::length(wchar_t const*) :117 720x 100.0% 100.0% <unknown function 119> :119 boost::c_regex_traits<wchar_t>::translate(wchar_t) const :122 41355169x 100.0% 100.0% boost::c_regex_traits<wchar_t>::translate_nocase(wchar_t) const :126 2281320x 100.0% 100.0% boost::c_regex_traits<char>::transform[abi:cxx11](char const*, char const*) :153 168727x 80.0% 91.0% boost::c_regex_traits<char>::transform_primary[abi:cxx11](char const*, char const*) :181 31289x 48.1% 58.0% boost::c_regex_traits<char>::lookup_classname(char const*, char const*) :228 376969x 100.0% 96.0% boost::c_regex_traits<char>::isctype(char, unsigned int) :269 5596680x 100.0% 100.0% boost::c_regex_traits<char>::lookup_collatename[abi:cxx11](char const*, char const*) :288 4116x 54.5% 100.0% boost::c_regex_traits<char>::value(char, int) :297 717228x 100.0% 100.0% boost::c_regex_traits<wchar_t>::transform[abi:cxx11](wchar_t const*, wchar_t const*) :309 160891x 80.0% 90.0% boost::c_regex_traits<wchar_t>::transform_primary[abi:cxx11](wchar_t const*, wchar_t const*) :337 31769x 48.1% 49.0% boost::c_regex_traits<wchar_t>::lookup_classname(wchar_t const*, wchar_t const*) :384 379397x 100.0% 96.0% boost::c_regex_traits<wchar_t>::isctype(wchar_t, unsigned int) :425 7950552x 100.0% 100.0% boost::c_regex_traits<wchar_t>::lookup_collatename[abi:cxx11](wchar_t const*, wchar_t const*) :445 4116x 73.3% 100.0% boost::c_regex_traits<wchar_t>::value(wchar_t, int) :459 96648x 100.0% 100.0%
Line TLA Hits Source Code
1 /*
2 *
3 * Copyright (c) 2004
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12 /*
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE c_regex_traits.hpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Declares regular expression traits class that wraps the global C locale.
17 */
18
19 #ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED
20 #define BOOST_C_REGEX_TRAITS_HPP_INCLUDED
21
22 #ifndef BOOST_REGEX_AS_MODULE
23 #include <cctype>
24 #include <cstdint>
25 #include <cwctype>
26 #endif
27
28 #include <boost/regex/config.hpp>
29 #include <boost/regex/v5/regex_workaround.hpp>
30 #include <boost/regex/v5/primary_transform.hpp>
31 #include <boost/regex/v5/regex_traits_defaults.hpp>
32
33 namespace boost{
34
35 namespace BOOST_REGEX_DETAIL_NS {
36
// Bit flags describing character classes.  They are OR-ed together into a
// c_regex_traits<>::char_class_type mask (see the masks[] tables in
// lookup_classname) and tested one flag at a time by isctype.
37 enum
38 {
39 char_class_space = 1 << 0, // isctype: std::isspace / std::iswspace
40 char_class_print = 1 << 1, // isctype: std::isprint / std::iswprint
41 char_class_cntrl = 1 << 2, // isctype: std::iscntrl / std::iswcntrl
42 char_class_upper = 1 << 3, // isctype: std::isupper / std::iswupper
43 char_class_lower = 1 << 4, // isctype: std::islower / std::iswlower
44 char_class_alpha = 1 << 5, // isctype: std::isalpha / std::iswalpha
45 char_class_digit = 1 << 6, // isctype: std::isdigit / std::iswdigit
46 char_class_punct = 1 << 7, // isctype: std::ispunct / std::iswpunct
47 char_class_xdigit = 1 << 8, // isctype: std::isxdigit / std::iswxdigit
48 char_class_alnum = char_class_alpha | char_class_digit, // composite, no bit of its own
49 char_class_graph = char_class_alnum | char_class_punct, // composite, no bit of its own
50 char_class_blank = 1 << 9, // isctype: whitespace that is not a line separator
51 char_class_word = 1 << 10, // isctype: the underscore; the tables combine it with alnum
52 char_class_unicode = 1 << 11, // isctype (wchar_t only): any bit set above 0xff
53 char_class_horizontal = 1 << 12, // isctype: whitespace that is neither a separator nor '\v'
54 char_class_vertical = 1 << 13 // isctype: a line separator or '\v'
55 };
56
57 }
58
59 BOOST_REGEX_MODULE_EXPORT template <class charT>
60 struct c_regex_traits;
61
62 BOOST_REGEX_MODULE_EXPORT template<>
63 struct c_regex_traits<char>
64 {
65 45053x c_regex_traits(){}
66 typedef char char_type;
67 typedef std::size_t size_type;
68 typedef std::string string_type;
69 struct locale_type{};
70 typedef std::uint32_t char_class_type;
71
72 480x static size_type length(const char_type* p)
73 {
74 480x return (std::strlen)(p);
75 }
76
77 223247316x char translate(char c) const
78 {
79 223247316x return c;
80 }
81 2505216x char translate_nocase(char c) const
82 {
83 2505216x return static_cast<char>((std::tolower)(static_cast<unsigned char>(c)));
84 }
85
86 static string_type transform(const char* p1, const char* p2);
87 static string_type transform_primary(const char* p1, const char* p2);
88
89 static char_class_type lookup_classname(const char* p1, const char* p2);
90 static string_type lookup_collatename(const char* p1, const char* p2);
91
92 static bool isctype(char, char_class_type);
93 static int value(char, int);
94
95 locale_type imbue(locale_type l)
96 { return l; }
97 locale_type getloc()const
98 { return locale_type(); }
99
100 private:
101 // this type is not copyable:
102 c_regex_traits(const c_regex_traits&);
103 c_regex_traits& operator=(const c_regex_traits&);
104 };
105
106 #ifndef BOOST_NO_WREGEX
107 BOOST_REGEX_MODULE_EXPORT template<>
108 struct c_regex_traits<wchar_t>
109 {
110 45374x c_regex_traits(){}
111 typedef wchar_t char_type;
112 typedef std::size_t size_type;
113 typedef std::wstring string_type;
114 struct locale_type{};
115 typedef std::uint32_t char_class_type;
116
117 720x static size_type length(const char_type* p)
118 {
119 720x return (std::wcslen)(p);
120 }
121
122 41355169x wchar_t translate(wchar_t c) const
123 {
124 41355169x return c;
125 }
126 2281320x wchar_t translate_nocase(wchar_t c) const
127 {
128 2281320x return (std::towlower)(c);
129 }
130
131 static string_type transform(const wchar_t* p1, const wchar_t* p2);
132 static string_type transform_primary(const wchar_t* p1, const wchar_t* p2);
133
134 static char_class_type lookup_classname(const wchar_t* p1, const wchar_t* p2);
135 static string_type lookup_collatename(const wchar_t* p1, const wchar_t* p2);
136
137 static bool isctype(wchar_t, char_class_type);
138 static int value(wchar_t, int);
139
140 locale_type imbue(locale_type l)
141 { return l; }
142 locale_type getloc()const
143 { return locale_type(); }
144
145 private:
146 // this type is not copyable:
147 c_regex_traits(const c_regex_traits&);
148 c_regex_traits& operator=(const c_regex_traits&);
149 };
150
151 #endif // BOOST_NO_WREGEX
152
153 168727x inline c_regex_traits<char>::string_type c_regex_traits<char>::transform(const char* p1, const char* p2)
154 {
155 168727x std::string result(10, ' ');
156 168727x std::size_t s = result.size();
157 std::size_t r;
158 168727x std::string src(p1, p2);
159 168727x while (s < (r = std::strxfrm(&*result.begin(), src.c_str(), s)))
160 {
161 #if defined(_CPPLIB_VER)
162 //
163 // A bug in VC11 and 12 causes the program to hang if we pass a null-string
164 // to std::strxfrm, but only for certain locales :-(
165 // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
166 //
167 if (r == INT_MAX)
168 {
169 result.erase();
170 result.insert(result.begin(), static_cast<char>(0));
171 return result;
172 }
173 #endif
174 result.append(r - s + 3, ' ');
175 s = result.size();
176 }
177 168727x result.erase(r);
178 323394x return result;
179 168727x }
180
181 31289x inline c_regex_traits<char>::string_type c_regex_traits<char>::transform_primary(const char* p1, const char* p2)
182 {
183 static char s_delim;
184 31289x static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<c_regex_traits<char>*>(0), &s_delim);
185 31289x std::string result;
186 //
187 // What we do here depends upon the format of the sort key returned by
188 // sort key returned by this->transform:
189 //
190 31289x switch (s_collate_type)
191 {
192 31289x case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
193 case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
194 // the best we can do is translate to lower case, then get a regular sort key:
195 {
196 31289x result.assign(p1, p2);
197 62602x for (std::string::size_type i = 0; i < result.size(); ++i)
198 31313x result[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(result[i])));
199 31289x result = transform(&*result.begin(), &*result.begin() + result.size());
200 31289x break;
201 }
202 case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
203 {
204 // get a regular sort key, and then truncate it:
205 result = transform(p1, p2);
206 result.erase(s_delim);
207 break;
208 }
209 case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
210 // get a regular sort key, and then truncate everything after the delim:
211 result = transform(p1, p2);
212 if ((!result.empty()) && (result[0] == s_delim))
213 break;
214 std::size_t i;
215 for (i = 0; i < result.size(); ++i)
216 {
217 if (result[i] == s_delim)
218 break;
219 }
220 result.erase(i);
221 break;
222 }
223 31289x if (result.empty())
224 336x result = std::string(1, char(0));
225 31289x return result;
226 }
227
228 376969x inline c_regex_traits<char>::char_class_type c_regex_traits<char>::lookup_classname(const char* p1, const char* p2)
229 {
230 using namespace BOOST_REGEX_DETAIL_NS;
231 static const char_class_type masks[] =
232 {
233 0,
234 char_class_alnum,
235 char_class_alpha,
236 char_class_blank,
237 char_class_cntrl,
238 char_class_digit,
239 char_class_digit,
240 char_class_graph,
241 char_class_horizontal,
242 char_class_lower,
243 char_class_lower,
244 char_class_print,
245 char_class_punct,
246 char_class_space,
247 char_class_space,
248 char_class_upper,
249 char_class_unicode,
250 char_class_upper,
251 char_class_vertical,
252 char_class_alnum | char_class_word,
253 char_class_alnum | char_class_word,
254 char_class_xdigit,
255 };
256
257 376969x int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
258 376969x if (idx < 0)
259 {
260 2208x std::string s(p1, p2);
261 6360x for (std::string::size_type i = 0; i < s.size(); ++i)
262 4152x s[i] = static_cast<char>((std::tolower)(static_cast<unsigned char>(s[i])));
263 2208x idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
264 2208x }
265 376969x BOOST_REGEX_ASSERT(std::size_t(idx) + 1u < sizeof(masks) / sizeof(masks[0]));
266 376969x return masks[idx + 1];
267 }
268
269 5596680x inline bool c_regex_traits<char>::isctype(char c, char_class_type mask)
270 {
271 using namespace BOOST_REGEX_DETAIL_NS;
272 return
273 1519116x ((mask & char_class_space) && (std::isspace)(static_cast<unsigned char>(c)))
274 4816992x || ((mask & char_class_print) && (std::isprint)(static_cast<unsigned char>(c)))
275 4803312x || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast<unsigned char>(c)))
276 4798560x || ((mask & char_class_upper) && (std::isupper)(static_cast<unsigned char>(c)))
277 4781400x || ((mask & char_class_lower) && (std::islower)(static_cast<unsigned char>(c)))
278 4754772x || ((mask & char_class_alpha) && (std::isalpha)(static_cast<unsigned char>(c)))
279 4275324x || ((mask & char_class_digit) && (std::isdigit)(static_cast<unsigned char>(c)))
280 4147536x || ((mask & char_class_punct) && (std::ispunct)(static_cast<unsigned char>(c)))
281 4135632x || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast<unsigned char>(c)))
282 4128504x || ((mask & char_class_blank) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
283 4127532x || ((mask & char_class_word) && (c == '_'))
284 4119864x || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == '\v')))
285 11193360x || ((mask & char_class_horizontal) && (std::isspace)(static_cast<unsigned char>(c)) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != '\v'));
286 }
287
288 4116x inline c_regex_traits<char>::string_type c_regex_traits<char>::lookup_collatename(const char* p1, const char* p2)
289 {
290 4116x std::string s(p1, p2);
291 4116x s = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(s);
292 4116x if (s.empty() && (p2 - p1 == 1))
293 96x s.append(1, *p1);
294 4116x return s;
295 }
296
297 717228x inline int c_regex_traits<char>::value(char c, int radix)
298 {
299 717228x char b[2] = { c, '\0', };
300 char* ep;
301 717228x int result = std::strtol(b, &ep, radix);
302 717228x if (ep == b)
303 83376x return -1;
304 633852x return result;
305 }
306
307 #ifndef BOOST_NO_WREGEX
308
309 160891x inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
310 {
311 std::size_t r;
312 160891x std::size_t s = 10;
313 321782x std::wstring src(p1, p2);
314 160891x std::wstring result(s, L' ');
315 160891x while (s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
316 {
317 #if defined(_CPPLIB_VER)
318 //
319 // A bug in VC11 and 12 causes the program to hang if we pass a null-string
320 // to std::strxfrm, but only for certain locales :-(
321 // Probably effects Intel and Clang or any compiler using the VC std library (Dinkumware).
322 //
323 if (r == INT_MAX)
324 {
325 result.erase();
326 result.insert(result.begin(), static_cast<wchar_t>(0));
327 return result;
328 }
329 #endif
330 result.append(r - s + 3, L' ');
331 s = result.size();
332 }
333 160891x result.erase(r);
334 308375x return result;
335 160891x }
336
337 31769x inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
338 {
339 static wchar_t s_delim;
340 31769x static const int s_collate_type = ::boost::BOOST_REGEX_DETAIL_NS::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
341 31769x std::wstring result;
342 //
343 // What we do here depends upon the format of the sort key returned by
344 // sort key returned by this->transform:
345 //
346 31769x switch (s_collate_type)
347 {
348 31769x case ::boost::BOOST_REGEX_DETAIL_NS::sort_C:
349 case ::boost::BOOST_REGEX_DETAIL_NS::sort_unknown:
350 // the best we can do is translate to lower case, then get a regular sort key:
351 {
352 31769x result.assign(p1, p2);
353 63562x for (std::wstring::size_type i = 0; i < result.size(); ++i)
354 31793x result[i] = (std::towlower)(result[i]);
355 31769x result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
356 31769x break;
357 }
358 case ::boost::BOOST_REGEX_DETAIL_NS::sort_fixed:
359 {
360 // get a regular sort key, and then truncate it:
361 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
362 result.erase(s_delim);
363 break;
364 }
365 case ::boost::BOOST_REGEX_DETAIL_NS::sort_delim:
366 // get a regular sort key, and then truncate everything after the delim:
367 result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
368 if ((!result.empty()) && (result[0] == s_delim))
369 break;
370 std::size_t i;
371 for (i = 0; i < result.size(); ++i)
372 {
373 if (result[i] == s_delim)
374 break;
375 }
376 result.erase(i);
377 break;
378 }
379 31769x if (result.empty())
380 720x result = std::wstring(1, char(0));
381 31769x return result;
382 }
383
384 379397x inline c_regex_traits<wchar_t>::char_class_type c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
385 {
386 using namespace BOOST_REGEX_DETAIL_NS;
387 static const char_class_type masks[] =
388 {
389 0,
390 char_class_alnum,
391 char_class_alpha,
392 char_class_blank,
393 char_class_cntrl,
394 char_class_digit,
395 char_class_digit,
396 char_class_graph,
397 char_class_horizontal,
398 char_class_lower,
399 char_class_lower,
400 char_class_print,
401 char_class_punct,
402 char_class_space,
403 char_class_space,
404 char_class_upper,
405 char_class_unicode,
406 char_class_upper,
407 char_class_vertical,
408 char_class_alnum | char_class_word,
409 char_class_alnum | char_class_word,
410 char_class_xdigit,
411 };
412
413 379397x int idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(p1, p2);
414 379397x if (idx < 0)
415 {
416 2208x std::wstring s(p1, p2);
417 6360x for (std::wstring::size_type i = 0; i < s.size(); ++i)
418 4152x s[i] = (std::towlower)(s[i]);
419 2208x idx = ::boost::BOOST_REGEX_DETAIL_NS::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
420 2208x }
421 379397x BOOST_REGEX_ASSERT(idx + 1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
422 379397x return masks[idx + 1];
423 }
424
425 7950552x inline bool c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
426 {
427 using namespace BOOST_REGEX_DETAIL_NS;
428 return
429 733248x ((mask & char_class_space) && (std::iswspace)(c))
430 7898628x || ((mask & char_class_print) && (std::iswprint)(c))
431 7883112x || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
432 7877496x || ((mask & char_class_upper) && (std::iswupper)(c))
433 7859604x || ((mask & char_class_lower) && (std::iswlower)(c))
434 7826280x || ((mask & char_class_alpha) && (std::iswalpha)(c))
435 7199472x || ((mask & char_class_digit) && (std::iswdigit)(c))
436 7057212x || ((mask & char_class_punct) && (std::iswpunct)(c))
437 7044120x || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
438 7033512x || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c))
439 7031232x || ((mask & char_class_word) && (c == '_'))
440 7024476x || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
441 7024296x || ((mask & char_class_vertical) && (::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) || (c == L'\v')))
442 15901104x || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::BOOST_REGEX_DETAIL_NS::is_separator(c) && (c != L'\v'));
443 }
444
445 4116x inline c_regex_traits<wchar_t>::string_type c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
446 {
447 4116x std::string name;
448 // Usual msvc warning suppression does not work here with std::string template constructor.... use a workaround instead:
449 16380x for (const wchar_t* pos = p1; pos != p2; ++pos)
450 12264x name.push_back((char)*pos);
451 4116x name = ::boost::BOOST_REGEX_DETAIL_NS::lookup_default_collate_name(name);
452 4116x if (!name.empty())
453 7728x return string_type(name.begin(), name.end());
454 252x if (p2 - p1 == 1)
455 192x return string_type(1, *p1);
456 156x return string_type();
457 4116x }
458
459 96648x inline int c_regex_traits<wchar_t>::value(wchar_t c, int radix)
460 {
461 #ifdef BOOST_BORLANDC
462 // workaround for broken wcstol:
463 if ((std::iswxdigit)(c) == 0)
464 return -1;
465 #endif
466 96648x wchar_t b[2] = { c, '\0', };
467 wchar_t* ep;
468 96648x int result = std::wcstol(b, &ep, radix);
469 96648x if (ep == b)
470 34428x return -1;
471 62220x return result;
472 }
473
474 #endif
475
476 }
477
478 #endif
479
480
481
482