String Algorithm Library: initial commit

[SVN r22431]
2025-07-29 12:07:18 +02:00 · 2004-03-04 22:12:19 +00:00
parent 3d9b6badea
commit 1deef19d55
73 changed files with 12359 additions and 0 deletions
--- a/string/doc/Jamfile.v2
+++ b/string/doc/Jamfile.v2
@ -0,0 +1,61 @@
+#  Boost string_algo library documentation Jamfile  ---------------------------------
+#
+#  Copyright Pavol Droba 2002-2003. Use, modification and
+#  distribution is subject to the Boost Software License, Version
+#  1.0. (See accompanying file LICENSE_1_0.txt or copy at
+#  http://www.boost.org/LICENSE_1_0.txt)
+#
+#  See http://www.boost.org for updates, documentation, and revision history.
+
+project boost/libs/algorithm/string/doc ;
+
+doxygen reference 
+    :   
+    ../../../../boost/algorithm/string.hpp
+    ../../../../boost/algorithm/string_regex.hpp
+
+    ../../../../boost/algorithm/string/classification.hpp
+    ../../../../boost/algorithm/string/iterator_range.hpp         
+    ../../../../boost/algorithm/string/sequence_traits.hpp
+    ../../../../boost/algorithm/string/std_containers_traits.hpp
+    ../../../../boost/algorithm/string/container_traits.hpp
+    ../../../../boost/algorithm/string/concept.hpp
+    ../../../../boost/algorithm/string/compare.hpp
+    ../../../../boost/algorithm/string/constants.hpp
+
+    ../../../../boost/algorithm/string/case_conv.hpp 
+
+    ../../../../boost/algorithm/string/find.hpp 
+    ../../../../boost/algorithm/string/finder.hpp
+    ../../../../boost/algorithm/string/find_iterator.hpp
+
+    ../../../../boost/algorithm/string/trim.hpp
+
+    ../../../../boost/algorithm/string/predicate.hpp
+    ../../../../boost/algorithm/string/split.hpp
+    ../../../../boost/algorithm/string/iter_find.hpp
+
+    ../../../../boost/algorithm/string/erase.hpp
+    ../../../../boost/algorithm/string/replace.hpp
+    ../../../../boost/algorithm/string/find_format.hpp
+    ../../../../boost/algorithm/string/formatter.hpp
+
+    ../../../../boost/algorithm/string/regex.hpp
+    ../../../../boost/algorithm/string/regex_find_format.hpp
+    :
+    <doxygen:param>HIDE_UNDOC_MEMBERS=YES
+    <doxygen:param>EXTRACT_PRIVATE=NO
+    <doxygen:param>ENABLE_PREPROCESSING=YES
+    <doxygen:param>MACRO_EXPANSION=YES
+    <doxygen:param>EXPAND_ONLY_PREDEF=YES
+    <doxygen:param>SEARCH_INCLUDES=YES
+    <doxygen:param>PREDEFINED="BOOST_STRING_TYPENAME=typename \"BOOST_STATIC_CONSTANT(type,var)=static const type var;\""
+    ;
+        
+boostbook string_algo 
+    : 
+    string_algo.xml 
+    ;
+
+
+
--- a/string/doc/concept.xml
+++ b/string/doc/concept.xml
@ -0,0 +1,199 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.concept" last-revision="$Date$">
+    <title>Concepts</title>
+
+    <using-namespace name="boost"/>
+    <using-namespace name="boost::string_algo"/>
+
+    <section>   
+        <title>Definitions</title>
+        
+        <table>
+            <title>Notation</title>
+            <tgroup cols="2" align="left">
+                <tbody>
+                    <row>
+                        <entry><code>F</code></entry>
+                        <entry>A type that is a model of Finder</entry>
+                    </row>
+                    <row>
+                        <entry><code>Fmt</code></entry>
+                        <entry>A type that is a model of Formatter</entry>
+                    </row>
+                    <row>
+                        <entry><code>Iter</code></entry>
+                        <entry>
+                            Iterator Type
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><code>f</code></entry>
+                        <entry>Object of type <code>F</code></entry>
+                    </row>
+                    <row>
+                        <entry><code>fmt</code></entry>
+                        <entry>Object of type <code>Fmt</code></entry>
+                    </row>
+                    <row>
+                        <entry><code>i,j</code></entry>
+                        <entry>Objects of type <code>Iter</code></entry>
+                    </row>
+                    </tbody>
+            </tgroup>
+        </table>
+    </section>
+
+    <section id="string_algo.finder_concept">
+        <title>Finder Concept</title>
+
+        <para>
+            Finder is a functor which searches for an arbitrary part of a container. 
+            The result of the search is given as an <classname>iterator_range</classname> 
+            delimiting the selected part.
+        </para>
+
+        <table>             
+            <title>Valid Expressions</title>
+            <tgroup cols="3" align="left">
+                <thead>
+                    <row>   
+                        <entry>Expression</entry>
+                        <entry>Return Type</entry>
+                        <entry>Effects</entry>
+                    </row>
+                </thead>
+                <tbody>
+                    <row>
+                        <entry><code>f(i,j)</code></entry>
+                        <entry>Convertible to <code>iterator_range&lt;Iter&gt;</code></entry>
+                        <entry>Performs the search on the interval [i,j) and returns the result of the search</entry>
+                    </row>
+                </tbody>
+            </tgroup>
+        </table>
+
+        <para>
+            Various algorithms need to perform a search in a container and a Finder is a generalization of such
+            search operations that allows algorithms to abstract from searching. For instance, generic replace
+            algorithms can replace any part of the input, and the finder is used to select the desired one.
+        </para>
+        <para>
+            Note, that it is only required that the finder works with a particular iterator type. However,
+            a Finder operation can be defined as a template, allowing the Finder to work with any iterator.
+        </para>
+        <para>
+            <emphasis role="bold">Examples</emphasis>
+        </para>
+        <para> 
+            <itemizedlist>
+                <listitem>
+                    Finder implemented as a class. This Finder always returns the whole input as a match. <code>operator()</code>
+                    is templated, so that the finder can be used on any iterator type.
+                    
+                    <programlisting>
+struct simple_finder
+{
+    template&lt;typename ForwardIteratorT&gt;
+    boost::iterator_range&lt;ForwardIteratorT&gt; operator()(
+        ForwardIteratorT Begin,
+        ForwardIteratorT End )
+    {
+        return boost::make_range( Begin, End );
+    }
+};
+        </programlisting>
+                </listitem>
+                <listitem>
+                    Function Finder. Finder can be any function object. That means, an ordinary function with the
+                    required signature can be used as well. However, such a function can be used only for
+                    a specific iterator type. 
+                    
+                    <programlisting>
+boost::iterator_range&lt;std::string::const_iterator&gt; simple_finder(
+    std::string::const_iterator Begin,
+    std::string::const_iterator End )
+{
+    return boost::make_range( Begin, End );
+}
+        </programlisting>
+                </listitem>
+            </itemizedlist>
+        </para> 
+    </section>
+    <section id="string_algo.formatter_concept">
+        <title>Formatter concept</title>
+
+        <para>
+            Formatters are used by <link linkend="string_algo.replace">replace algorithms</link>.
+            They are used in close combination with finders.
+            A formatter is a functor, which takes a result from a Finder operation and transforms it in a specific way. 
+            The operation of the formatter can use additional information provided by a specific finder,
+            for example <functionname>regex_formatter()</functionname> uses the match information from
+            <functionname>regex_finder()</functionname> to format the result of formatter operation.
+        </para>
+    
+        <table>
+            <title>Valid Expressions</title>
+            <tgroup cols="3" align="left">
+                <thead>
+                    <row>   
+                        <entry>Expression</entry>
+                        <entry>Return Type</entry>
+                        <entry>Effects</entry>
+                    </row>
+                </thead>
+                <tbody>
+                   <row>
+                        <entry><code>fmt(f(i,j))</code></entry>
+                        <entry>A container type, accessible using container traits</entry>
+                        <entry>Formats the result of the finder operation</entry>
+                    </row>
+                </tbody>
+            </tgroup>
+        </table>
+
+        <para>
+            Similarly to finders, formatters generalize format operations. When a finder is used to 
+            select a part of the input, a formatter takes this selection and performs some formatting
+            on it. Algorithms can abstract from formatting using a formatter.
+        </para>
+        <para>
+            <emphasis role="bold">Examples</emphasis>
+        </para>
+        <para> 
+            <itemizedlist>
+                <listitem>
+                    Formatter implemented as a class. This Formatter does not perform any formatting and 
+                    returns the repackaged match. <code>operator()</code>
+                    is templated, so that the Formatter can be used on any Finder type.
+                    
+                    <programlisting>
+struct simple_formatter
+{
+    template&lt;typename FindResultT&gt;
+    std::string operator()( const FindResultT&amp; Match )
+    {
+        std::string Temp( Match.begin(), Match.end() );
+        return Temp;
+    }
+};
+                </programlisting>
+                </listitem>
+                <listitem>
+                    Function Formatter. Similarly to Finder, Formatter can be any function object. 
+                    However, as a function, it can be used only with a specific Finder type. 
+                  
+                    <programlisting>
+std::string simple_formatter( const boost::iterator_range&lt;std::string::const_iterator&gt;&amp; Match )
+{
+    std::string Temp( Match.begin(), Match.end() );
+    return Temp;
+}
+                    </programlisting>
+                </listitem>
+            </itemizedlist>
+        </para> 
+     </section>
+</section>
--- a/string/doc/credits.xml
+++ b/string/doc/credits.xml
@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.credits" last-revision="$Date$">
+    <title>Credits</title>
+    <section id="string_algo.ack">
+        <title>Acknowledgments</title>
+        <para>
+            Thanks to everybody who gave suggestions and comments. Especially to Thorsten Ottosen, Jeff Garland
+            and the other boost members who participated.
+        </para>
+    </section>
+</section>
--- a/string/doc/design.xml
+++ b/string/doc/design.xml
@ -0,0 +1,281 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.design" last-revision="$Date$">
+    <title>Design Topics</title>
+
+    <using-namespace name="boost"/>
+    <using-namespace name="boost::string_algo"/>
+    
+    <section id="string_algo.iterator_range">
+        <title><code>iterator_range</code> class</title>
+
+        <para>
+            An <classname>iterator_range</classname> is an encapsulation of a pair of iterators that
+            delimit a sequence (or, a range). This concept is widely used by 
+            sequence manipulating algorithms. Although being so useful, there is no direct support 
+            for it in the standard library (The closest thing is that some algorithms return a pair of iterators). 
+            Instead all STL algorithms have two distinct parameters for beginning and end of a range. This design 
+            is natural for implementation of generic algorithms, but it prevents working with a range as a single value. 
+        </para> 
+        <para>
+            It is possible to encapsulate a range in <code>std::pair&lt;&gt;</code>, but
+            the <code>std::pair&lt;&gt;</code> is too generic an encapsulation, so it is not the best match for a range.
+            For instance, it does not enforce that begin and end iterators are of the same type.
+        </para>
+        <para>
+            Naturally the range concept is heavily used also in this library. During the development of
+            the library, it was discovered that there is a need for a reasonable encapsulation for it.
+            A core part of the library deals with substring searching algorithms. Any such algorithm
+            returns a range delimiting the result of the search. <code>std::pair&lt;&gt;</code> was considered as 
+            unsuitable. Therefore the <code>iterator_range</code> was defined.
+        </para>
+        <para>
+            The intention of the <code>iterator_range</code> class is to manage a range as a single value and provide 
+            a basic interface for common operations. Its interface is similar to that of a container. 
+            In addition to <code>begin()</code>
+            and <code>end()</code> accessors, it has member functions for checking if the range is empty,
+            or to determine the size of the range. It has also a set of member typedefs that extract
+            type information from the encapsulated iterators. As such, the interface is compatible with 
+            the <link linkend="string_algo.container_traits">container traits</link> requirements so
+            it is possible to use this class as a parameter to many algorithms in this library.
+        </para>
+    </section>
+        
+    <section id="string_algo.container_traits">
+        <title>Container Traits</title>
+
+        <para>
+            Container traits provide uniform access to different types of containers. 
+            This functionality allows one to write generic algorithms which work with several 
+            different kinds of containers. For this library it means that, for instance,
+            many algorithms work with <code>std::string</code> as well as with <code>char[]</code>.
+        </para>
+        <para>
+            The following container types are supported:
+            <itemizedlist>
+                <listitem>
+                    Standard containers
+                </listitem>
+                <listitem>
+                    Built-in arrays (like int[])
+                </listitem>
+                <listitem>
+                    Null terminated strings (this includes char[],wchar_t[],char*, and wchar_t*)
+                </listitem>
+                <listitem>
+                    std::pair&lt;iterator,iterator&gt;
+                </listitem>
+            </itemizedlist>
+        </para>
+        <para>
+            Container traits support a subset of container concept (Std &sect;23.1). This subset 
+            can be described as an input container concept, i.e. a container with immutable content. 
+            Its definition can be found in the header <headername>boost/algorithm/string/container_traits.hpp</headername>.
+        </para>
+        <para>
+            In the table C denotes a container and c is an object of C. 
+        </para>
+        <table>
+            <title>Container Traits</title>
+            <tgroup cols="3" align="left">
+                <thead>
+                    <row>   
+                        <entry>Name</entry>
+                        <entry>Standard container equivalent</entry>
+                        <entry>Description</entry>
+                    </row>
+                </thead>
+                <tbody>
+                    <row>
+                        <entry><classname>container_value_type&lt;C&gt;</classname>::type</entry>
+                        <entry><code>C::value_type</code></entry>
+                        <entry>Type of contained values</entry>
+                    </row>
+                    <row>
+                        <entry><classname>container_difference_type&lt;C&gt;</classname>::type</entry>
+                        <entry><code>C::difference_type</code></entry>
+                        <entry>difference type of the container</entry>
+                    </row>
+                    <row>
+                        <entry><classname>container_iterator&lt;C&gt;</classname>::type</entry>
+                        <entry><code>C::iterator</code></entry>
+                        <entry>iterator type of the container</entry>
+                    </row>
+                    <row>
+                        <entry><classname>container_const_iterator&lt;C&gt;</classname>::type</entry>
+                        <entry><code>C::const_iterator</code></entry>
+                        <entry>const_iterator type of the container</entry>
+                    </row>
+                    <row>
+                        <entry><classname>container_result_iterator&lt;C&gt;</classname>::type</entry>
+                        <entry></entry>
+                        <entry>
+                            result_iterator type of the container. This type maps to <code>C::iterator</code>
+                            for mutable container and <code>C::const_iterator</code> for const containers.
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><functionname>begin(c)</functionname></entry>
+                        <entry><code>c.begin()</code></entry>
+                        <entry>
+                            Gets the iterator pointing to the start of the container.
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><functionname>end(c)</functionname></entry>
+                        <entry><code>c.end()</code></entry>
+                        <entry>
+                            Gets the iterator pointing to the end of the container.
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><functionname>size(c)</functionname></entry>
+                        <entry><code>c.size()</code></entry>
+                        <entry>
+                            Gets the size of the container.
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><functionname>empty(c)</functionname></entry>
+                        <entry><code>c.empty()</code></entry>
+                        <entry>
+                            Checks if the container is empty.
+                        </entry>
+                    </row>
+                </tbody>
+            </tgroup>
+        </table>
+
+        <para>
+            The container traits are only a temporary part of this library. There is a plan for a separate submission
+            of a container_traits library to Boost. Once it gets accepted, String Algorithm Library will be adapted to 
+            use it and the internal implementation will be deprecated.
+        </para>
+    
+    </section>
+    <section id="string_algo.sequence_traits">
+        <title>Sequence Traits</title>
+
+        <para>
+            The major difference between <code>std::list</code> and <code>std::vector</code> is not in the interfaces
+            they provide, but rather in the inner details of the class and the way it performs 
+            various operations. The problem is that it is not possible to infer this difference from the 
+            definitions of classes without some special mechanism.
+            However some algorithms can run significantly faster with the knowledge of the properties
+            of a particular container.
+        </para>
+        <para>
+            Sequence traits allow one to specify additional properties of a sequence container (see Std.&sect;23.2).
+            These properties are then used by algorithms to select optimized handling for some operations.
+            The sequence traits are declared in the header 
+            <headername>boost/algorithm/string/sequence_traits.hpp</headername>.
+        </para>
+
+        <para>
+            In the table C denotes a container and c is an object of C.
+        </para>
+        <table>
+            <title>Sequence Traits</title>
+            <tgroup cols="2" align="left">
+                <thead>
+                    <row>   
+                        <entry>Trait</entry>
+                        <entry>Description</entry>
+                    </row>
+                </thead>
+                <tbody>
+                    <row>
+                        <entry><classname>sequence_has_native_replace&lt;C&gt;</classname>::value</entry>
+                        <entry>Specifies that the sequence has std::string like replace method</entry>
+                    </row>
+                    <row>
+                        <entry><classname>sequence_has_stable_iterators&lt;C&gt;</classname>::value</entry>
+                        <entry>
+                            Specifies that the sequence has stable iterators. It means,
+                            that operations like <code>insert</code>/<code>erase</code>/<code>replace</code> 
+                            do not invalidate iterators.
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><classname>sequence_has_const_time_insert&lt;C&gt;</classname>::value</entry>
+                        <entry>
+                            Specifies that the insert method of the sequence has 
+                            constant time complexity.
+                        </entry>
+                    </row>
+                    <row>
+                        <entry><classname>sequence_has_const_time_erase&lt;C&gt;</classname>::value</entry>
+                        <entry>
+                            Specifies that the erase method of the sequence has constant time complexity
+                        </entry>
+                    </row>
+                    </tbody>
+            </tgroup>
+        </table>
+        
+        <para>
+            Current implementation contains specializations for std::list&lt;T&gt; and
+            std::basic_string&lt;T&gt; from the standard library and SGI's std::rope&lt;T&gt; and std::slist&lt;T&gt;.
+        </para>
+    </section>
+    <section id="string_algo.find">
+        <title>Find Algorithms</title>
+
+        <para>
+            Find algorithms have similar functionality to <code>std::search()</code> algorithm. They provide a different 
+            interface which is more suitable for common string operations. 
+            Instead of returning just the start of matching subsequence they return a range which is necessary 
+            when the length of the matching subsequence is not known beforehand. 
+            This feature also allows a partitioning of  the input sequence into three 
+            parts: a prefix, a substring and a suffix. 
+        </para>
+        <para>
+            Another difference is an addition of various searching methods besides find_first, including find_regex. 
+        </para>
+        <para>
+            In the library, find algorithms are implemented in terms of 
+            <link linkend="string_algo.finder_concept">Finders</link>. Finders are used also by other facilities 
+            (replace,split).
+            For convenience, there are also function wrappers for these finders to simplify find operations.
+        </para>
+        <para>
+            Currently the library contains only naive implementation of find algorithms with complexity 
+            O(n * m) where n is the size of the input sequence and m is the size of the search sequence. 
+            There are algorithms with complexity O(n), but for smaller sequences the constant overhead is 
+            rather big. For small m &lt;&lt; n (m orders of magnitude smaller than n) the current implementation 
+            provides acceptable efficiency. 
+            Even the C++ standard defines the required complexity for search algorithm as O(n * m). 
+            It is possible that a future version of the library will also contain algorithms with linear 
+            complexity as an option.
+        </para>
+    </section>
+    <section id="string_algo.replace">
+        <title>Replace Algorithms</title>
+
+        <para>
+            The implementation of replace algorithms follows the layered structure of the library. The 
+            lower layer implements generic substitution of a range in the input sequence. 
+            This layer takes a <link linkend="string_algo.finder_concept">Finder</link> object and a 
+            <link linkend="string_algo.formatter_concept">Formatter</link> object as an input. These two 
+            functors define what to replace and what to replace it with. The upper layer functions 
+            are just wrapping calls to the lower layer. Finders are shared with the find and split facility. 
+        </para>
+        <para>
+            As usual, the implementation of the lower layer is designed to work with a generic sequence while
+            taking advantage of specific features if possible 
+            (by using <link linkend="string_algo.sequence_traits">Sequence traits</link>).
+        </para>         
+    </section>
+    <section id="string_algo.split">
+        <title>Split Algorithms</title>
+
+        <para>
+            Split algorithms are a logical extension of <link linkend="string_algo.find">find facility</link>.
+            Instead of searching for one match, the whole input is searched. The result of the search is then used 
+            to partition the input. It depends on the algorithms which parts are returned as the result of
+            split operations. It can be the matching parts (<functionname>find_all()</functionname>) or the parts in
+            between (<functionname>split()</functionname>). 
+        </para>
+    </section>
+</section>
--- a/string/doc/environment.xml
+++ b/string/doc/environment.xml
@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.env" last-revision="$Date$">
+    <title>Environment</title>
+    <section>
+        <title>Build</title>
+        <para>
+            The whole library is provided in headers. Regex variants of some algorithms, 
+            however, are dependent on the <libraryname>Boost.Regex</libraryname> library. All such algorithms are
+            separated in <headername>boost/algorithm/string/regex.hpp</headername>. 
+            If this header is used, an application must be linked with the <libraryname>Boost.Regex</libraryname> 
+            library. 
+        </para>
+    </section>
+
+    <section>
+        <title>Examples</title>
+        <para>
+            Examples showing the basic usage of the library can be found in the libs/string_algo/example
+            directory. There is a separate file for each part of the library. Please follow the boost
+            build guidelines to build examples using bjam. To successfully build regex examples 
+            the <libraryname>Boost.Regex</libraryname> library is required. 
+        </para>
+    </section>
+
+    <section>
+        <title>Tests</title>
+        <para>
+            A full set of test cases for the library is located in the libs/string_algo/test directory. 
+            The test cases can be executed using the boost build system. For the tests of regular 
+            expression variants of algorithms, the <libraryname>Boost.Regex</libraryname> library is required. 
+        </para>
+    </section>
+
+    <section>
+        <title>Portability</title>
+        <para>
+            The library has been successfully compiled and tested with the following compilers:
+            
+            <itemizedlist>
+                <listitem>Microsoft Visual C++ 7.0</listitem>
+                <listitem>Microsoft Visual C++ 7.1</listitem>
+                <listitem>GCC 3.2</listitem>
+                <listitem>GCC 3.3.1</listitem>
+            </itemizedlist>
+        </para>
+        <para>
+            There are known limitations on platforms not supporting partial template specialization. 
+            The library depends on a correctly implemented std::iterator_traits class. If the standard library provided 
+            with the compiler is broken, the String Algorithm Library cannot function properly. Usually it implies 
+            that primitive pointer iterators are not working with the library functions. 
+        </para>
+    </section>
+</section>
--- a/string/doc/intro.xml
+++ b/string/doc/intro.xml
@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.intro" last-revision="$Date$">
+    <title>Introduction</title>
+
+    <para>
+        The String Algorithm Library provides a generic implementation of
+        string-related algorithms which are missing in STL. It is an extension
+        to the algorithms library of STL and it includes trimming, case conversion, 
+        predicates and find/replace functions. All of them come in different variants 
+        so it is easier to choose the best fit for a particular need.
+    </para>
+    <para>
+        The implementation is not restricted to work with a particular container 
+        (like a <code>std::basic_string</code>), rather it is as generic as
+        possible. This generalization is not compromising the performance since
+        algorithms are using container specific features when it means a performance
+        gain.
+    </para>
+    <para>      
+        The library has a layered structure to simplify the usage without sacrificing the
+        generalization. 
+
+        The easy-to-use interface, defined in the namespace <code>boost</code>, represents the first layer. 
+        Algorithms and classes defined here do not offer a lot of customization opportunities;
+        rather they are provided in several different variants, so a user can choose the 
+        one that suits her needs.
+
+        The second layer, defined in the namespace <code>boost::string_algo</code>, on the 
+        other hand, is generic. Basically it contains the same set of algorithms as the first layer,
+        but the interface is more flexible and allows more customization, but it is harder to use.
+        The first layer is implemented as a set of wrappers around the second layer.
+    </para>
+    <para>
+        The documentation is divided into several sections. For a quick start read the 
+        <link linkend="string_algo.usage">Usage</link> section. 
+        <link linkend="string_algo.design">The Design Topics</link>,
+        <link linkend="string_algo.concept">Concepts</link> and <link linkend="string_algo.rationale">Rationale</link>
+        provide some explanation about the library design and structure and explain how it should be used.
+        See the <link linkend="string_algo.reference">Reference</link> for the complete list of provided utilities
+        and algorithms. Functions and classes in the reference are organized by the headers in which they are defined.
+        The reference contains links to the detailed description for every entity in the library.
+    </para>
+</section>
--- a/string/doc/rationale.xml
+++ b/string/doc/rationale.xml
@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.rationale" last-revision="$Date$">
+    <title>Rationale</title>
+
+    <using-namespace name="boost"/>
+    <using-namespace name="boost::string_algo"/>
+    
+    <section id="string_algo.structure">
+        <title>Library structure</title>
+
+        <para>
+            When designing a library it is always a problem to find a balance between generalization
+            and usability. A generic utility can have a wider range of usage with more options for extensibility,
+            but it can also bring unwanted complexity for everyday usage. 
+        </para>
+        <para>
+            Imagine a library for drawing geometric objects. It can contain one generic function <code>draw()</code>
+            with many parameters specifying what to draw, like size, number of edges, shape etc.
+            This would allow you to draw almost anything, but usually a user needs to draw
+            only a triangle or a square and she will have to specify this simple request in a
+            very complicated way. For this purpose two functions, <code>draw_triangle()</code> and
+            <code>draw_square()</code>, would suit much better than a generic <code>draw()</code> function.
+        </para>
+        <para>
+            The String Algorithm Library solves this problem by dividing the interface into two layers.
+            The first layer (defined in the namespace boost) contains ready to use algorithms specialized
+            for common tasks. They are provided in multiple variants to better suit specific needs.
+            The second layer (defined in the namespace <code>boost::string_algo</code>), provides generic interfaces with
+            more options for extending and tuning.
+        </para>
+        <para>
+            For instance, a <functionname>boost::trim()</functionname> algorithm trims spaces from 
+            an input string. When there is a need to trim something else, there is 
+            <functionname>boost::string_algo::trim()</functionname> whose interface allows one to specify a 
+            predicate which selects the characters to be removed.
+        </para>
+    </section>
+    <section id="string_algo.locale">
+        <title>Locales</title>
+
+        <para>
+            Locales have a very close relation to string processing. They contain information about
+            the character sets and are used, for example, to change the case of characters and 
+            to classify the characters. 
+        </para>
+        <para>
+            C++ allows to work with multiple different instances of locales at once. If an algorithm
+            manipulates some data in a way that requires the usage of locales, there must be a way
+            to specify them. However, one instance of locales is sufficient for most of the applications,
+            and for a user it could be very tedious to specify which locales to use on every place 
+            where it is needed. 
+        </para> 
+        <para>
+            Fortunately, the C++ standard allows one to specify the <emphasis>global</emphasis> locales (using the static member
+            function <code>std::locale::global()</code>). When instantiating an
+            <code>std::locale</code> class without explicit information, the instance will 
+            be initialized with the <emphasis>global</emphasis> locale. It means, that if an algorithm needs a locale,
+            it should have an <code>std::locale</code> parameter with default value <code>std::locale()</code>.
+            If a user needs to specify locales explicitly, she can do so. Otherwise the <emphasis>global</emphasis>
+            locales are used.
+        </para>
+    </section>
+    <section id="string_algo.regex">
+        <title>Regular Expressions</title>
+
+        <para>
+            Regular expressions are an essential part of text processing. For this reason, the library 
+            provides also regex variants of some algorithms. The library does not try to replace
+            <libraryname>Boost.Regex</libraryname>, but it merely wraps its functionality in a new interface.
+            As a part of this library regex algorithms integrate smoothly with other components which 
+            brings additional value.
+        </para>
+    </section>
+</section>
--- a/string/doc/string_algo.xml
+++ b/string/doc/string_algo.xml
@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<library name="string_algo" dirname="string_algo" xmlns:xi="http://www.w3.org/2001/XInclude" 
+    id="string_algo" last-revision="$Date$">
+    <libraryinfo>
+        <author>
+            <firstname>Pavol</firstname>
+            <surname>Droba</surname>
+        </author>
+
+        <copyright>
+            <year>2002</year>
+            <year>2003</year>
+            <holder>Pavol Droba</holder>
+        </copyright>
+
+        <legalnotice>
+            <para>Use, modification and distribution is subject to the Boost
+                Software License, Version 1.0. (See accompanying file
+                <filename>LICENSE_1_0.txt</filename> or copy at <ulink
+                    url="http://www.boost.org/LICENSE_1_0.txt">http://www.boost.org/LICENSE_1_0.txt</ulink>)
+            </para>
+        </legalnotice>
+
+        <librarypurpose>
+            A set of generic string-related algorithms and utilities.
+        </librarypurpose> 
+        <librarycategory name="category:algorithms"/>
+    </libraryinfo>
+
+    <title>Boost String Algorithms Library</title>  
+    <xi:include href="intro.xml"/>
+    <xi:include href="usage.xml"/>
+    <xi:include href="design.xml"/>
+    <xi:include href="concept.xml"/>
+    <xi:include href="reference.boostbook"/>
+    <xi:include href="rationale.xml"/>
+    <xi:include href="environment.xml"/>
+    <xi:include href="credits.xml"/>
+</library>
+
--- a/string/doc/usage.xml
+++ b/string/doc/usage.xml
@ -0,0 +1,275 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE library PUBLIC "-//Boost//DTD BoostBook XML V1.0//EN"
+"http://www.boost.org/tools/boostbook/dtd/boostbook.dtd">
+<section id="string_algo.usage" last-revision="$Date$">
+    <title>Usage</title>
+
+    <using-namespace name="boost"/>
+    <using-namespace name="boost::string_algo"/>
+
+
+    <section>
+        <title>First Example</title>
+        
+        <para>
+            Using the algorithms is straightforward. Let us have a look at the first example:
+        </para>
+        <programlisting>
+    #include &lt;boost/string_algo.hpp&gt;
+    using namespace std;
+    using namespace boost;
+    namespace sa=boost::string_algo;
+    
+    // ...
+
+    string str1(" hello world! ");
+    trim( to_upper(str1) );  // str1 == "HELLO WORLD!"
+
+    string str2=ireplace_first_copy(str1,"hello","goodbye"); // str2 == "goodbye WORLD!"
+        </programlisting>
+        <para>
+            This example converts str1 to upper case and trims spaces from the start and the end
+            of the string. str2 is then created as a copy of str1 with "hello" replaced with "goodbye".
+            This example demonstrates several important concepts used in the library:
+        </para>
+        <itemizedlist>
+            <listitem>
+                <para><emphasis role="bold">Container parameters:</emphasis>
+                    Unlike the STL algorithms, parameters are not specified only in form
+                    of iterators. The STL convention allows for great flexibility,
+                    but it has several limitations. It is not possible to <emphasis>stack</emphasis> algorithms together, 
+                    because a container is passed in two parameters, so it is not possible to use 
+                    a return value from another algorithm. It is considerably easier to write
+                    <code>to_lower(str1)</code>, than <code>to_lower(str1.begin(), str1.end())</code>.
+                </para>
+                <para>
+                    The magic of <link linkend="string_algo.container_traits">container_traits</link> 
+                    provides a uniform way of handling different containers. 
+                    If there is a need to pass a pair of iterators, 
+                    <link linkend="string_algo.iterator_range"><code>iterator_range</code></link>
+                    can be used to package iterators into a structure with the container interface.
+                </para>
+            </listitem>
+            <listitem>
+                <para><emphasis role="bold">Copy vs. Mutable:</emphasis>
+                    Many algorithms in the library are performing a transformation of the input. 
+                    The transformation can be done in-place, mutating the input sequence, or a copy 
+                    of the transformed input can be created, leaving the input intact. None of 
+                    these possibilities is superior to the other one and both have different 
+                    advantages and disadvantages. For this reason, both are provided with the library. 
+                </para>
+            </listitem>
+            <listitem>
+                <para><emphasis role="bold">Algorithm stacking:</emphasis>
+                    Copy versions return a transformed input as a result. Mutable variants return 
+                    a reference to the input. Thus both versions allow a simple chaining of
+                    transformations within one expression (i.e. one can write 
+                    <code>trim_copy(to_upper_copy(s))</code> as well as <code>trim(to_upper(s))</code>). 
+                </para>
+            </listitem>
+            <listitem>
+                <para><emphasis role="bold">Naming:</emphasis>
+                    Naming follows the conventions from the Standard C++ Library. If there is a 
+                    copy and mutable version of the same algorithm, the mutable version has no suffix 
+                    and the copy version has suffix <emphasis>_copy</emphasis>. 
+                    Some algorithms have prefix <emphasis>i</emphasis> 
+                    (e.g. <functionname>ifind_first()</functionname>).
+                    This prefix identifies that the algorithm works in a case-insensitive manner.
+                </para>
+            </listitem>
+        </itemizedlist>
+        <para>
+            To use the library, include the <headername>boost/string_algo.hpp</headername> header. 
+            If the regex related functions are needed, include the 
+            <headername>boost/string_algo_regex.hpp</headername> header.
+        </para>
+    </section>
+    <section>
+        <title>Case conversion</title>
+        
+        <para>
+            STL has a nice way of converting character case. Unfortunately, it works only
+            for a single character and we want to convert a string:
+        </para>
+        <programlisting>
+    string str1("HeLlO WoRld!");
+    to_upper(str1); // str1=="HELLO WORLD!"
+        </programlisting>
+        <para>
+            <functionname>to_upper()</functionname> and <functionname>to_lower()</functionname> convert the case of 
+            characters in a container using a specified locale.
+        </para>
+    </section>
+    <section>
+        <title>Predicates and Classification</title>
+        <para>
+            A part of the library deals with string related predicates. Consider this example:
+        </para>
+        <programlisting>
+    bool is_executable( const string&amp; filename )
+    {
+        return 
+            iends_with(filename, ".exe") ||
+            iends_with(filename, ".com");
+    }
+
+    // ...
+    string str1("command.com");
+    cout 
+        &lt;&lt; str1
+        &lt;&lt; (is_executable("command.com")? " is ": " is not ")
+        &lt;&lt; "an executable" 
+        &lt;&lt; endl; // prints "command.com is an executable"
+    
+    //..
+    char text1[]="hello world!";
+    cout 
+        &lt;&lt; text1 
+        &lt;&lt; (all( text1, is_lower&lt;char&gt;() )? " is ": " is not ")
+        &lt;&lt; "written in the lower case" 
+        &lt;&lt; endl; // prints "hello world! is written in the lower case"
+        </programlisting>
+        <para>
+            The predicates determine whether a substring is contained in the input string
+            under various conditions. The conditions are if a string starts with the substring, 
+            ends with the substring, 
+            simply contains the substring or if both strings are equal. See the reference for 
+            <headername>boost/string_algo/predicate.hpp</headername> for more details. 
+            In addition the algorithm <functionname>all()</functionname> checks
+            all elements of a container to satisfy a condition specified by a predicate. 
+            This predicate can be any unary predicate, but the library provides a bunch of 
+            useful string-related predicates ready for use.
+            These are located in the <headername>boost/string_algo/classification.hpp</headername> header.
+        </para>
+    </section>
+    <section>
+        <title>Trimming</title>
+        
+        <para>
+            When parsing the input of a user, strings usually have unwanted leading or trailing 
+            characters. To get rid of them, we need trim functions:
+        </para>
+        <programlisting>
+    string str1="     hello world!     ";
+    string str2=trim_left_copy(str1);   // str2 == "hello world!     "
+    string str3=trim_right_copy(str1);  // str3 == "     hello world!"
+    trim(str1);                         // str1 == "hello world!"
+
+    string phone="00423333444";
+    // remove leading 0 from the phone number
+    sa::trim_left(phone,is_any_of&lt;char&gt;("0")); // phone == "423333444"
+        </programlisting>
+        <para>
+            It is possible to trim the spaces on the right, on the left or on the both sides of a string.
+            And for those cases when there is a need to remove something else than blank space, the
+            <code>string_algo</code> namespace contains generic versions of the trim algorithms. Using these, 
+            a user can specify a functor which will select the <emphasis>space</emphasis> to be removed. It is possible to use 
+            classification predicates like <functionname>is_digit()</functionname> mentioned in the previous paragraph.
+            See the reference for the <headername>boost/string_algo/trim.hpp</headername>.
+        </para>
+    </section>
+    <section>
+        <title>Find algorithms</title>
+        
+        <para>
+            The library contains a set of find algorithms. Here is an example:
+        </para>
+        <programlisting>
+    char text[]="hello dolly!";
+    iterator_range&lt;char*&gt; result=find_last(text,"ll");
+
+    transform( result.begin(), result.end(), result.begin(), bind2nd(plus&lt;char&gt;(), 1) );
+    // text = "hello dommy!"            
+
+    to_upper(result); // text == "hello doMMy!"
+        </programlisting>
+        <para>
+            We have used <functionname>find_last()</functionname> to search the <code>text</code> for "ll".
+            The result is given in the <link linkend="string_algo.iterator_range"><code>iterator_range</code></link>. 
+            This range delimits the
+            part of the input which satisfies the find criteria. In our example it is the last occurrence of "ll".
+            
+            As we can see, input of the <functionname>find_last()</functionname> algorithm can be also 
+            char[] because this type is supported by 
+            <link linkend="string_algo.container_traits">container_traits</link>.
+
+            Following lines transform the result. Notice, that 
+            <link linkend="string_algo.iterator_range"><code>iterator_range</code></link> has familiar 
+            <code>begin()</code> and <code>end()</code> methods, so it can be used like any other STL container.
+        </para>
+    </section>
+    <section>
+        <title>Replace Algorithms</title>
+        <para>
+            Find algorithms can be used for searching for a specific part of the sequence. Replace goes one step
+            further. After a matching part is found, it is substituted with something else. The substitution is computed
+            from an original, using some transformation. 
+        </para>
+        <programlisting>
+    string str1="Hello  Dolly,   Hello World!";
+    replace_first(str1, "Dolly", "Jane");      // str1 == "Hello  Jane,   Hello World!"
+    replace_last(str1, "Hello", "Goodbye");    // str1 == "Hello  Jane,   Goodbye World!"
+    erase_all(str1, " ");                      // str1 == "HelloJane,GoodbyeWorld!"
+    erase_head(str1, 5);                       // str1 == "Jane,GoodbyeWorld!"
+        </programlisting>
+        <para>
+            For the complete list of replace and erase functions see the 
+            <link linkend="string_algo.reference">reference</link>.
+            There are a lot of predefined functions for common usage, however, the library allows you to 
+            define a custom <code>replace()</code> that suits a specific need. There is a generic <functionname>replace()</functionname> 
+            function which takes two parameters.
+            The first one is a <link linkend="string_algo.finder_concept">Finder</link> object, the second one is 
+            a <link linkend="string_algo.formatter_concept">Formatter</link> object. 
+            The Finder object is a functor which performs the searching for the replacement part. The Formatter object
+            takes the result of the Finder (usually a reference to the found substring) and creates a 
+            substitute for it. Replace algorithm puts these two together and makes the desired substitution. 
+        </para>
+    </section>
+    <section>
+        <title>Split</title>
+
+        <para>
+            Split algorithms allow one to divide a sequence into parts. Each part represents a 
+            <emphasis>token</emphasis> and  tokens are separated by <emphasis>separators</emphasis>. 
+            One can either search for tokens or search for separators:
+        </para>
+
+        <programlisting>
+    string str1("hello abc-*-ABC-*-aBc goodbye");
+
+    typedef vector&lt; iterator_range&lt;string::iterator&gt; &gt; find_vector_type;
+    
+    find_vector_type FindVec; // #1: Search for tokens
+    ifind_all( FindVec, str1, "abc" ); // FindVec == { [abc],[ABC],[aBc] }
+
+    typedef vector&lt; string &gt; split_vector_type;
+    
+    split_vector_type SplitVec; // #2: Search for separators
+    split( SplitVec, str1, is_any_of&lt;char&gt;("-*") ); // SplitVec == { "hello abc","ABC","aBc goodbye" }
+        </programlisting>
+        <para>
+            <code>[hello]</code> designates an <code>iterator_range</code> delimiting this substring.                       
+        </para>
+        <para>
+            The result of a split algorithm is a <emphasis>container of containers</emphasis>. There is only one restriction:
+            The inner container type must be able to hold extracted parts of the input sequence. This example 
+            shows the special case where the inner container is an 
+            <link linkend="string_algo.iterator_range"><code>iterator_range</code></link> 
+            instead of e.g. <code>std::string</code>. This way, a user gets a reference 
+            (in the form of iterators) delimiting the parts of the input sequence. Otherwise, a copy of 
+            each extracted part is created and added to the outer container.
+        </para>
+        <para>
+            So to recap, there are two basic algorithms: <functionname>find_all()</functionname> 
+            extracts the parts
+            matching the specification whereas <functionname>split()</functionname> uses the matching 
+            parts as delimiters, and extracts the parts in between them. 
+        </para>
+        <para>
+            Generalizations of these two algorithms are called <functionname>iter_find()</functionname> and 
+            <functionname>iter_split()</functionname>. They take a 
+            <link linkend="string_algo.finder_concept">Finder</link> object, as an argument to search for 
+            the substring. 
+        </para>     
+    </section>
+</section>