| 
									
										
										
										
											2004-03-04 22:12:19 +00:00
										 |  |  | //  Boost string_algo library example file  ---------------------------------//
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | //  Copyright Pavol Droba 2002-2003. Use, modification and
 | 
					
						
							|  |  |  | //  distribution is subject to the Boost Software License, Version
 | 
					
						
							|  |  |  | //  1.0. (See accompanying file LICENSE_1_0.txt or copy at
 | 
					
						
							|  |  |  | //  http://www.boost.org/LICENSE_1_0.txt)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | //  See http://www.boost.org for updates, documentation, and revision history.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |     RLE compression using replace framework. Goal is to compress a sequence of | 
					
						
							|  |  |  |     repeating characters into 3 bytes ( repeat mark, character and repetition count ). | 
					
						
							|  |  |  |     For simplification, it works only on numeric-value sequences. | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <string>
 | 
					
						
							|  |  |  | #include <iostream>
 | 
					
						
							|  |  |  | #include <limits>
 | 
					
						
							|  |  |  | #include <boost/detail/iterator.hpp>
 | 
					
						
							|  |  |  | #include <boost/algorithm/string/find_format.hpp>
 | 
					
						
							|  |  |  | #include <boost/algorithm/string/finder.hpp>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | using namespace std; | 
					
						
							|  |  |  | using namespace boost; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // replace mark specification, specialize for a specific element type
 | 
					
						
							| 
									
										
										
										
											2005-07-13 13:35:30 +00:00
										 |  |  | template< typename T > T repeat_mark() { return (std::numeric_limits<T>::max)(); }; | 
					
						
							| 
									
										
										
										
											2004-03-04 22:12:19 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | // Compression  -----------------------------------------------------------------------
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // compress finder -rle
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |     Find a sequence which can be compressed. It has to be at least 3-character long | 
					
						
							|  |  |  |     sequence of repetitive characters  | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | struct find_compressF  | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     // Construction
 | 
					
						
							|  |  |  |     find_compressF() {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Operation
 | 
					
						
							|  |  |  |     template<typename ForwardIteratorT> | 
					
						
							|  |  |  |     iterator_range<ForwardIteratorT> operator()(  | 
					
						
							|  |  |  |         ForwardIteratorT Begin,  | 
					
						
							|  |  |  |         ForwardIteratorT End ) const | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         typedef ForwardIteratorT input_iterator_type; | 
					
						
							|  |  |  |         typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type; | 
					
						
							|  |  |  |         typedef iterator_range<input_iterator_type> result_type; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // begin of the matching segment
 | 
					
						
							|  |  |  |         input_iterator_type MStart=End; | 
					
						
							|  |  |  |         // Repetition counter
 | 
					
						
							|  |  |  |         value_type Cnt=0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // Search for a sequence of repetitive characters
 | 
					
						
							|  |  |  |         for(input_iterator_type It=Begin; It!=End;) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             input_iterator_type It2=It++; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2005-07-13 13:55:32 +00:00
										 |  |  |             if ( It==End || Cnt>=(std::numeric_limits<value_type>::max)() ) | 
					
						
							| 
									
										
										
										
											2004-03-04 22:12:19 +00:00
										 |  |  |             { | 
					
						
							|  |  |  |                 return result_type( MStart, It ); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if ( *It==*It2 ) | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 if ( MStart==End ) | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     // Mark the start
 | 
					
						
							|  |  |  |                     MStart=It2; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 // Increate repetition counter
 | 
					
						
							|  |  |  |                 Cnt++; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             else | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 if ( MStart!=End ) | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     if ( Cnt>2 ) | 
					
						
							|  |  |  |                         return result_type( MStart, It ); | 
					
						
							|  |  |  |                     else | 
					
						
							|  |  |  |                     { | 
					
						
							|  |  |  |                         MStart=End; | 
					
						
							|  |  |  |                         Cnt=0; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return result_type( End, End ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // rle compress format
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |     Transform a sequence into repeat mark, character and count  | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | template<typename SeqT> | 
					
						
							|  |  |  | struct format_compressF | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | private: | 
					
						
							|  |  |  |     typedef SeqT result_type; | 
					
						
							|  |  |  |     typedef typename SeqT::value_type value_type; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     // Construction
 | 
					
						
							|  |  |  |     format_compressF() {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Operation
 | 
					
						
							|  |  |  |     template< typename ReplaceT > | 
					
						
							|  |  |  |     result_type operator()( const ReplaceT& Replace ) const | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         SeqT r; | 
					
						
							| 
									
										
										
										
											2008-06-19 12:07:24 +00:00
										 |  |  |         if(!Replace.empty()) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             r.push_back( repeat_mark<value_type>() ); | 
					
						
							|  |  |  |             r.push_back( *(Replace.begin()) ); | 
					
						
							|  |  |  |             r.push_back( value_type( Replace.size() ) ); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2004-03-04 22:12:19 +00:00
										 |  |  |         return r; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Decompression  -----------------------------------------------------------------------
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // find decompress-rle functor
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |     find a repetition block | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | struct find_decompressF | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     // Construction
 | 
					
						
							|  |  |  |     find_decompressF() {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Operation
 | 
					
						
							|  |  |  |     template<typename ForwardIteratorT> | 
					
						
							|  |  |  |     iterator_range<ForwardIteratorT> operator()(  | 
					
						
							|  |  |  |         ForwardIteratorT Begin,  | 
					
						
							|  |  |  |         ForwardIteratorT End ) const | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         typedef ForwardIteratorT input_iterator_type; | 
					
						
							|  |  |  |         typedef typename boost::detail::iterator_traits<input_iterator_type>::value_type value_type; | 
					
						
							|  |  |  |         typedef iterator_range<input_iterator_type> result_type; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for(input_iterator_type It=Begin; It!=End; It++) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             if( *It==repeat_mark<value_type>() ) | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 // Repeat mark found, extract body
 | 
					
						
							|  |  |  |                 input_iterator_type It2=It++;  | 
					
						
							|  |  |  |                  | 
					
						
							|  |  |  |                 if ( It==End ) break; | 
					
						
							|  |  |  |                     It++;  | 
					
						
							|  |  |  |                 if ( It==End ) break; | 
					
						
							|  |  |  |                     It++; | 
					
						
							|  |  |  |                  | 
					
						
							|  |  |  |                 return result_type( It2, It ); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return result_type( End, End ); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // rle decompress format
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |     transform a repetition block into a sequence of characters | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | template< typename SeqT > | 
					
						
							|  |  |  | struct format_decompressF | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | private: | 
					
						
							|  |  |  |     typedef SeqT result_type; | 
					
						
							|  |  |  |     typedef typename SeqT::value_type value_type; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     // Construction
 | 
					
						
							|  |  |  |     format_decompressF() {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Operation
 | 
					
						
							|  |  |  |     template< typename ReplaceT > | 
					
						
							|  |  |  |     result_type operator()( const ReplaceT& Replace ) const | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         SeqT r; | 
					
						
							| 
									
										
										
										
											2008-06-19 12:07:24 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if(!Replace.empty()) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             // extract info
 | 
					
						
							|  |  |  |             typename ReplaceT::const_iterator It=Replace.begin(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             value_type Value=*(++It); | 
					
						
							|  |  |  |             value_type Repeat=*(++It); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             for( value_type Index=0; Index<Repeat; Index++ ) r.push_back( Value ); | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2004-03-04 22:12:19 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return r; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int main() | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     cout << "* RLE Compression Example *" << endl << endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     string original("123_AA_*ZZZZZZZZZZZZZZ*34"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // copy compression
 | 
					
						
							|  |  |  |     string compress=find_format_all_copy(  | 
					
						
							|  |  |  |         original,  | 
					
						
							|  |  |  |         find_compressF(),  | 
					
						
							|  |  |  |         format_compressF<string>() ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cout << "Compressed string: " << compress << endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Copy decompression
 | 
					
						
							|  |  |  |     string decompress=find_format_all_copy(  | 
					
						
							|  |  |  |         compress,  | 
					
						
							|  |  |  |         find_decompressF(),  | 
					
						
							|  |  |  |         format_decompressF<string>() ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cout << "Decompressed string: " << decompress << endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // in-place compression
 | 
					
						
							|  |  |  |     find_format_all(  | 
					
						
							|  |  |  |         original,  | 
					
						
							|  |  |  |         find_compressF(),  | 
					
						
							|  |  |  |         format_compressF<string>() ); | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     cout << "Compressed string: " << original << endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // in-place decompression
 | 
					
						
							|  |  |  |     find_format_all(  | 
					
						
							|  |  |  |         original,  | 
					
						
							|  |  |  |         find_decompressF(),  | 
					
						
							|  |  |  |         format_decompressF<string>() ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cout << "Decompressed string: " << original << endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     cout << endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } |