diff --git a/include/boost/algorithm/hex.hpp b/include/boost/algorithm/hex.hpp new file mode 100644 index 0000000..21aad9a --- /dev/null +++ b/include/boost/algorithm/hex.hpp @@ -0,0 +1,278 @@ +/* + Copyright (c) Marshall Clow 2011-2012. + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + Thanks to Nevin for his comments/help. +*/ + +/* + General problem - turn a sequence of integral types into a sequence of hexadecimal characters. + - and back. + +TO DO: + 1. these should really only work on integral types. (see the >> and << operations) + -- this is done, I think. + 2. The 'value_type_or_char' struct is really a hack. + -- but it's a better hack now that it works with back_insert_iterators +*/ + +/// \file hex.hpp +/// \brief Convert sequence of integral types into a sequence of hexadecimal +/// characters and back. Based on the MySQL functions HEX and UNHEX +/// \author Marshall Clow + +#ifndef BOOST_ALGORITHM_HEXHPP +#define BOOST_ALGORITHM_HEXHPP + +#include // for std::iterator_traits +#include + +#include +#include +#include + +#include +#include + + +namespace boost { namespace algorithm { + +/*! + \struct hex_decode_error + \brief Base exception class for all hex decoding errors + + \struct non_hex_input + \brief Thrown when a non-hex value (0-9, A-F) encountered when decoding. 
+ Contains the offending character + + \struct not_enough_input + \brief Thrown when the input sequence unexpectedly ends + +*/ +struct hex_decode_error: virtual boost::exception, virtual std::exception {}; +struct not_enough_input : public hex_decode_error {}; +struct non_hex_input : public hex_decode_error { + non_hex_input ( char ch ) : bad_char ( ch ) {} + char bad_char; /* the character that could not be decoded */ +private: + non_hex_input (); // don't allow creation w/o a char + }; + +namespace detail { +/// \cond DOXYGEN_HIDE + + template /* encode_one: writes val to out as 2 * sizeof ( T ) upper-case hex digits, most significant digit first, and returns the advanced iterator */ + OutputIterator encode_one ( T val, OutputIterator out ) { + const std::size_t num_hex_digits = 2 * sizeof ( T ); + char res [ num_hex_digits ]; + char *p = res + num_hex_digits; /* res is filled from the back, lowest nibble first */ + for ( std::size_t i = 0; i < num_hex_digits; ++i, val >>= 4 ) + *--p = "0123456789ABCDEF" [ val & 0x0F ]; + return std::copy ( res, res + num_hex_digits, out ); + } + + inline unsigned hex_char_to_int ( char c ) { /* value (0-15) of one hex digit, either case; throws non_hex_input for any other character. inline because this is an ordinary non-template function defined in a header: without it, every translation unit including this file emits its own definition and the link fails with duplicate symbols */ + if ( c >= '0' && c <= '9' ) return c - '0'; + if ( c >= 'A' && c <= 'F' ) return c - 'A' + 10; + if ( c >= 'a' && c <= 'f' ) return c - 'a' + 10; + BOOST_THROW_EXCEPTION (non_hex_input (c)); + return 0; // keep dumb compilers happy + } + + +// My own iterator_traits class. +// It is here so that I can "reach inside" some kinds of output iterators +// and get the type to write. + template + struct hex_iterator_traits { + typedef typename std::iterator_traits::value_type value_type; + }; + + template + struct hex_iterator_traits< std::back_insert_iterator > { + typedef typename Container::value_type value_type; + }; + + template + struct hex_iterator_traits< std::front_insert_iterator > { + typedef typename Container::value_type value_type; + }; + + template + struct hex_iterator_traits< std::insert_iterator > { + typedef typename Container::value_type value_type; + }; + +// ostream_iterators have three template parameters. +// The first one is the output type, the second one is the character type of +// the underlying stream, the third is the character traits. 
+// We only care about the first one. + template + struct hex_iterator_traits< std::ostream_iterator > { + typedef T value_type; + }; + +// Output Iterators have a value type of 'void', which says nothing about the type to write. +// We special case some output iterators, but we can't enumerate them all. +// If we can't figure it out, we assume that you want to output chars. +// If you don't, pass in an iterator with a real value_type. NOTE(review): the typedef in iterator_value_type that used value_type_or_char is commented out, so this fallback looks unused - confirm. + template struct value_type_or_char { typedef T value_type; }; + template <> struct value_type_or_char { typedef char value_type; }; + +// All in one step + template + struct iterator_value_type { +// typedef typename value_type_or_char::value_type>::value_type value_type; + typedef typename hex_iterator_traits::value_type value_type; + }; + +// What can we assume here about the inputs? +// is std::iterator_traits::value_type always 'char' ? +// Could it be wchar_t, say? Does it matter? +// We are assuming ASCII for the values - but what about the storage? + template /* decode_one: reads 2 * sizeof ( T ) hex digits from [first, last), writes the resulting value through out and returns the advanced output iterator */ + typename boost::enable_if::value_type>, OutputIterator>::type + decode_one ( InputIterator &first, InputIterator last, OutputIterator out ) { /* first is taken by reference and is left pointing past the digits consumed */ + typedef typename iterator_value_type::value_type T; + T res (0); + + // Need to make sure that we can read that many chars here: 2 * sizeof ( T ) hex digits per output value. + for ( std::size_t i = 0; i < 2 * sizeof ( T ); ++i, ++first ) { + if ( first == last ) + BOOST_THROW_EXCEPTION (not_enough_input ()); /* input ran out part-way through a value */ + res = ( 16 * res ) + hex_char_to_int (static_cast (*first)); /* shift in one more hex digit; hex_char_to_int throws non_hex_input on a bad character */ + } + + *out = res; + return ++out; + } +/// \endcond + } + + +/// \fn hex ( InputIterator first, InputIterator last, OutputIterator out ) +/// \brief Converts a sequence of integral types into a hexadecimal sequence of characters. 
+/// +/// \param first The start of the input sequence +/// \param last One past the end of the input sequence +/// \param out An output iterator to write the results into (the updated iterator is returned) +/// \note Based on the MySQL function of the same name +template +typename boost::enable_if::value_type>, OutputIterator>::type +hex ( InputIterator first, InputIterator last, OutputIterator out ) { + for ( ; first != last; ++first ) + out = detail::encode_one ( *first, out ); /* 2 * sizeof (element) hex digits are written per element */ + return out; + } + + +/// \fn hex ( const T *ptr, OutputIterator out ) +/// \brief Converts a sequence of integral types into a hexadecimal sequence of characters. +/// +/// \param ptr A pointer to a 0-terminated sequence of data; the terminating zero is not encoded. +/// \param out An output iterator to write the results into +/// \return The updated output iterator +/// \note Based on the MySQL function of the same name +template +typename boost::enable_if, OutputIterator>::type +hex ( const T *ptr, OutputIterator out ) { + while ( *ptr ) /* stop at the first zero element */ + out = detail::encode_one ( *ptr++, out ); + return out; + } + +/// \fn hex ( const Range &r, OutputIterator out ) +/// \brief Converts a sequence of integral types into a hexadecimal sequence of characters. +/// +/// \param r The input range +/// \param out An output iterator to write the results into +/// \return The updated output iterator +/// \note Based on the MySQL function of the same name +template +typename boost::enable_if::value_type>, OutputIterator>::type +hex ( const Range &r, OutputIterator out ) { + return hex (boost::begin(r), boost::end(r), out); /* forwards to the iterator-pair overload */ +} + + +/// \fn unhex ( InputIterator first, InputIterator last, OutputIterator out ) +/// \brief Converts a sequence of hexadecimal characters into a sequence of integers. 
+/// +/// \param first The start of the input sequence +/// \param last One past the end of the input sequence +/// \param out An output iterator to write the results into +/// \return The updated output iterator +/// \note Based on the MySQL function of the same name +template +OutputIterator unhex ( InputIterator first, InputIterator last, OutputIterator out ) { + while ( first != last ) + out = detail::decode_one ( first, last, out ); /* decode_one advances first by reference, one output value per call */ + return out; + } + + +/// \fn unhex ( const T *ptr, OutputIterator out ) +/// \brief Converts a sequence of hexadecimal characters into a sequence of integers. +/// +/// \param ptr A pointer to a null-terminated input sequence. +/// \param out An output iterator to write the results into +/// \return The updated output iterator +/// \note Based on the MySQL function of the same name +template +OutputIterator unhex ( const T *ptr, OutputIterator out ) { + typedef typename detail::iterator_value_type::value_type OutputType; +// If we run into the terminator while decoding, we will throw a +// malformed input exception (non_hex_input). It would be nicer to throw a 'Not enough input' +// exception - but how much extra work would that require? +// I just make up an "end iterator" which we will never get to - +// two Ts per byte of the output type. NOTE(review): for a short string that pointer may lie beyond the end of the array - confirm this is acceptable. + while ( *ptr ) + out = detail::decode_one ( ptr, ptr + 2 * sizeof(OutputType), out ); /* ptr itself is advanced by decode_one */ + return out; + } + + +/// \fn unhex ( const Range &r, OutputIterator out ) +/// \brief Converts a sequence of hexadecimal characters into a sequence of integers. +/// +/// \param r The input range +/// \param out An output iterator to write the results into +/// \return The updated output iterator +/// \note Based on the MySQL function of the same name +template +OutputIterator unhex ( const Range &r, OutputIterator out ) { + return unhex (boost::begin(r), boost::end(r), out); /* forwards to the iterator-pair overload */ + } + + +/// \fn hex ( const String &input ) +/// \brief Converts a sequence of integral types into a hexadecimal sequence of characters. 
+/// +/// \param input The container to encode; String is used with size (), reserve () and std::back_inserter +/// \return A container with the encoded text +template +String hex ( const String &input ) { + String output; + output.reserve (input.size () * (2 * sizeof (typename String::value_type))); /* each element expands to 2 * sizeof (value_type) hex digits */ + (void) hex (input, std::back_inserter (output)); /* the returned iterator is not needed */ + return output; + } + +/// \fn unhex ( const String &input ) +/// \brief Converts a sequence of hexadecimal characters into a sequence of integers. +/// +/// \param input The container to decode; String is used with size (), reserve () and std::back_inserter +/// \return A container with the decoded text +template +String unhex ( const String &input ) { + String output; + output.reserve (input.size () / (2 * sizeof (typename String::value_type))); /* 2 * sizeof (value_type) hex digits collapse into one element */ + (void) unhex (input, std::back_inserter (output)); /* the returned iterator is not needed */ + return output; + } + +}} /* namespace boost::algorithm */ + +#endif // BOOST_ALGORITHM_HEXHPP diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 24c8ca7..7e9292d 100755 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -39,6 +39,11 @@ import testing ; [ run is_partitioned_test1.cpp : : : : is_partitioned_test1 ] [ run partition_copy_test1.cpp : : : : partition_copy_test1 ] +# Hex tests + [ run hex_test1.cpp : : : : hex_test1 ] + [ run hex_test2.cpp : : : : hex_test2 ] + [ run hex_test3.cpp : : : : hex_test3 ] + [ compile-fail hex_fail1.cpp ] ; } diff --git a/test/hex_fail1.cpp b/test/hex_fail1.cpp new file mode 100644 index 0000000..7192245 --- /dev/null +++ b/test/hex_fail1.cpp @@ -0,0 +1,25 @@ +/* + Copyright (c) Marshall Clow 2011-2012. + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + For more information, see http://www.boost.org +*/ + +#include +#include +#include + +#include +#include +#include + +// should not compile: vector is not an integral type +int test_main( int , char* [] ) +{ + std::vector v; + std::string out; + boost::algorithm::unhex ( out, std::back_inserter(v)); + return 0; +} diff --git a/test/hex_test1.cpp b/test/hex_test1.cpp new file mode 100644 index 0000000..891afe4 --- /dev/null +++ b/test/hex_test1.cpp @@ -0,0 +1,154 @@ +/* + Copyright (c) Marshall Clow 2011-2012. + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + For more information, see http://www.boost.org +*/ + +#include +#include +#include + +#include +#include + + +template +void test_to_hex ( const typename String::value_type ** tests ) { + for ( const typename String::value_type **p = tests; *p; p++ ) { + String arg, argh, one, two, three, four; + arg.assign ( *p ); + boost::algorithm::hex ( *p, std::back_inserter ( one )); + boost::algorithm::hex ( arg, std::back_inserter ( two )); + boost::algorithm::hex ( arg.begin (), arg.end (), std::back_inserter ( three )); + four = boost::algorithm::hex ( arg ); + BOOST_CHECK ( one == two ); + BOOST_CHECK ( one == three ); + BOOST_CHECK ( one == four ); + argh = one; /* keep the hex text so that every unhex overload can decode it again */ + one.clear (); two.clear (); three.clear (); four.clear (); + boost::algorithm::unhex ( argh.c_str (), std::back_inserter ( one )); + boost::algorithm::unhex ( argh, std::back_inserter ( two )); + boost::algorithm::unhex ( argh.begin (), argh.end (), std::back_inserter ( three )); + four = boost::algorithm::unhex ( argh ); + BOOST_CHECK ( one == two ); + BOOST_CHECK ( one == three ); + BOOST_CHECK ( one == four ); + BOOST_CHECK ( one == arg ); /* the round trip must reproduce the original input */ + } + } + + +template +void test_from_hex_success ( const typename String::value_type ** tests ) { + for ( const 
typename String::value_type **p = tests; *p; p++ ) { + String arg, argh, one, two, three, four; + arg.assign ( *p ); + boost::algorithm::unhex ( *p, std::back_inserter ( one )); + boost::algorithm::unhex ( arg, std::back_inserter ( two )); + boost::algorithm::unhex ( arg.begin (), arg.end (), std::back_inserter ( three )); + four = boost::algorithm::unhex ( arg ); + BOOST_CHECK ( one == two ); + BOOST_CHECK ( one == three ); + BOOST_CHECK ( one == four ); + argh = one; + one.clear (); two.clear (); three.clear (); four.clear (); + boost::algorithm::hex ( argh.c_str (), std::back_inserter ( one )); + boost::algorithm::hex ( argh, std::back_inserter ( two )); + boost::algorithm::hex ( argh.begin (), argh.end (), std::back_inserter ( three )); + four = boost::algorithm::hex ( argh ); + BOOST_CHECK ( one == two ); + BOOST_CHECK ( one == three ); + BOOST_CHECK ( one == four ); + BOOST_CHECK ( one == arg ); + } + } + +template +void test_from_hex_failure ( const typename String::value_type ** tests ) { + int num_catches; + for ( const typename String::value_type **p = tests; *p; p++ ) { + String arg, one; + arg.assign ( *p ); + num_catches = 0; + + try { boost::algorithm::unhex ( *p, std::back_inserter ( one )); } + catch ( const boost::algorithm::hex_decode_error & /*ex*/ ) { num_catches++; } + try { boost::algorithm::unhex ( arg, std::back_inserter ( one )); } + catch ( const boost::algorithm::hex_decode_error & /*ex*/ ) { num_catches++; } + try { boost::algorithm::unhex ( arg.begin (), arg.end (), std::back_inserter ( one )); } + catch ( const boost::algorithm::hex_decode_error & /*ex*/ ) { num_catches++; } + BOOST_CHECK ( num_catches == 3 ); /* every one of the three unhex overloads has to throw */ + } + } + + + +const char *tohex [] = { + "", + "a", + "\001", + "12", + "asdfadsfsad", + "01234567890ABCDEF", + NULL // End of the list + }; + + +const wchar_t *tohex_w [] = { + L"", + L"a", + L"\001", + L"12", + L"asdfadsfsad", + L"01234567890ABCDEF", + NULL // End of the list + }; + + +const char *fromhex [] = { + "20", + 
"2122234556FF", + NULL // End of the list + }; + + +const wchar_t *fromhex_w [] = { + L"00101020", + L"2122234556FF3456", + NULL // End of the list + }; + + +const char *fromhex_fail [] = { + "2", + "H", + "234", + "21222G4556FF", + NULL // End of the list + }; + + +const wchar_t *fromhex_fail_w [] = { + L"2", + L"12", /* presumably fails only because two digits are too few for one wchar_t - confirm */ + L"H", + L"234", + L"21222G4556FF", + NULL // End of the list + }; + + +int test_main( int , char* [] ) +{ + test_to_hex ( tohex ); + test_from_hex_success ( fromhex ); + test_from_hex_failure ( fromhex_fail ); + + test_to_hex ( tohex_w ); + test_from_hex_success ( fromhex_w ); + test_from_hex_failure ( fromhex_fail_w ); + return 0; +} diff --git a/test/hex_test2.cpp b/test/hex_test2.cpp new file mode 100644 index 0000000..7092154 --- /dev/null +++ b/test/hex_test2.cpp @@ -0,0 +1,137 @@ +/* + Copyright (c) Marshall Clow 2011-2012. + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + For more information, see http://www.boost.org + +Test non-string cases; deque and list +*/ + +#include +#include +#include + +#include +#include +#include +#include + + +const char *tohex [] = { + "", + "a", + "\001", + "12", + "asdfadsfsad", + "01234567890ABCDEF", + NULL // End of the list + }; + +void test_to_hex () { + for ( const char **p = tohex; *p; p++ ) { + std::deque arg, argh; + std::list one, two, three; + arg.assign ( *p, *p + strlen (*p)); + boost::algorithm::hex ( *p, std::back_inserter ( one )); + boost::algorithm::hex ( arg, std::back_inserter ( two )); + boost::algorithm::hex ( arg.begin (), arg.end (), std::back_inserter ( three )); + BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + + std::copy ( one.begin (), one.end (), std::back_inserter ( argh )); + one.clear (); two.clear (); three.clear (); + +// boost::algorithm::unhex ( argh.c_str (), 
std::back_inserter ( one )); + boost::algorithm::unhex ( argh, std::back_inserter ( two )); + boost::algorithm::unhex ( argh.begin (), argh.end (), std::back_inserter ( three )); +// BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), arg.begin ())); + } + +// Again, with a front_inserter + for ( const char **p = tohex; *p; p++ ) { + std::deque arg, argh; + std::list one, two, three; + arg.assign ( *p, *p + strlen (*p)); + boost::algorithm::hex ( *p, std::front_inserter ( one )); + boost::algorithm::hex ( arg, std::front_inserter ( two )); + boost::algorithm::hex ( arg.begin (), arg.end (), std::front_inserter ( three )); + BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + + // Copy, reversing + std::copy ( one.begin (), one.end (), std::front_inserter ( argh )); + one.clear (); two.clear (); three.clear (); + +// boost::algorithm::unhex ( argh.c_str (), std::front_inserter ( one )); + boost::algorithm::unhex ( argh, std::front_inserter ( two )); + boost::algorithm::unhex ( argh.begin (), argh.end (), std::front_inserter ( three )); +// BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), arg.rbegin ())); // reverse + } + } + +const char *fromhex [] = { + "20", + "2122234556FF", + NULL // End of the list + }; + + +void test_from_hex_success () { + for ( const char **p = fromhex; *p; p++ ) { + std::deque arg, argh; + std::list one, two, three; + arg.assign ( *p, *p + strlen (*p)); + boost::algorithm::unhex ( *p, std::back_inserter ( one )); + boost::algorithm::unhex ( arg, std::back_inserter ( two )); + boost::algorithm::unhex ( arg.begin (), arg.end (), std::back_inserter ( three )); + 
BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + + std::copy ( one.begin (), one.end (), std::back_inserter ( argh )); + one.clear (); two.clear (); three.clear (); + +// boost::algorithm::hex ( argh.c_str (), std::back_inserter ( one )); + boost::algorithm::hex ( argh, std::back_inserter ( two )); + boost::algorithm::hex ( argh.begin (), argh.end (), std::back_inserter ( three )); +// BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), arg.begin ())); + } + +// Again, with a front_inserter + for ( const char **p = fromhex; *p; p++ ) { + std::deque arg, argh; + std::list one, two, three; + arg.assign ( *p, *p + strlen (*p)); + boost::algorithm::unhex ( *p, std::front_inserter ( one )); + boost::algorithm::unhex ( arg, std::front_inserter ( two )); + boost::algorithm::unhex ( arg.begin (), arg.end (), std::front_inserter ( three )); + BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + + // Copy, reversing + std::copy ( one.begin (), one.end (), std::front_inserter ( argh )); + one.clear (); two.clear (); three.clear (); + +// boost::algorithm::hex ( argh.c_str (), std::front_inserter ( one )); + boost::algorithm::hex ( argh, std::front_inserter ( two )); + boost::algorithm::hex ( argh.begin (), argh.end (), std::front_inserter ( three )); +// BOOST_CHECK ( std::equal ( one.begin (), one.end (), two.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), three.begin ())); + BOOST_CHECK ( std::equal ( two.begin (), two.end (), arg.rbegin ())); // reversed + } + } + + +int test_main( int , char* [] ) +{ + test_to_hex (); + test_from_hex_success (); + return 0; +} diff --git a/test/hex_test3.cpp b/test/hex_test3.cpp new file mode 
100644 index 0000000..cb91d95 --- /dev/null +++ b/test/hex_test3.cpp @@ -0,0 +1,123 @@ +/* + Copyright (c) Marshall Clow 2011-2012. + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + + For more information, see http://www.boost.org + +Try hex and unhex with ostream_iterators as the output +*/ + +#include +#include +#include + +#include +#include +#include +#include + + +template +void test_to_hex ( const char_type ** tests ) { + typedef std::basic_string String; + typedef std::basic_ostringstream Stream; + typedef std::ostream_iterator Iter; + + for ( const char_type **p = tests; *p; p++ ) { + String arg, argh; + Stream one, two, three; + arg.assign ( *p ); + boost::algorithm::hex ( *p, Iter ( one )); + boost::algorithm::hex ( arg, Iter ( two )); + boost::algorithm::hex ( arg.begin (), arg.end (), Iter ( three )); + boost::algorithm::hex ( arg ); /* result is discarded: this call is not compared against anything */ + BOOST_CHECK ( one.str () == two.str ()); + BOOST_CHECK ( one.str () == three.str ()); + argh = one.str (); + one.str (String()); two.str (String()); three.str (String()); /* empty the streams before decoding */ + boost::algorithm::unhex ( argh.c_str (), Iter ( one )); + boost::algorithm::unhex ( argh, Iter ( two )); + boost::algorithm::unhex ( argh.begin (), argh.end (), Iter ( three )); + BOOST_CHECK ( one.str () == two.str ()); + BOOST_CHECK ( one.str () == three.str ()); + BOOST_CHECK ( one.str () == arg ); + } + } + + +template +void test_from_hex_success ( const char_type ** tests ) { + typedef std::basic_string String; + typedef std::basic_ostringstream Stream; + typedef std::ostream_iterator Iter; + + for ( const char_type **p = tests; *p; p++ ) { + String arg, argh; + Stream one, two, three; + arg.assign ( *p ); + boost::algorithm::unhex ( *p, Iter ( one )); + boost::algorithm::unhex ( arg, Iter ( two )); + boost::algorithm::unhex ( arg.begin (), arg.end (), Iter ( three )); + + BOOST_CHECK ( one.str () == two.str ()); + BOOST_CHECK ( one.str () == three.str ()); + + argh = one.str 
(); + one.str (String()); two.str (String()); three.str (String()); + + boost::algorithm::hex ( argh.c_str (), Iter ( one )); + boost::algorithm::hex ( argh, Iter ( two )); + boost::algorithm::hex ( argh.begin (), argh.end (), Iter ( three )); + + BOOST_CHECK ( one.str () == two.str ()); + BOOST_CHECK ( one.str () == three.str ()); + BOOST_CHECK ( one.str () == arg ); /* encoding the decoded data must give back the original hex text */ + } + + } + +const char *tohex [] = { + "", + "a", + "\001", + "12", + "asdfadsfsad", + "01234567890ABCDEF", + NULL // End of the list + }; + +const wchar_t *tohex_w [] = { + L"", + L"a", + L"\001", + L"12", + L"asdfadsfsad", + L"01234567890ABCDEF", + NULL // End of the list + }; + + +const char *fromhex [] = { + "20", + "2122234556FF", + NULL // End of the list + }; + +const wchar_t *fromhex_w [] = { + L"11223320", + L"21222345010256FF", + NULL // End of the list + }; + + + +int test_main( int , char* [] ) +{ + test_to_hex ( tohex ); + test_to_hex ( tohex_w ); + test_from_hex_success ( fromhex ); + test_from_hex_success ( fromhex_w ); + return 0; +}