forked from catchorg/Catch2
		
	The ostream passed as reference to `hexEscapeChar` is manipulated and its original state not restored. This fixes it. Seen via coverity in a downstream project.
		
			
				
	
	
		
			287 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			287 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 | 
						|
 *  Created by Phil on 19/07/2017.
 | 
						|
 *
 | 
						|
 *  Distributed under the Boost Software License, Version 1.0. (See accompanying
 | 
						|
 *  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 | 
						|
 */
 | 
						|
 | 
						|
#include "catch_xmlwriter.h"
 | 
						|
 | 
						|
#include "catch_enforce.h"
 | 
						|
 | 
						|
#include <iomanip>
 | 
						|
 | 
						|
using uchar = unsigned char;
 | 
						|
 | 
						|
namespace Catch {
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
    // Classifies a UTF-8 lead byte by its high bits and returns the sequence
    // length associated with it:
    //   110x xxxx -> 2,   1110 xxxx -> 3,   1111 0xxx -> 4
    // Any other byte is reported through CATCH_INTERNAL_ERROR.
    size_t trailingBytes(unsigned char c) {
        const unsigned lead = c;
        if ((lead >> 5) == 0x06) {          // 110x xxxx
            return 2;
        }
        if ((lead >> 4) == 0x0E) {          // 1110 xxxx
            return 3;
        }
        if ((lead >> 3) == 0x1E) {          // 1111 0xxx
            return 4;
        }
        CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
    }
 | 
						|
 | 
						|
    // Extracts the payload bits carried by a UTF-8 lead byte:
    //   110x xxxx -> low 5 bits,  1110 xxxx -> low 4 bits,  1111 0xxx -> low 3 bits
    // Any other byte is reported through CATCH_INTERNAL_ERROR.
    uint32_t headerValue(unsigned char c) {
        const unsigned lead = c;
        if ((lead >> 5) == 0x06) {          // 110x xxxx
            return lead & 0x1F;
        }
        if ((lead >> 4) == 0x0E) {          // 1110 xxxx
            return lead & 0x0F;
        }
        if ((lead >> 3) == 0x1E) {          // 1111 0xxx
            return lead & 0x07;
        }
        CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
    }
 | 
						|
 | 
						|
    // Writes `c` to `os` as a backslash, an 'x' and two uppercase hex digits
    // (for example the byte 0x1B is written as \x1B).
    //
    // The manipulators used here change persistent stream state, so both the
    // format flags and the fill character are saved up front and restored
    // before returning. The fill character is not part of fmtflags and has to
    // be handled separately, otherwise std::setfill('0') would leak into the
    // caller's stream. std::setw needs no restoring: the width is reset by the
    // integer insertion itself.
    void hexEscapeChar(std::ostream& os, unsigned char c) {
        const std::ios_base::fmtflags savedFlags(os.flags());
        const char savedFill = os.fill();
        os << "\\x"
            << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
            << static_cast<int>(c);
        os.flags(savedFlags);
        os.fill(savedFill);
    }
 | 
						|
 | 
						|
} // anonymous namespace
 | 
						|
 | 
						|
    // Initialises m_str from the text to be encoded and m_forWhat from the
    // context selector that encodeTo() consults when it meets a '"'.
    XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat )
    :   m_str( str ), m_forWhat( forWhat ) {}
 | 
						|
 | 
						|
    void XmlEncode::encodeTo( std::ostream& os ) const {
 | 
						|
        // Apostrophe escaping not necessary if we always use " to write attributes
 | 
						|
        // (see: http://www.w3.org/TR/xml/#syntax)
 | 
						|
 | 
						|
        for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
 | 
						|
            uchar c = m_str[idx];
 | 
						|
            switch (c) {
 | 
						|
            case '<':   os << "<"; break;
 | 
						|
            case '&':   os << "&"; break;
 | 
						|
 | 
						|
            case '>':
 | 
						|
                // See: http://www.w3.org/TR/xml/#syntax
 | 
						|
                if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
 | 
						|
                    os << ">";
 | 
						|
                else
 | 
						|
                    os << c;
 | 
						|
                break;
 | 
						|
 | 
						|
            case '\"':
 | 
						|
                if (m_forWhat == ForAttributes)
 | 
						|
                    os << """;
 | 
						|
                else
 | 
						|
                    os << c;
 | 
						|
                break;
 | 
						|
 | 
						|
            default:
 | 
						|
                // Check for control characters and invalid utf-8
 | 
						|
 | 
						|
                // Escape control characters in standard ascii
 | 
						|
                // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
 | 
						|
                if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
 | 
						|
                    hexEscapeChar(os, c);
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
 | 
						|
                // Plain ASCII: Write it to stream
 | 
						|
                if (c < 0x7F) {
 | 
						|
                    os << c;
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
 | 
						|
                // UTF-8 territory
 | 
						|
                // Check if the encoding is valid and if it is not, hex escape bytes.
 | 
						|
                // Important: We do not check the exact decoded values for validity, only the encoding format
 | 
						|
                // First check that this bytes is a valid lead byte:
 | 
						|
                // This means that it is not encoded as 1111 1XXX
 | 
						|
                // Or as 10XX XXXX
 | 
						|
                if (c <  0xC0 ||
 | 
						|
                    c >= 0xF8) {
 | 
						|
                    hexEscapeChar(os, c);
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
 | 
						|
                auto encBytes = trailingBytes(c);
 | 
						|
                // Are there enough bytes left to avoid accessing out-of-bounds memory?
 | 
						|
                if (idx + encBytes - 1 >= m_str.size()) {
 | 
						|
                    hexEscapeChar(os, c);
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
                // The header is valid, check data
 | 
						|
                // The next encBytes bytes must together be a valid utf-8
 | 
						|
                // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
 | 
						|
                bool valid = true;
 | 
						|
                uint32_t value = headerValue(c);
 | 
						|
                for (std::size_t n = 1; n < encBytes; ++n) {
 | 
						|
                    uchar nc = m_str[idx + n];
 | 
						|
                    valid &= ((nc & 0xC0) == 0x80);
 | 
						|
                    value = (value << 6) | (nc & 0x3F);
 | 
						|
                }
 | 
						|
 | 
						|
                if (
 | 
						|
                    // Wrong bit pattern of following bytes
 | 
						|
                    (!valid) ||
 | 
						|
                    // Overlong encodings
 | 
						|
                    (value < 0x80) ||
 | 
						|
                    (0x80 <= value && value < 0x800   && encBytes > 2) ||
 | 
						|
                    (0x800 < value && value < 0x10000 && encBytes > 3) ||
 | 
						|
                    // Encoded value out of range
 | 
						|
                    (value >= 0x110000)
 | 
						|
                    ) {
 | 
						|
                    hexEscapeChar(os, c);
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
 | 
						|
                // If we got here, this is in fact a valid(ish) utf-8 sequence
 | 
						|
                for (std::size_t n = 0; n < encBytes; ++n) {
 | 
						|
                    os << m_str[idx + n];
 | 
						|
                }
 | 
						|
                idx += encBytes - 1;
 | 
						|
                break;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // Stream insertion for XmlEncode: delegates to encodeTo() and hands the
    // stream back so insertions can be chained.
    std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) {
        xmlEncode.encodeTo( os );
        return os;
    }
 | 
						|
 | 
						|
    // Binds the guard to `writer`; the destructor calls endElement() on it
    // for as long as the pointer stays non-null.
    XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer )
    :   m_writer( writer ) {}
 | 
						|
 | 
						|
    // Move construction: takes over the writer pointer from `other`.
    XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept
    :   m_writer( other.m_writer )
    {
        // Detach the source so that its destructor does not call endElement().
        other.m_writer = nullptr;
    }
 | 
						|
    // Move assignment: calls endElement() on the writer currently held (if
    // any), then takes over the writer pointer from `other` and detaches
    // `other` so that its destructor does nothing.
    XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept {
        // Self-move must be a no-op: without this guard the element would be
        // ended here and m_writer would then be reset to nullptr.
        if ( this == &other ) {
            return *this;
        }
        if ( m_writer ) {
            m_writer->endElement();
        }
        m_writer = other.m_writer;
        other.m_writer = nullptr;
        return *this;
    }
 | 
						|
 | 
						|
 | 
						|
    // Calls endElement() on the bound writer unless this guard holds no
    // writer (m_writer is null, e.g. after being moved from).
    XmlWriter::ScopedElement::~ScopedElement() {
        if( !m_writer )
            return;
        m_writer->endElement();
    }
 | 
						|
 | 
						|
    // Forwards to XmlWriter::writeText() on the bound writer and returns this
    // guard so calls can be chained. m_writer is dereferenced unchecked.
    XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, bool indent ) {
        XmlWriter& target = *m_writer;
        target.writeText( text, indent );
        return *this;
    }
 | 
						|
 | 
						|
    // Binds the writer to `os` and immediately emits the XML declaration.
    XmlWriter::XmlWriter( std::ostream& os )
    :   m_os( os )
    {
        writeDeclaration();
    }
 | 
						|
 | 
						|
    // Closes every element that is still recorded in m_tags.
    XmlWriter::~XmlWriter() {
        while( !m_tags.empty() ) {
            endElement();
        }
    }
 | 
						|
 | 
						|
    // Opens a new element called `name`: finishes any pending start tag,
    // emits a pending newline, writes "<name" at the current indentation and
    // leaves the tag open so attributes can still be appended. The name is
    // recorded in m_tags and the indentation grows by two spaces.
    XmlWriter& XmlWriter::startElement( std::string const& name ) {
        ensureTagClosed();
        newlineIfNecessary();

        m_os << m_indent << '<' << name;

        // Bookkeeping for the matching endElement()
        m_tags.push_back( name );
        m_indent += "  ";
        m_tagIsOpen = true;

        return *this;
    }
 | 
						|
 | 
						|
    // Starts element `name` and returns a ScopedElement bound to this writer;
    // the guard's destructor calls endElement().
    XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name ) {
        // The guard is created before the element is started, as in the
        // original statement order.
        ScopedElement guard( this );
        startElement( name );
        return guard;
    }
 | 
						|
 | 
						|
    // Closes the most recently started element. If its start tag is still
    // open it is finished as a self-closing tag ("/>"); otherwise a separate
    // "</name>" line is written at the reduced indentation. Ends the line
    // with std::endl and removes the element from m_tags.
    XmlWriter& XmlWriter::endElement() {
        newlineIfNecessary();

        // Drop the two spaces added by startElement()
        m_indent = m_indent.substr( 0, m_indent.size()-2 );

        if( !m_tagIsOpen ) {
            m_os << m_indent << "</" << m_tags.back() << ">";
        }
        else {
            m_os << "/>";
            m_tagIsOpen = false;
        }

        m_os << std::endl;
        m_tags.pop_back();
        return *this;
    }
 | 
						|
 | 
						|
    // Appends ` name="value"` to the stream, with the value passed through
    // XmlEncode in ForAttributes mode. Nothing is written when either the
    // name or the value is empty.
    XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) {
        if( name.empty() || attribute.empty() )
            return *this;

        m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"';
        return *this;
    }
 | 
						|
 | 
						|
    // Appends ` name="true"` or ` name="false"` to the stream. Unlike the
    // string overload, `name` is not checked for emptiness.
    XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) {
        char const* const valueText = attribute ? "true" : "false";
        m_os << ' ' << name << "=\"" << valueText << '"';
        return *this;
    }
 | 
						|
 | 
						|
    // Writes `text` (XML-encoded) as element content. Empty text is ignored.
    // A pending start tag is finished first; if that tag was open and
    // `indent` is set, the text is preceded by the current indentation.
    // Marks the writer as needing a newline before the next tag.
    XmlWriter& XmlWriter::writeText( std::string const& text, bool indent ) {
        if( text.empty() )
            return *this;

        // Must be sampled before ensureTagClosed() resets m_tagIsOpen
        bool const indentFirst = m_tagIsOpen && indent;
        ensureTagClosed();
        if( indentFirst )
            m_os << m_indent;

        m_os << XmlEncode( text );
        m_needsNewline = true;
        return *this;
    }
 | 
						|
 | 
						|
    // Writes `text` as an XML comment at the current indentation, after
    // finishing any pending start tag. The text is inserted as-is (it is not
    // passed through XmlEncode). Marks the writer as needing a newline.
    XmlWriter& XmlWriter::writeComment( std::string const& text ) {
        ensureTagClosed();

        m_os << m_indent << "<!--" << text << "-->";

        m_needsNewline = true;
        return *this;
    }
 | 
						|
 | 
						|
    // Emits an xml-stylesheet processing instruction of type "text/xsl" that
    // points at `url`, followed by a newline. `url` is inserted as-is.
    void XmlWriter::writeStylesheetRef( std::string const& url ) {
        m_os << "<?xml-stylesheet type=\"text/xsl\" href=\""
             << url
             << "\"?>\n";
    }
 | 
						|
 | 
						|
    // Finishes any pending start tag, then writes a single '\n'.
    XmlWriter& XmlWriter::writeBlankLine() {
        ensureTagClosed();

        m_os << '\n';
        return *this;
    }
 | 
						|
 | 
						|
    void XmlWriter::ensureTagClosed() {
 | 
						|
        if( m_tagIsOpen ) {
 | 
						|
            m_os << ">" << std::endl;
 | 
						|
            m_tagIsOpen = false;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    // Writes the XML declaration line (version 1.0, UTF-8) to the stream.
    void XmlWriter::writeDeclaration() {
        m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
    }
 | 
						|
 | 
						|
    void XmlWriter::newlineIfNecessary() {
 | 
						|
        if( m_needsNewline ) {
 | 
						|
            m_os << std::endl;
 | 
						|
            m_needsNewline = false;
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 |