CMakePM: Initial import of the RSTparser

Change-Id: I45bc3d53df3358c1f52ca219b53a1dec8e85a4ca
Reviewed-by: Alessandro Portale <alessandro.portale@qt.io>
This commit is contained in:
Cristian Adam
2023-09-25 23:01:31 +02:00
parent ef88a5c3d0
commit ec13beff1c
7 changed files with 620 additions and 0 deletions

View File

@@ -970,3 +970,36 @@ SQLite (https://www.sqlite.org) is in the Public Domain.
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
### RSTParser
RSTParser is an open-source C++ library for parsing reStructuredText.
https://github.com/vitaut-archive/rstparser
License
-------
Copyright (c) 2013, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -938,6 +938,43 @@
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\endcode
\li \b RSTParser
RSTParser is an open-source C++ library for parsing reStructuredText.
\list
\li \l https://github.com/vitaut-archive/rstparser
\endlist
\badcode
License
-------
Copyright (c) 2013, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\endcode
\endlist
*/

View File

@@ -0,0 +1,3 @@
Files taken from the RSTParser repository https://github.com/vitaut-archive/rstparser.git
49e1e6626ba28357749acfe3bf07c4a19e5bc4ef

View File

@@ -0,0 +1,32 @@
RSTParser
=========
RSTParser is an open-source C++ library for parsing
`reStructuredText <http://docutils.sourceforge.net/rst.html>`__.
License
-------
Copyright (c) 2013, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,169 @@
/*
reStructuredText parser tests.
Copyright (c) 2012, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <gtest/gtest.h>
#include <stack>
#ifdef _WIN32
# include <crtdbg.h>
#endif
#include "rstparser.h"
namespace {
class TestHandler : public rst::ContentHandler {
private:
std::stack<std::string> tags_;
std::string content_;
public:
const std::string &content() const { return content_; }
void StartBlock(rst::BlockType type) {
std::string tag;
switch (type) {
case rst::PARAGRAPH:
tag = "p";
break;
case rst::LINE_BLOCK:
tag = "lineblock";
break;
case rst::BLOCK_QUOTE:
tag = "blockquote";
break;
case rst::BULLET_LIST:
tag = "ul";
break;
case rst::LIST_ITEM:
tag = "li";
break;
case rst::LITERAL_BLOCK:
tag = "code";
break;
}
content_ += "<" + tag + ">";
tags_.push(tag);
}
void EndBlock() {
content_ += "</" + tags_.top() + ">";
tags_.pop();
}
void HandleText(const char *text, std::size_t size) {
content_.append(text, size);
}
void HandleDirective(const char *type) {
content_ += std::string("<") + type + " />";
}
};
std::string Parse(const char *s) {
TestHandler handler;
rst::Parser parser(&handler);
parser.Parse(s);
return handler.content();
}
}
// Plain text is reported as a single paragraph. A leading empty line is
// skipped, and a lone "." or a ".." immediately followed by text is ordinary
// paragraph text rather than a comment.
TEST(ParserTest, Paragraph) {
EXPECT_EQ("<p>test</p>", Parse("test"));
EXPECT_EQ("<p>test</p>", Parse("\ntest"));
EXPECT_EQ("<p>.</p>", Parse("."));
EXPECT_EQ("<p>..test</p>", Parse("..test"));
}
// Lines introduced by a '|' marker are collected into one line block; the
// markers are dropped and the lines stay separated by newlines.
TEST(ParserTest, LineBlock) {
EXPECT_EQ("<lineblock>test</lineblock>", Parse("| test"));
EXPECT_EQ("<lineblock> abc\ndef</lineblock>", Parse("| abc\n| def"));
}
// Text on an indented line is reported as a block quote, without the
// indentation.
TEST(ParserTest, BlockQuote) {
EXPECT_EQ("<blockquote>test</blockquote>", Parse(" test"));
}
// Spaces between words inside a line are passed through to the handler.
TEST(ParserTest, PreserveInnerSpace) {
EXPECT_EQ("<p>a b</p>", Parse("a b"));
}
// Tabs and vertical tabs inside the text are replaced with space characters,
// both in paragraphs and in block quotes.
TEST(ParserTest, ReplaceWhitespace) {
EXPECT_EQ("<p>a b</p>", Parse("a\tb"));
EXPECT_EQ("<blockquote>a b</blockquote>", Parse(" a\tb"));
EXPECT_EQ("<p>a b</p>", Parse("a\vb"));
}
// Spaces and tabs at the end of a line are not part of the reported text.
TEST(ParserTest, StripTrailingSpace) {
EXPECT_EQ("<p>test</p>", Parse("test \t"));
}
// Consecutive lines with the same indentation form one block whose lines are
// joined with a newline.
TEST(ParserTest, MultiLineBlock) {
EXPECT_EQ("<p>line 1\nline 2</p>", Parse("line 1\nline 2"));
}
// A line with less indentation ends the block quote and starts a paragraph.
TEST(ParserTest, UnindentBlock) {
EXPECT_EQ("<blockquote>abc</blockquote><p>def</p>", Parse(" abc\ndef"));
}
// A "* " item is wrapped in a bullet list containing one list item, and a
// following indented line continues the same item.
TEST(ParserTest, BulletList) {
EXPECT_EQ("<ul><li>item</li></ul>", Parse("* item"));
EXPECT_EQ("<ul><li>abc\ndef</li></ul>", Parse("* abc\n def"));
}
// A paragraph ending in "::" keeps a single ':' and introduces an indented
// literal block. A paragraph consisting only of "::" produces no paragraph of
// its own. Text that follows without indentation stays a normal paragraph,
// and "::" that is not at the end of the paragraph is ordinary text.
TEST(ParserTest, Literal) {
EXPECT_EQ("<p>abc:</p><code>def</code>", Parse("abc::\n\n def"));
EXPECT_EQ("<code>abc\ndef</code>", Parse("::\n\n abc\n def"));
EXPECT_EQ("<p>abc\ndef</p>", Parse("::\n\nabc\ndef"));
EXPECT_EQ("<p>::\nabc\ndef</p>", Parse("::\nabc\ndef"));
}
// A ".." marker on its own, or followed by whitespace and text that is not a
// directive, is a comment and produces no output.
TEST(ParserTest, Comment) {
EXPECT_EQ("", Parse(".."));
EXPECT_EQ("", Parse("..\n"));
EXPECT_EQ("", Parse(".. comment"));
EXPECT_EQ("", Parse(".. comment:"));
}
// ".. name::" is reported as a directive called "name"; the whitespace
// between the marker and the name may also be a tab.
TEST(ParserTest, Directive) {
EXPECT_EQ("<test />", Parse(".. test::"));
EXPECT_EQ("<test />", Parse(".. test::"));
EXPECT_EQ("<test />", Parse("..\ttest::"));
}
// Test runner entry point: initializes Google Test from the command line and
// returns the result of running all registered tests.
int main(int argc, char **argv) {
#ifdef _WIN32
// Disable message boxes on assertion failures.
// CRT errors and assertion reports are sent to stderr (and the debugger)
// instead.
_CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
_CrtSetReportFile(_CRT_ERROR, _CRTDBG_FILE_STDERR);
_CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
#endif
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -0,0 +1,249 @@
/*
A reStructuredText parser written in C++.
Copyright (c) 2013, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "rstparser.h"
#include <cctype>
#include <cstring>
namespace {
// Tells whether c is in-line whitespace: space, horizontal tab, vertical tab
// or form feed. Newline and carriage return are deliberately not included.
inline bool IsSpace(char c) {
  return c == ' ' || c == '\t' || c == '\v' || c == '\f';
}
// Reports whether the string s has the null-terminated string end as its
// suffix. An empty suffix matches every string.
bool EndsWith(const std::string &s, const char *end) {
  const std::size_t suffix_len = std::strlen(end);
  if (s.size() < suffix_len)
    return false;
  return s.compare(s.size() - suffix_len, suffix_len, end) == 0;
}
}
// Out-of-line definition of the virtual destructor declared in rstparser.h;
// it has nothing to release.
rst::ContentHandler::~ContentHandler() {}
// Moves ptr_ forward over any run of in-line whitespace accepted by
// IsSpace(). It stops at the first other character, which includes a newline
// and the terminating null.
void rst::Parser::SkipSpace() {
  for (; IsSpace(*ptr_); ++ptr_) {
  }
}
// Parses a directive type name starting at ptr_.
//
// A name starts with an alphanumeric character and continues with
// alphanumeric characters; a single '-', '_', '+', ':' or '.' is accepted
// inside the name only when it is immediately followed by an alphanumeric
// character. This is why the "::" that terminates a directive is not consumed.
//
// Returns the name and leaves ptr_ just past it. If the text at ptr_ does not
// start with an alphanumeric character, returns an empty string and leaves
// ptr_ unchanged.
std::string rst::Parser::ParseDirectiveType() {
  const char *s = ptr_;
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF). Plain char may be negative for non-ASCII bytes, so every call
  // goes through an explicit cast.
  if (!std::isalnum(static_cast<unsigned char>(*s)))
    return std::string();
  for (;;) {
    ++s;
    if (std::isalnum(static_cast<unsigned char>(*s)))
      continue;
    switch (*s) {
    case '-': case '_': case '+': case ':': case '.':
      if (std::isalnum(static_cast<unsigned char>(s[1]))) {
        // Step onto the alphanumeric character after the separator; the
        // increment at the top of the loop then moves past it.
        ++s;
        continue;
      }
      // Fall through.
    }
    break;
  }
  // s is always past ptr_ here because the first character was accepted.
  std::string type(ptr_, s);
  ptr_ = s;
  return type;
}
// Records a transition from block type prev_type to type and notifies the
// handler about bullet list boundaries: leaving a run of list items ends the
// enclosing block, entering one starts a BULLET_LIST block. Nothing happens
// when the type does not change. On return prev_type equals type.
void rst::Parser::EnterBlock(rst::BlockType &prev_type, rst::BlockType type) {
  if (type != prev_type) {
    const bool leaving_list = prev_type == LIST_ITEM;
    const bool entering_list = type == LIST_ITEM;
    if (leaving_list)
      handler_->EndBlock();
    if (entering_list)
      handler_->StartBlock(BULLET_LIST);
    prev_type = type;
  }
}
// Parses one block of text starting at ptr_ and reports it to the handler.
//
// type      - block type reported for the collected text.
// prev_type - type of the previously reported block; updated via EnterBlock().
// indent    - indentation (in characters) that continuation lines must have.
//
// The first line is taken from ptr_ to the end of the line. Following lines
// belong to the block while their indentation equals indent; an empty line,
// a differently indented line or the end of input ends the block. Trailing
// whitespace is stripped from every line, tabs are expanded to the next
// multiple of TAB_WIDTH columns and other whitespace becomes a space.
//
// A PARAGRAPH ending in "::" is reported without its last ':' (or not at all
// when it consists of "::" only), and a following block with larger
// indentation is parsed as a LITERAL_BLOCK.
void rst::Parser::ParseBlock(
    rst::BlockType type, rst::BlockType &prev_type, int indent) {
  std::string text;
  for (bool first = true; ; first = false) {
    const char *line_start = ptr_;
    if (!first) {
      // Check indentation.
      SkipSpace();
      if (ptr_ - line_start != indent)
        break;
      if (*ptr_ == '\n') {
        ++ptr_;
        break;  // Empty line ends the block.
      }
      if (!*ptr_)
        break;  // End of input.
    }
    // Strip indentation.
    line_start = ptr_;
    // Find the end of the line.
    while (*ptr_ && *ptr_ != '\n')
      ++ptr_;
    // Strip whitespace at the end of the line.
    const char *end = ptr_;
    while (end != line_start && IsSpace(end[-1]))
      --end;
    // Copy text converting all whitespace characters to spaces. Reserve room
    // for what is already collected plus this line and its separator, so the
    // request never falls below the current size.
    text.reserve(text.size() + static_cast<std::size_t>(end - line_start) + 1);
    if (!first)
      text.push_back('\n');
    enum {TAB_WIDTH = 8};
    for (const char *s = line_start; s != end; ++s) {
      char c = *s;
      if (c == '\t') {
        // Pad with spaces up to the next tab stop. The fill overload of
        // append() is used so that no string literal has to be long enough
        // to supply the padding.
        const std::size_t column =
            static_cast<std::size_t>(indent + (s - line_start));
        text.append(TAB_WIDTH - column % TAB_WIDTH, ' ');
      } else if (IsSpace(c)) {
        text.push_back(' ');
      } else {
        text.push_back(c);
      }
    }
    if (*ptr_ == '\n')
      ++ptr_;
  }

  // Remove a trailing newline. The text can be empty (e.g. whitespace right
  // before the end of input), so it must be checked before looking at the
  // last character.
  if (!text.empty() && *text.rbegin() == '\n')
    text.resize(text.size() - 1);

  bool literal = type == PARAGRAPH && EndsWith(text, "::");
  if (!literal || text.size() != 2) {
    std::size_t size = text.size();
    if (literal)
      --size;  // Report "text::" as "text:".
    EnterBlock(prev_type, type);
    handler_->StartBlock(type);
    handler_->HandleText(text.c_str(), size);
    handler_->EndBlock();
  }
  if (literal) {
    // Parse a literal block.
    const char *line_start = ptr_;
    SkipSpace();
    int new_indent = static_cast<int>(ptr_ - line_start);
    if (new_indent > indent)
      ParseBlock(LITERAL_BLOCK, prev_type, new_indent);
  }
}
// Parses a line block whose first marker has already been consumed, so ptr_
// points at the text of the first line.
//
// Every following line must have a '|' followed by whitespace at exactly
// indent characters of indentation; otherwise the block ends. The text after
// each marker is appended verbatim, including the newline that ends the line,
// and the collected text is reported to the handler as one LINE_BLOCK.
void rst::Parser::ParseLineBlock(rst::BlockType &prev_type, int indent) {
  std::string block_text;
  bool is_first_line = true;
  while (true) {
    if (!is_first_line) {
      // A continuation line needs the marker at the expected indentation.
      const char *raw_line = ptr_;
      SkipSpace();
      const bool has_marker = *ptr_ == '|' && IsSpace(ptr_[1]);
      if (!has_marker || ptr_ - raw_line != indent)
        break;
      ptr_ += 2;
      if (*ptr_ == '\0')
        break;  // End of input.
    }
    is_first_line = false;

    // Take everything up to and including the end of the line.
    const char *content = ptr_;
    ptr_ += std::strcspn(ptr_, "\n");
    if (*ptr_ == '\n')
      ++ptr_;
    block_text.append(content, ptr_);
  }

  EnterBlock(prev_type, rst::LINE_BLOCK);
  handler_->StartBlock(rst::LINE_BLOCK);
  handler_->HandleText(block_text.c_str(), block_text.size());
  handler_->EndBlock();
}
// Parses the null-terminated reStructuredText string s and reports its
// content to the handler given to the constructor.
//
// The input is processed block by block. Depending on how a block starts it
// is handled as:
//   - ".." followed by whitespace, a newline or the end of input: a directive
//     (when a type name followed by "::" comes next) or a comment; the rest
//     of the line is skipped either way;
//   - '*', '+' or '-' followed by whitespace: a bullet list item;
//   - '|' followed by whitespace: a line block;
//   - anything else: a block quote when the line starts with whitespace,
//     otherwise a paragraph.
void rst::Parser::Parse(const char *s) {
  BlockType prev_type = PARAGRAPH;
  ptr_ = s;
  while (*ptr_) {
    // Skip whitespace and empty lines.
    const char *line_start = ptr_;
    SkipSpace();
    if (*ptr_ == '\n') {
      ++ptr_;
      continue;
    }
    switch (*ptr_) {
    case '.':
      if (ptr_[1] == '.') {
        char c = ptr_[2];
        if (!IsSpace(c) && c != '\n' && c)
          break;  // "..text" is ordinary text.
        // Parse a directive or a comment.
        ptr_ += 2;
        SkipSpace();
        std::string type = ParseDirectiveType();
        if (!type.empty() && ptr_[0] == ':' && ptr_[1] == ':') {
          ptr_ += 2;
          handler_->HandleDirective(type.c_str());
        }
        // Skip everything till the end of the line.
        while (*ptr_ && *ptr_ != '\n')
          ++ptr_;
        if (*ptr_ == '\n')
          ++ptr_;
        continue;
      }
      break;
    case '*': case '+': case '-':
      if (IsSpace(ptr_[1])) {
        // Parse a bullet list item. Its text starts after the two-character
        // marker, and that column is the indentation of the item.
        ptr_ += 2;
        ParseBlock(LIST_ITEM, prev_type, static_cast<int>(ptr_ - line_start));
        continue;
      }
      break;
    case '|':
      if (IsSpace(ptr_[1])) {
        // Parse a line block.
        int indent = static_cast<int>(ptr_ - line_start);
        ptr_ += 2;
        ParseLineBlock(prev_type, indent);
        continue;
      }
      break;
    }
    // std::isspace requires a value representable as unsigned char (or EOF);
    // plain char may be negative for non-ASCII bytes, hence the cast.
    const bool starts_with_space =
        std::isspace(static_cast<unsigned char>(line_start[0])) != 0;
    ParseBlock(starts_with_space ? BLOCK_QUOTE : PARAGRAPH,
               prev_type, static_cast<int>(ptr_ - line_start));
  }
  // Leave the last block; this closes a bullet list that is still open.
  EnterBlock(prev_type, PARAGRAPH);
}

View File

@@ -0,0 +1,97 @@
/*
A reStructuredText parser written in C++.
Copyright (c) 2013, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RSTPARSER_H_
#define RSTPARSER_H_
#include <memory>
#include <string>
#include <vector>
namespace rst {
// Kinds of text blocks the parser reports through ContentHandler::StartBlock.
enum BlockType {
// Text whose first line starts without whitespace.
PARAGRAPH,
// Lines introduced by a '|' marker.
LINE_BLOCK,
// Text whose first line starts with whitespace.
BLOCK_QUOTE,
// Container started around a run of list items.
BULLET_LIST,
// One item introduced by a '*', '+' or '-' marker.
LIST_ITEM,
// Indented text following a paragraph that ends in "::".
LITERAL_BLOCK
};
// Receive notification of the logical content of a document.
// Interface implemented by clients of Parser; the parser calls these
// functions while it processes the input.
class ContentHandler {
public:
virtual ~ContentHandler();
// Receives notification of the beginning of a text block.
// type identifies the kind of block that starts.
virtual void StartBlock(BlockType type) = 0;
// Receives notification of the end of a text block.
// No type is passed; an implementation that needs it has to remember which
// block is open.
virtual void EndBlock() = 0;
// Receives notification of text.
// Only the first size characters of text belong to the notification; the
// parser may pass a size shorter than the underlying string.
virtual void HandleText(const char *text, std::size_t size) = 0;
// Receives notification of a directive.
// type is the directive's type name as a null-terminated string.
virtual void HandleDirective(const char *type) = 0;
};
// A parser for a subset of reStructuredText.
//
// The parser does not build a document tree. It reports blocks, text and
// directives to a ContentHandler while it reads the input. The handler is
// stored as a raw pointer and is used by every call to Parse().
class Parser {
 private:
  ContentHandler *handler_;  // Receiver of all notifications.
  const char *ptr_;          // Current position in the input being parsed.

  // Skips whitespace.
  void SkipSpace();

  // Parses a directive type.
  std::string ParseDirectiveType();

  // Changes the current block type sending notifications if necessary.
  void EnterBlock(rst::BlockType &prev_type, rst::BlockType type);

  // Parses a block of text.
  void ParseBlock(rst::BlockType type, rst::BlockType &prev_type, int indent);

  // Parses a line block.
  void ParseLineBlock(rst::BlockType &prev_type, int indent);

 public:
  explicit Parser(ContentHandler *h) : handler_(h), ptr_(0) {}

  // Parses a null-terminated string containing reStructuredText and reports
  // its content to the handler. Nothing is returned.
  void Parse(const char *s);
};
}
#endif // RSTPARSER_H_