CMakePM: Add missing features to RSTParser

This is needed to be able to parse the rst help files from CMake.

Change-Id: Ibec21e8571324276d2080f81728b1268581601d0
Reviewed-by: Alessandro Portale <alessandro.portale@qt.io>
This commit is contained in:
Cristian Adam
2023-09-25 23:07:02 +02:00
parent 4924e5afec
commit 94d7c76d67
3 changed files with 279 additions and 14 deletions

View File

@@ -48,6 +48,27 @@ class TestHandler : public rst::ContentHandler {
void StartBlock(rst::BlockType type) {
std::string tag;
switch (type) {
case rst::REFERENCE_LINK:
// not used, HandleReferenceLink is used instead
break;
case rst::H1:
tag = "h1";
break;
case rst::H2:
tag = "h2";
break;
case rst::H3:
tag = "h3";
break;
case rst::H4:
tag = "h4";
break;
case rst::H5:
tag = "h5";
break;
case rst::CODE:
tag = "code";
break;
case rst::PARAGRAPH:
tag = "p";
break;
@@ -80,8 +101,12 @@ class TestHandler : public rst::ContentHandler {
content_.append(text, size);
}
void HandleDirective(const char *type) {
content_ += std::string("<") + type + " />";
// Records a directive as a <div> element: the directive name becomes the
// class attribute and the directive type becomes the element content.
void HandleDirective(const std::string &type, const std::string &name) {
  std::string element = "<div class=\"";
  element += name;
  element += "\">";
  element += type;
  element += "</div>";
  content_ += element;
}
// Records a reference link as an anchor element: the reference type becomes
// the fragment of the href attribute and the link text becomes the label.
void HandleReferenceLink(const std::string &type, const std::string &text) {
  std::string anchor = "<a href=\"#";
  anchor += type;
  anchor += "\">";
  anchor += text;
  anchor += "</a>";
  content_ += anchor;
}
};
@@ -93,6 +118,14 @@ std::string Parse(const char *s) {
}
}
// Checks that section titles are reported as the heading blocks h1..h5,
// selected by the adornment character of the title.
TEST(ParserTest, HX) {
// A title with both an overline and an underline of '=' is an h1.
EXPECT_EQ("<h1>test</h1>", Parse("====\ntest\n===="));
// With an underline only: '=' gives h2, '-' h3, '^' h4 and '"' h5.
EXPECT_EQ("<h2>test</h2>", Parse("test\n===="));
EXPECT_EQ("<h3>test</h3>", Parse("test\n----"));
EXPECT_EQ("<h4>test</h4>", Parse("test\n^^^^"));
EXPECT_EQ("<h5>test</h5>", Parse("test\n\"\"\"\""));
}
TEST(ParserTest, Paragraph) {
EXPECT_EQ("<p>test</p>", Parse("test"));
EXPECT_EQ("<p>test</p>", Parse("\ntest"));
@@ -143,6 +176,14 @@ TEST(ParserTest, Literal) {
EXPECT_EQ("<p>::\nabc\ndef</p>", Parse("::\nabc\ndef"));
}
// Checks that text enclosed in double backticks is reported as a code block
// nested inside the surrounding paragraph.
TEST(ParserTest, InlineCode) {
EXPECT_EQ("<p><code>code</code></p>", Parse("``code``"));
// A single opening backtick does not start inline code; the text is kept as is.
EXPECT_EQ("<p>`code``</p>", Parse("`code``"));
// Inline code at the end, at the start and in the middle of a paragraph.
EXPECT_EQ("<p>some <code>code</code></p>", Parse("some ``code``"));
EXPECT_EQ("<p><code>code</code> some</p>", Parse("``code`` some"));
EXPECT_EQ("<p>some <code>code</code> and more</p>", Parse("some ``code`` and more"));
}
TEST(ParserTest, Comment) {
EXPECT_EQ("", Parse(".."));
EXPECT_EQ("", Parse("..\n"));
@@ -151,11 +192,49 @@ TEST(ParserTest, Comment) {
}
// Checks directive parsing: the handler receives the directive type and the
// text following "::" as the name (the two-argument TestHandler::HandleDirective
// renders them as <div class="name">type</div>).
TEST(ParserTest, Directive) {
// NOTE(review): the next three expectations use the "<test />" output of the
// single-argument HandleDirective and contradict the <div> expectations for
// the same inputs below; they look like the removed side of the diff -- confirm.
EXPECT_EQ("<test />", Parse(".. test::"));
EXPECT_EQ("<test />", Parse(".. test::"));
EXPECT_EQ("<test />", Parse("..\ttest::"));
// The name is empty when nothing follows the "::".
EXPECT_EQ("<div class=\"\">test</div>", Parse(".. test::"));
EXPECT_EQ("<div class=\"name\">test</div>", Parse(".. test:: name"));
EXPECT_EQ("<div class=\"\">test</div>", Parse(".. test::"));
EXPECT_EQ("<div class=\"\">test</div>", Parse("..\ttest::"));
// Substitution definition: "|from-text| replace" is reported as the type.
EXPECT_EQ("<div class=\"to-text\">|from-text| replace</div>", Parse(".. |from-text| replace:: to-text"));
// A code-block directive followed by its content, which is expected to be
// reported as a block quote after the directive itself.
std::string rst =
R"(.. code-block:: c++
int main() {
if (false)
return 1;
return 0;
})";
std::string html =
R"(<div class="c++">code-block</div><blockquote>int main() {
if (false)
return 1;
return 0;
}</blockquote>)";
EXPECT_EQ(html, Parse(rst.c_str()));
// A note whose text starts right after the "::": the name is reported empty
// and the note text is expected as a block quote.
rst =
R"(.. note:: This is a cool
note. Such a cool note.)";
html =
R"(<div class="">note</div><blockquote>This is a cool
note. Such a cool note.</blockquote>)";
EXPECT_EQ(html, Parse(rst.c_str()));
}
// Checks that inline :type:`text` references are reported through
// HandleReferenceLink (rendered by TestHandler as <a href="#type">text</a>).
TEST(ParserTest, ReferenceLinks) {
// A reference on its own, at the end and in the middle of a paragraph.
EXPECT_EQ("<p><a href=\"#ref\">info</a></p>", Parse(":ref:`info`"));
EXPECT_EQ("<p>some <a href=\"#ref\">info</a></p>", Parse("some :ref:`info`"));
EXPECT_EQ("<p>some <a href=\"#ref\">info</a> and more</p>", Parse("some :ref:`info` and more"));
// Text directly after the closing backtick stays part of the paragraph.
EXPECT_EQ("<p><a href=\"#ref\">info</a>.</p>", Parse(":ref:`info`."));
}
int main(int argc, char **argv) {
#ifdef _WIN32
// Disable message boxes on assertion failures.

View File

@@ -27,6 +27,7 @@
#include "rstparser.h"
#include <algorithm>
#include <cctype>
#include <cstring>
@@ -55,15 +56,15 @@ void rst::Parser::SkipSpace() {
std::string rst::Parser::ParseDirectiveType() {
const char *s = ptr_;
if (!std::isalnum(*s))
if (!std::isalnum(*s) && *s != '|')
return std::string();
for (;;) {
++s;
if (std::isalnum(*s))
continue;
switch (*s) {
case '-': case '_': case '+': case ':': case '.':
if (std::isalnum(s[1])) {
case '-': case '_': case '+': case ':': case '.': case '|':
if (std::isalnum(s[1]) || (*s == '|' && IsSpace(s[1]))) {
++s;
continue;
}
@@ -91,13 +92,28 @@ void rst::Parser::EnterBlock(rst::BlockType &prev_type, rst::BlockType type) {
void rst::Parser::ParseBlock(
rst::BlockType type, rst::BlockType &prev_type, int indent) {
std::string text;
struct InlineTags {
rst::BlockType type;
std::size_t pos {};
std::string text;
std::string type_string;
};
std::vector<InlineTags> inline_tags;
bool have_h1 = false;
for (bool first = true; ; first = false) {
const char *line_start = ptr_;
if (!first) {
// Check indentation.
SkipSpace();
if (ptr_ - line_start != indent)
const int new_indent = ptr_ - line_start;
if (new_indent < indent)
break;
// Restore the indent
if (new_indent > indent)
std::advance(ptr_, indent - new_indent);
if (*ptr_ == '\n') {
++ptr_;
break; // Empty line ends the block.
@@ -119,9 +135,17 @@ void rst::Parser::ParseBlock(
// Copy text converting all whitespace characters to spaces.
text.reserve(end - line_start + 1);
if (!first)
if (!first && !have_h1)
text.push_back('\n');
enum {TAB_WIDTH = 8};
// Used the sections mapping from https://docs.anaconda.com/restructuredtext/index.html
struct {
BlockType type;
int count = 0;
char c = 0;
} hx[] = { {H1, 0, '=' }, {H2, 0, '='}, {H3, 0, '-'}, {H4, 0, '^'}, {H5, 0, '\"'}};
for (const char *s = line_start; s != end; ++s) {
char c = *s;
if (c == '\t') {
@@ -129,10 +153,60 @@ void rst::Parser::ParseBlock(
TAB_WIDTH - ((indent + s - line_start) % TAB_WIDTH));
} else if (IsSpace(c)) {
text.push_back(' ');
} else if (c == hx[0].c) {
++hx[0].count;
++hx[1].count;
} else if (c == hx[2].c) {
++hx[2].count;
} else if (c == hx[3].c) {
++hx[3].count;
} else if (c == hx[4].c) {
++hx[4].count;
} else if (c == '`') {
std::string code_tag_text;
if (ParseCode(s, end - s, code_tag_text)) {
InlineTags code;
code.type = rst::CODE;
code.pos = text.size();
code.text = code_tag_text;
inline_tags.push_back(code);
const int tag_size = 4;
s = s + code_tag_text.size() + tag_size - 1;
} else {
text.push_back(*s);
}
} else if (c == ':') {
std::string link_type;
std::string link_text;
if (ParseReferenceLink(s, end - s, link_type, link_text)) {
InlineTags link;
link.type = rst::REFERENCE_LINK;
link.pos = text.size();
link.text = link_text;
link.type_string = link_type;
inline_tags.push_back(link);
const int tag_size = 4;
s = s + link_type.size() + link_text.size() + tag_size - 1;
} else {
text.push_back(*s);
}
} else {
text.push_back(*s);
}
}
for (int i = 0; i < 5; ++i) {
if (hx[i].count > 0 && hx[i].count == end - line_start) {
// h1 and h2 share the same underline character: the title is an h1 only
// if the same character also forms an overline on top of it, otherwise h2
if (i == 0 && first)
have_h1 = true;
if ((i == 0 && !have_h1) || (i == 1 && have_h1))
continue;
type = hx[i].type;
}
}
if (*ptr_ == '\n')
++ptr_;
}
@@ -144,11 +218,35 @@ void rst::Parser::ParseBlock(
bool literal = type == PARAGRAPH && EndsWith(text, "::");
if (!literal || text.size() != 2) {
std::size_t size = text.size();
if (size == 0 && inline_tags.size() == 0)
return;
if (literal)
--size;
EnterBlock(prev_type, type);
handler_->StartBlock(type);
if (inline_tags.size() == 0) {
handler_->HandleText(text.c_str(), size);
} else {
std::size_t start = 0;
for (const InlineTags &in : inline_tags) {
if (in.pos > start)
handler_->HandleText(text.c_str() + start, in.pos - start);
if (in.type == rst::REFERENCE_LINK) {
handler_->HandleReferenceLink(in.type_string, in.text);
} else {
handler_->StartBlock(in.type);
handler_->HandleText(in.text.c_str(), in.text.size());
handler_->EndBlock();
}
start = in.pos;
}
if (start < size)
handler_->HandleText(text.c_str() + start, size - start);
}
handler_->EndBlock();
}
if (literal) {
@@ -191,6 +289,58 @@ void rst::Parser::ParseLineBlock(rst::BlockType &prev_type, int indent) {
handler_->EndBlock();
}
// Parses inline code of the form ``text``.
//
// s points at the candidate opening backtick and size is the number of
// characters available from s (the caller passes the rest of the current
// line). On success the characters between the opening and the first closing
// pair of backticks are stored in code and true is returned. Otherwise false
// is returned and code is left untouched.
bool rst::Parser::ParseCode(const char *s, std::size_t size, std::string &code)
{
  // Two opening plus two closing backticks are the minimum. Check the length
  // before looking at s[1] so that nothing beyond size is read.
  const std::size_t delimiter_size = 2;
  if (size < 2 * delimiter_size)
    return false;
  if (s[0] != '`' || s[1] != '`')
    return false;

  // The first closing pair ends the code; i + 1 < size keeps s[i + 1] in range.
  for (std::size_t i = delimiter_size; i + 1 < size; ++i) {
    if (s[i] == '`' && s[i + 1] == '`') {
      code.assign(s + delimiter_size, i - delimiter_size);
      return true;
    }
  }

  // No closing pair on this line.
  return false;
}
// Parses an inline reference of the form :type:`text`.
//
// s points at the candidate leading ':' (the first character is skipped
// without being checked) and size is the number of characters available from
// s (the caller passes the rest of the current line). On success the
// characters between the two colons are stored in type, the characters
// between the backticks in text, and true is returned. Otherwise false is
// returned and neither type nor text is modified.
bool rst::Parser::ParseReferenceLink(const char *s, std::size_t size, std::string &type, std::string &text)
{
  // The shortest accepted input is ::`` (empty type and empty text).
  if (size < 4)
    return false;

  const char *const line_end = s + size;

  // The type runs from after the leading ':' up to the next ':'.
  const char *const type_begin = s + 1;
  const char *const type_end = std::find(type_begin, line_end, ':');
  if (type_end == line_end)
    return false;

  // The closing ':' must be followed directly by the opening backtick; make
  // sure that character is still inside the range before reading it.
  const char *const open_tick = type_end + 1;
  if (open_tick == line_end || *open_tick != '`')
    return false;

  // The text runs up to the next backtick on the same line.
  const char *const text_begin = open_tick + 1;
  const char *const text_end = std::find(text_begin, line_end, '`');
  if (text_end == line_end)
    return false;

  // Only write the outputs once the whole reference has been recognized.
  type.assign(type_begin, type_end);
  text.assign(text_begin, text_end);
  return true;
}
void rst::Parser::Parse(const char *s) {
BlockType prev_type = PARAGRAPH;
ptr_ = s;
@@ -214,7 +364,28 @@ void rst::Parser::Parse(const char *s) {
std::string type = ParseDirectiveType();
if (!type.empty() && ptr_[0] == ':' && ptr_[1] == ':') {
ptr_ += 2;
handler_->HandleDirective(type.c_str());
const char* after_directive = ptr_;
// Get the name of the directive
std::string name;
while (*ptr_ && *ptr_ != '\n') {
c = *ptr_++;
if (!IsSpace(c))
name.push_back(c);
}
// Special case for ".. note::" which can start directly after the ::
if (type == "note" && name.size() > 0) {
ptr_ = after_directive;
SkipSpace();
handler_->HandleDirective(type, "");
ParseBlock(BLOCK_QUOTE, prev_type, 0);
break;
}
handler_->HandleDirective(type, name);
}
// Skip everything till the end of the line.
while (*ptr_ && *ptr_ != '\n')

View File

@@ -35,6 +35,13 @@
namespace rst {
enum BlockType {
H1,
H2,
H3,
H4,
H5,
CODE,
REFERENCE_LINK,
PARAGRAPH,
LINE_BLOCK,
BLOCK_QUOTE,
@@ -58,7 +65,10 @@ class ContentHandler {
virtual void HandleText(const char *text, std::size_t size) = 0;
// Receives notification of a directive.
virtual void HandleDirective(const char *type) = 0;
// Receives notification of a directive. As called by Parser::Parse, type is
// the directive type found before "::" and name is the rest of the directive
// line with whitespace removed (empty for a ".. note::" whose text starts on
// the same line).
virtual void HandleDirective(const std::string &type, const std::string &name) = 0;
// Receives notification of an inline reference written as :type:`text`;
// type is the part between the colons, text the part between the backticks.
virtual void HandleReferenceLink(const std::string &type, const std::string &text) = 0;
};
// A parser for a subset of reStructuredText.
@@ -85,6 +95,12 @@ class Parser {
// Parses a line block.
void ParseLineBlock(rst::BlockType &prev_type, int indent);
// Parses inline ``code`` starting at s, where size is the number of
// characters available from s. On success stores the text between the
// backtick pairs in code and returns true; returns false otherwise.
bool ParseCode(const char* s, std::size_t size, std::string &code);
// Parses :reference:`link` starting at s, where size is the number of
// characters available from s. On success stores the part between the colons
// in type and the part between the backticks in text and returns true;
// returns false otherwise, in which case the outputs should not be used.
bool ParseReferenceLink(const char* s, std::size_t size, std::string &type, std::string &text);
public:
// Creates a parser that reports what it parses to the handler h. The input
// pointer stays null until Parse() assigns the text to parse.
explicit Parser(ContentHandler *h) : handler_(h), ptr_(nullptr) {}
@@ -94,4 +110,3 @@ class Parser {
}
#endif // RSTPARSER_H_