From 94d7c76d674d90a680ed334f8b6601a77cd7478b Mon Sep 17 00:00:00 2001 From: Cristian Adam Date: Mon, 25 Sep 2023 23:07:02 +0200 Subject: [PATCH] CMakePM: Add missing features to RSTParser To be able to parse the rst help files from CMake Change-Id: Ibec21e8571324276d2080f81728b1268581601d0 Reviewed-by: Alessandro Portale --- .../3rdparty/rstparser/rstparser-test.cc | 89 ++++++++- .../3rdparty/rstparser/rstparser.cc | 185 +++++++++++++++++- .../3rdparty/rstparser/rstparser.h | 19 +- 3 files changed, 279 insertions(+), 14 deletions(-) diff --git a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser-test.cc b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser-test.cc index c70fc67c6da..70b9a6df337 100644 --- a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser-test.cc +++ b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser-test.cc @@ -48,6 +48,27 @@ class TestHandler : public rst::ContentHandler { void StartBlock(rst::BlockType type) { std::string tag; switch (type) { + case rst::REFERENCE_LINK: + // not used, HandleReferenceLink is used instead + break; + case rst::H1: + tag = "h1"; + break; + case rst::H2: + tag = "h2"; + break; + case rst::H3: + tag = "h3"; + break; + case rst::H4: + tag = "h4"; + break; + case rst::H5: + tag = "h5"; + break; + case rst::CODE: + tag = "code"; + break; case rst::PARAGRAPH: tag = "p"; break; @@ -80,8 +101,12 @@ class TestHandler : public rst::ContentHandler { content_.append(text, size); } - void HandleDirective(const char *type) { - content_ += std::string("<") + type + " />"; + void HandleDirective(const std::string &type, const std::string &name) { + content_ += std::string("
" + type + "
"; + } + + void HandleReferenceLink(const std::string &type, const std::string &text) { + content_ += std::string("" + text + ""; } }; @@ -93,6 +118,14 @@ std::string Parse(const char *s) { } } +TEST(ParserTest, HX) { + EXPECT_EQ("

test

", Parse("====\ntest\n====")); + EXPECT_EQ("

test

", Parse("test\n====")); + EXPECT_EQ("

test

", Parse("test\n----")); + EXPECT_EQ("

test

", Parse("test\n^^^^")); + EXPECT_EQ("
test
", Parse("test\n\"\"\"\"")); +} + TEST(ParserTest, Paragraph) { EXPECT_EQ("

test

", Parse("test")); EXPECT_EQ("

test

", Parse("\ntest")); @@ -143,6 +176,14 @@ TEST(ParserTest, Literal) { EXPECT_EQ("

::\nabc\ndef

", Parse("::\nabc\ndef")); } +TEST(ParserTest, InlineCode) { + EXPECT_EQ("

code

", Parse("``code``")); + EXPECT_EQ("

`code``

", Parse("`code``")); + EXPECT_EQ("

some code

", Parse("some ``code``")); + EXPECT_EQ("

code some

", Parse("``code`` some")); + EXPECT_EQ("

some code and more

", Parse("some ``code`` and more")); +} + TEST(ParserTest, Comment) { EXPECT_EQ("", Parse("..")); EXPECT_EQ("", Parse("..\n")); @@ -151,11 +192,49 @@ TEST(ParserTest, Comment) { } TEST(ParserTest, Directive) { - EXPECT_EQ("", Parse(".. test::")); - EXPECT_EQ("", Parse(".. test::")); - EXPECT_EQ("", Parse("..\ttest::")); + EXPECT_EQ("
test
", Parse(".. test::")); + EXPECT_EQ("
test
", Parse(".. test:: name")); + EXPECT_EQ("
test
", Parse(".. test::")); + EXPECT_EQ("
test
", Parse("..\ttest::")); + + EXPECT_EQ("
|from-text| replace
", Parse(".. |from-text| replace:: to-text")); + + std::string rst = +R"(.. code-block:: c++ + int main() { + if (false) + return 1; + return 0; + })"; + + std::string html = +R"(
code-block
int main() { + if (false) + return 1; + return 0; +}
)"; + + EXPECT_EQ(html, Parse(rst.c_str())); + + rst = +R"(.. note:: This is a cool + note. Such a cool note.)"; + + html = +R"(
note
This is a cool + note. Such a cool note.
)"; + + EXPECT_EQ(html, Parse(rst.c_str())); } +TEST(ParserTest, ReferenceLinks) { + EXPECT_EQ("

info

", Parse(":ref:`info`")); + EXPECT_EQ("

some info

", Parse("some :ref:`info`")); + EXPECT_EQ("

some info and more

", Parse("some :ref:`info` and more")); + EXPECT_EQ("

info.

", Parse(":ref:`info`.")); +} + + int main(int argc, char **argv) { #ifdef _WIN32 // Disable message boxes on assertion failures. diff --git a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.cc b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.cc index 528c572f683..f430c40e95d 100644 --- a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.cc +++ b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.cc @@ -27,6 +27,7 @@ #include "rstparser.h" +#include #include #include @@ -55,15 +56,15 @@ void rst::Parser::SkipSpace() { std::string rst::Parser::ParseDirectiveType() { const char *s = ptr_; - if (!std::isalnum(*s)) + if (!std::isalnum(*s) && *s != '|') return std::string(); for (;;) { ++s; if (std::isalnum(*s)) continue; switch (*s) { - case '-': case '_': case '+': case ':': case '.': - if (std::isalnum(s[1])) { + case '-': case '_': case '+': case ':': case '.': case '|': + if (std::isalnum(s[1]) || (*s == '|' && IsSpace(s[1]))) { ++s; continue; } @@ -91,13 +92,28 @@ void rst::Parser::EnterBlock(rst::BlockType &prev_type, rst::BlockType type) { void rst::Parser::ParseBlock( rst::BlockType type, rst::BlockType &prev_type, int indent) { std::string text; + + struct InlineTags { + rst::BlockType type; + std::size_t pos {}; + std::string text; + std::string type_string; + }; + std::vector inline_tags; + + bool have_h1 = false; for (bool first = true; ; first = false) { const char *line_start = ptr_; if (!first) { // Check indentation. SkipSpace(); - if (ptr_ - line_start != indent) + const int new_indent = ptr_ - line_start; + if (new_indent < indent) break; + // Restore the indent + if (new_indent > indent) + std::advance(ptr_, indent - new_indent); + if (*ptr_ == '\n') { ++ptr_; break; // Empty line ends the block. @@ -119,9 +135,17 @@ void rst::Parser::ParseBlock( // Copy text converting all whitespace characters to spaces. text.reserve(end - line_start + 1); - if (!first) + if (!first && !have_h1) text.push_back('\n'); enum {TAB_WIDTH = 8}; + + // Used the sections mapping from https://docs.anaconda.com/restructuredtext/index.html + struct { + BlockType type; + int count = 0; + char c = 0; + } hx[] = { {H1, 0, '=' }, {H2, 0, '='}, {H3, 0, '-'}, {H4, 0, '^'}, {H5, 0, '\"'}}; + for (const char *s = line_start; s != end; ++s) { char c = *s; if (c == '\t') { @@ -129,10 +153,60 @@ void rst::Parser::ParseBlock( TAB_WIDTH - ((indent + s - line_start) % TAB_WIDTH)); } else if (IsSpace(c)) { text.push_back(' '); + } else if (c == hx[0].c) { + ++hx[0].count; + ++hx[1].count; + } else if (c == hx[2].c) { + ++hx[2].count; + } else if (c == hx[3].c) { + ++hx[3].count; + } else if (c == hx[4].c) { + ++hx[4].count; + } else if (c == '`') { + std::string code_tag_text; + if (ParseCode(s, end - s, code_tag_text)) { + InlineTags code; + code.type = rst::CODE; + code.pos = text.size(); + code.text = code_tag_text; + inline_tags.push_back(code); + const int tag_size = 4; + s = s + code_tag_text.size() + tag_size - 1; + } else { + text.push_back(*s); + } + } else if (c == ':') { + std::string link_type; + std::string link_text; + if (ParseReferenceLink(s, end - s, link_type, link_text)) { + InlineTags link; + link.type = rst::REFERENCE_LINK; + link.pos = text.size(); + link.text = link_text; + link.type_string = link_type; + inline_tags.push_back(link); + const int tag_size = 4; + s = s + link_type.size() + link_text.size() + tag_size - 1; + } else { + text.push_back(*s); + } } else { text.push_back(*s); } } + + for (int i = 0; i < 5; ++i) { + if (hx[i].count > 0 && hx[i].count == end - line_start) { + // h1 and h2 have the same underline character + // only if there was one ontop then is h1 otherwise h2 + if (i == 0 && first) + have_h1 = true; + if ((i == 0 && !have_h1) || (i == 1 && have_h1)) + continue; + type = hx[i].type; + } + } + if (*ptr_ == '\n') ++ptr_; } @@ -144,11 +218,35 @@ void rst::Parser::ParseBlock( bool literal = type == PARAGRAPH && EndsWith(text, "::"); if (!literal || text.size() != 2) { std::size_t size = text.size(); + if (size == 0 && inline_tags.size() == 0) + return; + if (literal) --size; EnterBlock(prev_type, type); handler_->StartBlock(type); - handler_->HandleText(text.c_str(), size); + + if (inline_tags.size() == 0) { + handler_->HandleText(text.c_str(), size); + } else { + std::size_t start = 0; + for (const InlineTags &in : inline_tags) { + if (in.pos > start) + handler_->HandleText(text.c_str() + start, in.pos - start); + if (in.type == rst::REFERENCE_LINK) { + handler_->HandleReferenceLink(in.type_string, in.text); + } else { + handler_->StartBlock(in.type); + handler_->HandleText(in.text.c_str(), in.text.size()); + handler_->EndBlock(); + } + start = in.pos; + } + + if (start < size) + handler_->HandleText(text.c_str() + start, size - start); + } + handler_->EndBlock(); } if (literal) { @@ -191,6 +289,58 @@ void rst::Parser::ParseLineBlock(rst::BlockType &prev_type, int indent) { handler_->EndBlock(); } +bool rst::Parser::ParseCode(const char *s, std::size_t size, std::string &code) +{ + // It requires at least four ticks ``text`` + if (s[0] != '`' || s[1] != '`') + return false; + + if (size < 4) + return false; + + std::size_t start_pos = 2; + std::size_t end_pos = 0; + for (std::size_t i = start_pos; i < size - 1; ++i) { + if (s[i] == '`' && s[i + 1] == '`') { + end_pos = i; + break; + } + } + + if (end_pos == 0) + return false; + + code.assign(s + start_pos, end_pos - start_pos); + + return true; +} + +bool rst::Parser::ParseReferenceLink(const char *s, std::size_t size, std::string &type, std::string &text) +{ + // :type:`text` + if (size < 4) + return false; + + auto start_type_tag = s + 1; + auto end_type_tag = std::find(start_type_tag, s + size, ':'); + if (end_type_tag == s + size) + return false; + + type.assign(start_type_tag, end_type_tag - start_type_tag); + + if (*(end_type_tag + 1) != '`') + return false; + + auto start_text_tag = end_type_tag + 2; + auto end_text_tag = std::find(start_text_tag, s + size, '`'); + if (end_text_tag == s + size) + return false; + + text.assign(start_text_tag, end_text_tag - start_text_tag); + + return true; +} + void rst::Parser::Parse(const char *s) { BlockType prev_type = PARAGRAPH; ptr_ = s; @@ -214,7 +364,28 @@ void rst::Parser::Parse(const char *s) { std::string type = ParseDirectiveType(); if (!type.empty() && ptr_[0] == ':' && ptr_[1] == ':') { ptr_ += 2; - handler_->HandleDirective(type.c_str()); + + const char* after_directive = ptr_; + + // Get the name of the directive + std::string name; + while (*ptr_ && *ptr_ != '\n') { + c = *ptr_++; + if (!IsSpace(c)) + name.push_back(c); + } + + // Special case for ".. note::" which can start directly after the :: + if (type == "note" && name.size() > 0) { + ptr_ = after_directive; + SkipSpace(); + handler_->HandleDirective(type, ""); + + ParseBlock(BLOCK_QUOTE, prev_type, 0); + break; + } + + handler_->HandleDirective(type, name); } // Skip everything till the end of the line. while (*ptr_ && *ptr_ != '\n') diff --git a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.h b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.h index 547f128af7c..4fabdbf46ec 100644 --- a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.h +++ b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.h @@ -35,6 +35,13 @@ namespace rst { enum BlockType { + H1, + H2, + H3, + H4, + H5, + CODE, + REFERENCE_LINK, PARAGRAPH, LINE_BLOCK, BLOCK_QUOTE, @@ -58,7 +65,10 @@ class ContentHandler { virtual void HandleText(const char *text, std::size_t size) = 0; // Receives notification of a directive. - virtual void HandleDirective(const char *type) = 0; + virtual void HandleDirective(const std::string &type, const std::string &name) = 0; + + // Receives notification of a link. + virtual void HandleReferenceLink(const std::string &type, const std::string &text) = 0; }; // A parser for a subset of reStructuredText. @@ -85,6 +95,12 @@ class Parser { // Parses a line block. void ParseLineBlock(rst::BlockType &prev_type, int indent); + // Parses inline ``code`` + bool ParseCode(const char* s, std::size_t size, std::string &code); + + // Parses :reference:`link` + bool ParseReferenceLink(const char* s, std::size_t size, std::string &type, std::string &text); + public: explicit Parser(ContentHandler *h) : handler_(h), ptr_(0) {} @@ -94,4 +110,3 @@ class Parser { } #endif // RSTPARSER_H_ -