CMakePM: Add missing features to RSTParser

This is needed to be able to parse the rst help files from CMake.

Change-Id: Ibec21e8571324276d2080f81728b1268581601d0
Reviewed-by: Alessandro Portale <alessandro.portale@qt.io>
2023-09-25 23:07:02 +02:00
parent 4924e5afec
commit 94d7c76d67
3 changed files with 279 additions and 14 deletions
--- a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser-test.cc
+++ b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser-test.cc
@@ -48,6 +48,27 @@ class TestHandler : public rst::ContentHandler {
  void StartBlock(rst::BlockType type) {
    std::string tag;
    switch (type) {
+    case rst::REFERENCE_LINK:
+      // not used, HandleReferenceLink is used instead
+      break;
+    case rst::H1:
+      tag = "h1";
+      break;
+    case rst::H2:
+      tag = "h2";
+      break;
+    case rst::H3:
+      tag = "h3";
+      break;
+    case rst::H4:
+      tag = "h4";
+      break;
+    case rst::H5:
+      tag = "h5";
+      break;
+    case rst::CODE:
+      tag = "code";
+      break;
    case rst::PARAGRAPH:
      tag = "p";
      break;
@@ -80,8 +101,12 @@ class TestHandler : public rst::ContentHandler {
    content_.append(text, size);
  }

-  void HandleDirective(const char *type) {
-    content_ += std::string("<") + type + " />";
+  void HandleDirective(const std::string &type, const std::string &name) {  // Renders a directive as a div: class = name, content = type.
+    content_.append("<div class=\"").append(name).append("\">").append(type).append("</div>");
+  }
+
+  void HandleReferenceLink(const std::string &type, const std::string &text) {  // Renders a reference link as an anchor: href = "#" + type, content = text.
+    content_.append("<a href=\"#").append(type).append("\">").append(text).append("</a>");
  }
 };

@@ -93,6 +118,14 @@ std::string Parse(const char *s) {
 }
 }

+TEST(ParserTest, HX) {  // A title line plus a line of repeated adornment characters becomes h1..h5.
+  EXPECT_EQ("<h1>test</h1>", Parse("====\ntest\n===="));  // line of equal signs above and below the title
+  EXPECT_EQ("<h2>test</h2>", Parse("test\n===="));  // line of equal signs below the title only
+  EXPECT_EQ("<h3>test</h3>", Parse("test\n----"));
+  EXPECT_EQ("<h4>test</h4>", Parse("test\n^^^^"));
+  EXPECT_EQ("<h5>test</h5>", Parse("test\n\"\"\"\""));  // underline made of four double quotes
+}
+
 TEST(ParserTest, Paragraph) {
  EXPECT_EQ("<p>test</p>", Parse("test"));
  EXPECT_EQ("<p>test</p>", Parse("\ntest"));
@@ -143,6 +176,14 @@ TEST(ParserTest, Literal) {
  EXPECT_EQ("<p>::\nabc\ndef</p>", Parse("::\nabc\ndef"));
 }

+TEST(ParserTest, InlineCode) {  // Text between two pairs of backticks is emitted as a nested code block.
+  EXPECT_EQ("<p><code>code</code></p>", Parse("``code``"));
+  EXPECT_EQ("<p>`code``</p>", Parse("`code``"));  // only one opening backtick: kept as plain text
+  EXPECT_EQ("<p>some <code>code</code></p>", Parse("some ``code``"));
+  EXPECT_EQ("<p><code>code</code> some</p>", Parse("``code`` some"));
+  EXPECT_EQ("<p>some <code>code</code> and more</p>", Parse("some ``code`` and more"));
+}
+
 TEST(ParserTest, Comment) {
  EXPECT_EQ("", Parse(".."));
  EXPECT_EQ("", Parse("..\n"));
@@ -151,11 +192,49 @@ TEST(ParserTest, Comment) {
 }

 TEST(ParserTest, Directive) {
-  EXPECT_EQ("<test />", Parse(".. test::"));
-  EXPECT_EQ("<test />", Parse("..  test::"));
-  EXPECT_EQ("<test />", Parse("..\ttest::"));
+  EXPECT_EQ("<div class=\"\">test</div>", Parse(".. test::"));
+  EXPECT_EQ("<div class=\"name\">test</div>", Parse(".. test:: name"));
+  EXPECT_EQ("<div class=\"\">test</div>", Parse("..  test::"));
+  EXPECT_EQ("<div class=\"\">test</div>", Parse("..\ttest::"));
+
+  EXPECT_EQ("<div class=\"to-text\">|from-text| replace</div>", Parse(".. |from-text| replace:: to-text"));
+
+  std::string rst =
+R"(.. code-block:: c++
+  int main() {
+    if (false)
+        return 1;
+    return 0;
+  })";
+
+  std::string html =
+R"(<div class="c++">code-block</div><blockquote>int main() {
+  if (false)
+      return 1;
+  return 0;
+}</blockquote>)";
+
+  EXPECT_EQ(html, Parse(rst.c_str()));
+
+  rst =
+R"(.. note:: This is a cool
+             note. Such a cool note.)";
+
+  html =
+R"(<div class="">note</div><blockquote>This is a cool
+             note. Such a cool note.</blockquote>)";
+
+  EXPECT_EQ(html, Parse(rst.c_str()));
 }

+TEST(ParserTest, ReferenceLinks) {  // :type:`text` inside a paragraph is reported through HandleReferenceLink.
+  EXPECT_EQ("<p><a href=\"#ref\">info</a></p>", Parse(":ref:`info`"));
+  EXPECT_EQ("<p>some <a href=\"#ref\">info</a></p>", Parse("some :ref:`info`"));
+  EXPECT_EQ("<p>some <a href=\"#ref\">info</a> and more</p>", Parse("some :ref:`info` and more"));
+  EXPECT_EQ("<p><a href=\"#ref\">info</a>.</p>", Parse(":ref:`info`."));  // text right after the link stays in the paragraph
+}
+
+
 int main(int argc, char **argv) {
 #ifdef _WIN32
  // Disable message boxes on assertion failures.
--- a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.cc
+++ b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.cc
@@ -27,6 +27,7 @@

 #include "rstparser.h"

+#include <algorithm>
 #include <cctype>
 #include <cstring>

@@ -55,15 +56,15 @@ void rst::Parser::SkipSpace() {

 std::string rst::Parser::ParseDirectiveType() {
  const char *s = ptr_;
-  if (!std::isalnum(*s))
+  if (!std::isalnum(*s) && *s != '|')
    return std::string();
  for (;;) {
    ++s;
    if (std::isalnum(*s))
      continue;
    switch (*s) {
-    case '-': case '_': case '+': case ':': case '.':
-      if (std::isalnum(s[1])) {
+    case '-': case '_': case '+': case ':': case '.': case '|':
+      if (std::isalnum(s[1]) || (*s == '|' && IsSpace(s[1]))) {
        ++s;
        continue;
      }
@@ -91,13 +92,28 @@ void rst::Parser::EnterBlock(rst::BlockType &prev_type, rst::BlockType type) {
 void rst::Parser::ParseBlock(
    rst::BlockType type, rst::BlockType &prev_type, int indent) {
  std::string text;
+
+  struct InlineTags {
+    rst::BlockType type;
+    std::size_t pos {};
+    std::string text;
+    std::string type_string;
+  };
+  std::vector<InlineTags> inline_tags;
+
+  bool have_h1 = false;
  for (bool first = true; ; first = false) {
    const char *line_start = ptr_;
    if (!first) {
      // Check indentation.
      SkipSpace();
-      if (ptr_ - line_start != indent)
+      const int new_indent = ptr_ - line_start;
+      if (new_indent < indent)
        break;
+      // Restore the indent
+      if (new_indent > indent)
+        std::advance(ptr_, indent - new_indent);
+
      if (*ptr_ == '\n') {
        ++ptr_;
        break;  // Empty line ends the block.
@@ -119,9 +135,17 @@ void rst::Parser::ParseBlock(

    // Copy text converting all whitespace characters to spaces.
    text.reserve(end - line_start + 1);
-    if (!first)
+    if (!first && !have_h1)
      text.push_back('\n');
    enum {TAB_WIDTH = 8};
+
+    // Used the sections mapping from https://docs.anaconda.com/restructuredtext/index.html
+    struct {
+      BlockType type;
+      int count = 0;
+      char c = 0;
+    } hx[] = { {H1, 0, '=' }, {H2, 0, '='}, {H3, 0, '-'}, {H4, 0, '^'}, {H5, 0, '\"'}};
+
    for (const char *s = line_start; s != end; ++s) {
      char c = *s;
      if (c == '\t') {
@@ -129,10 +153,60 @@ void rst::Parser::ParseBlock(
            TAB_WIDTH - ((indent + s - line_start) % TAB_WIDTH));
      } else if (IsSpace(c)) {
        text.push_back(' ');
+      } else if (c == hx[0].c) {
+        ++hx[0].count;
+        ++hx[1].count;
+      } else if (c == hx[2].c) {
+        ++hx[2].count;
+      } else if (c == hx[3].c) {
+        ++hx[3].count;
+      } else if (c == hx[4].c) {
+        ++hx[4].count;
+      } else if (c == '`') {
+        std::string code_tag_text;
+        if (ParseCode(s, end - s, code_tag_text)) {
+          InlineTags code;
+          code.type = rst::CODE;
+          code.pos = text.size();
+          code.text = code_tag_text;
+          inline_tags.push_back(code);
+          const int tag_size = 4;
+          s = s + code_tag_text.size() + tag_size - 1;
+        } else {
+          text.push_back(*s);
+        }
+      } else if (c == ':') {
+        std::string link_type;
+        std::string link_text;
+        if (ParseReferenceLink(s, end - s, link_type, link_text)) {
+          InlineTags link;
+          link.type = rst::REFERENCE_LINK;
+          link.pos = text.size();
+          link.text = link_text;
+          link.type_string = link_type;
+          inline_tags.push_back(link);
+          const int tag_size = 4;
+          s = s + link_type.size() + link_text.size() + tag_size - 1;
+        } else {
+          text.push_back(*s);
+        }
      } else {
        text.push_back(*s);
      }
    }
+
+    for (int i = 0; i < 5; ++i) {
+      if (hx[i].count > 0 && hx[i].count == end - line_start) {
+        // h1 and h2 use the same underline character: the title is h1 only
+        // if the block also starts with such a line on top, otherwise it is h2
+        if (i == 0 && first)
+          have_h1 = true;
+        if ((i == 0 && !have_h1) || (i == 1 && have_h1))
+          continue;
+        type = hx[i].type;
+      }
+    }
+
    if (*ptr_ == '\n')
      ++ptr_;
  }
@@ -144,11 +218,35 @@ void rst::Parser::ParseBlock(
  bool literal = type == PARAGRAPH && EndsWith(text, "::");
  if (!literal || text.size() != 2) {
    std::size_t size = text.size();
+    if (size == 0 && inline_tags.size() == 0)
+      return;
+
    if (literal)
      --size;
    EnterBlock(prev_type, type);
    handler_->StartBlock(type);
+
+    if (inline_tags.size() == 0) {
      handler_->HandleText(text.c_str(), size);
+    } else {
+      std::size_t start = 0;
+      for (const InlineTags &in : inline_tags) {
+        if (in.pos > start)
+          handler_->HandleText(text.c_str() + start, in.pos - start);
+        if (in.type == rst::REFERENCE_LINK) {
+          handler_->HandleReferenceLink(in.type_string, in.text);
+        } else {
+          handler_->StartBlock(in.type);
+          handler_->HandleText(in.text.c_str(), in.text.size());
+          handler_->EndBlock();
+        }
+        start = in.pos;
+      }
+
+      if (start < size)
+        handler_->HandleText(text.c_str() + start, size - start);
+    }
+
    handler_->EndBlock();
  }
  if (literal) {
@@ -191,6 +289,58 @@ void rst::Parser::ParseLineBlock(rst::BlockType &prev_type, int indent) {
  handler_->EndBlock();
 }

+// Parses inline code of the form ``text`` at the start of [s, s + size).
+// On success returns true and stores the text between the backtick pairs in
+// `code`; the match is then code.size() + 4 characters long. On failure
+// returns false and leaves `code` untouched.
+bool rst::Parser::ParseCode(const char *s, std::size_t size, std::string &code)
+{
+  // Two backtick pairs need at least four characters. The size is checked
+  // first so that s[1] is never read outside of the given range.
+  if (size < 4 || s[0] != '`' || s[1] != '`')
+    return false;
+
+  // Look for the first closing pair; it may directly follow the opening one.
+  const char *const begin = s + 2;
+  const char *const last = s + size - 1;  // A pair cannot start at or after this.
+  const char *close = begin;
+  while (close != last && (close[0] != '`' || close[1] != '`'))
+    ++close;
+
+  // No closing pair in range, e.g. for "``code`".
+  if (close == last)
+    return false;
+
+  code.assign(begin, close);
+  return true;
+}
+
+// Parses a reference link of the form :type:`text` at the start of [s, s + size).
+// On success returns true and stores the two parts in `type` and `text`; the
+// match is then type.size() + text.size() + 4 characters long. On failure
+// returns false and leaves both output strings untouched.
+bool rst::Parser::ParseReferenceLink(const char *s, std::size_t size, std::string &type, std::string &text)
+{
+  if (size < 4)
+    return false;
+
+  // The type runs from s + 1 to the next colon, which has to be followed by a
+  // backtick that is still inside the range, so s[size] is never read.
+  const char *const end = s + size;
+  const char *const type_end = std::find(s + 1, end, ':');
+  if (end - type_end < 2 || type_end[1] != '`')
+    return false;
+
+  const char *const text_begin = type_end + 2;  // Skip the colon and the backtick.
+  const char *const text_end = std::find(text_begin, end, '`');
+  if (text_end == end)
+    return false;
+
+  type.assign(s + 1, type_end);
+  text.assign(text_begin, text_end);
+  return true;
+}
+
 void rst::Parser::Parse(const char *s) {
  BlockType prev_type = PARAGRAPH;
  ptr_ = s;
@@ -214,7 +364,28 @@ void rst::Parser::Parse(const char *s) {
        std::string type = ParseDirectiveType();
        if (!type.empty() && ptr_[0] == ':' && ptr_[1] == ':') {
          ptr_ += 2;
-          handler_->HandleDirective(type.c_str());
+
+          const char* after_directive = ptr_;
+
+          // Get the name of the directive
+          std::string name;
+          while (*ptr_ && *ptr_ != '\n') {
+            c = *ptr_++;
+            if (!IsSpace(c))
+              name.push_back(c);
+          }
+
+          // Special case for ".. note::" which can start directly after the ::
+          if (type == "note" && name.size() > 0) {
+            ptr_ = after_directive;
+            SkipSpace();
+            handler_->HandleDirective(type, "");
+
+            ParseBlock(BLOCK_QUOTE, prev_type, 0);
+            break;
+          }
+
+          handler_->HandleDirective(type, name);
        }
        // Skip everything till the end of the line.
        while (*ptr_ && *ptr_ != '\n')
--- a/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.h
+++ b/src/plugins/cmakeprojectmanager/3rdparty/rstparser/rstparser.h
@@ -35,6 +35,13 @@
 namespace rst {

 enum BlockType {
+  H1,
+  H2,
+  H3,
+  H4,
+  H5,
+  CODE,
+  REFERENCE_LINK,
  PARAGRAPH,
  LINE_BLOCK,
  BLOCK_QUOTE,
@@ -58,7 +65,10 @@ class ContentHandler {
  virtual void HandleText(const char *text, std::size_t size) = 0;

  // Receives notification of a directive.
-  virtual void HandleDirective(const char *type) = 0;
+  virtual void HandleDirective(const std::string &type, const std::string &name) = 0;
+
+  // Receives notification of a link.
+  virtual void HandleReferenceLink(const std::string &type, const std::string &text) = 0;
 };

 // A parser for a subset of reStructuredText.
@@ -85,6 +95,12 @@ class Parser {
  // Parses a line block.
  void ParseLineBlock(rst::BlockType &prev_type, int indent);

+  // Parses inline ``code``
+  bool ParseCode(const char* s, std::size_t size, std::string &code);
+
+  // Parses :reference:`link`
+  bool ParseReferenceLink(const char* s, std::size_t size, std::string &type, std::string &text);
+
 public:
  explicit Parser(ContentHandler *h) : handler_(h), ptr_(0) {}

@@ -94,4 +110,3 @@ class Parser {
 }

 #endif  // RSTPARSER_H_
-