forked from qt-creator/qt-creator
CMakePM: Add missing features to RSTParser
This makes it possible to parse the rst help files from CMake. Change-Id: Ibec21e8571324276d2080f81728b1268581601d0 Reviewed-by: Alessandro Portale <alessandro.portale@qt.io>
This commit is contained in:
@@ -48,6 +48,27 @@ class TestHandler : public rst::ContentHandler {
|
||||
void StartBlock(rst::BlockType type) {
|
||||
std::string tag;
|
||||
switch (type) {
|
||||
case rst::REFERENCE_LINK:
|
||||
// not used, HandleReferenceLink is used instead
|
||||
break;
|
||||
case rst::H1:
|
||||
tag = "h1";
|
||||
break;
|
||||
case rst::H2:
|
||||
tag = "h2";
|
||||
break;
|
||||
case rst::H3:
|
||||
tag = "h3";
|
||||
break;
|
||||
case rst::H4:
|
||||
tag = "h4";
|
||||
break;
|
||||
case rst::H5:
|
||||
tag = "h5";
|
||||
break;
|
||||
case rst::CODE:
|
||||
tag = "code";
|
||||
break;
|
||||
case rst::PARAGRAPH:
|
||||
tag = "p";
|
||||
break;
|
||||
@@ -80,8 +101,12 @@ class TestHandler : public rst::ContentHandler {
|
||||
content_.append(text, size);
|
||||
}
|
||||
|
||||
void HandleDirective(const char *type) {
|
||||
content_ += std::string("<") + type + " />";
|
||||
void HandleDirective(const std::string &type, const std::string &name) {
|
||||
content_ += std::string("<div class=\"") + name + "\">" + type + "</div>";
|
||||
}
|
||||
|
||||
void HandleReferenceLink(const std::string &type, const std::string &text) {
|
||||
content_ += std::string("<a href=\"#") + type + "\">" + text + "</a>";
|
||||
}
|
||||
};
|
||||
|
||||
@@ -93,6 +118,14 @@ std::string Parse(const char *s) {
|
||||
}
|
||||
}
|
||||
|
||||
// Section titles: a title with both an overline and an underline of '=' is
// reported as <h1>; a title that is only underlined is reported as <h2>..<h5>
// depending on the underline character ('=', '-', '^', '"').
TEST(ParserTest, HX) {
  struct Sample {
    const char *markup;
    const char *expected_html;
  };

  const Sample samples[] = {
    {"====\ntest\n====", "<h1>test</h1>"},
    {"test\n====", "<h2>test</h2>"},
    {"test\n----", "<h3>test</h3>"},
    {"test\n^^^^", "<h4>test</h4>"},
    {"test\n\"\"\"\"", "<h5>test</h5>"},
  };

  for (const Sample &sample : samples)
    EXPECT_EQ(sample.expected_html, Parse(sample.markup));
}
|
||||
|
||||
TEST(ParserTest, Paragraph) {
|
||||
EXPECT_EQ("<p>test</p>", Parse("test"));
|
||||
EXPECT_EQ("<p>test</p>", Parse("\ntest"));
|
||||
@@ -143,6 +176,14 @@ TEST(ParserTest, Literal) {
|
||||
EXPECT_EQ("<p>::\nabc\ndef</p>", Parse("::\nabc\ndef"));
|
||||
}
|
||||
|
||||
// Inline literals: text wrapped in double backticks becomes a nested <code>
// element inside the paragraph; an unbalanced opening is kept as plain text.
TEST(ParserTest, InlineCode) {
  struct Sample {
    const char *markup;
    const char *expected_html;
  };

  const Sample samples[] = {
    {"``code``", "<p><code>code</code></p>"},
    {"`code``", "<p>`code``</p>"},
    {"some ``code``", "<p>some <code>code</code></p>"},
    {"``code`` some", "<p><code>code</code> some</p>"},
    {"some ``code`` and more", "<p>some <code>code</code> and more</p>"},
  };

  for (const Sample &sample : samples)
    EXPECT_EQ(sample.expected_html, Parse(sample.markup));
}
|
||||
|
||||
TEST(ParserTest, Comment) {
|
||||
EXPECT_EQ("", Parse(".."));
|
||||
EXPECT_EQ("", Parse("..\n"));
|
||||
@@ -151,11 +192,49 @@ TEST(ParserTest, Comment) {
|
||||
}
|
||||
|
||||
// Checks how directives (".. type:: name") are reported through the test
// handler, including directives that are followed by a body.
TEST(ParserTest, Directive) {
|
||||
// NOTE(review): the next three expectations use the "<type />" output of the
// single-argument HandleDirective and contradict the "<div ...>" expectations
// further down for the same inputs. They look like the removed side of the
// diff -- confirm and drop them.
EXPECT_EQ("<test />", Parse(".. test::"));
|
||||
EXPECT_EQ("<test />", Parse(".. test::"));
|
||||
EXPECT_EQ("<test />", Parse("..\ttest::"));
|
||||
// The test handler renders a directive as <div class="NAME">TYPE</div>;
// NAME is empty when nothing follows the "::".
EXPECT_EQ("<div class=\"\">test</div>", Parse(".. test::"));
|
||||
EXPECT_EQ("<div class=\"name\">test</div>", Parse(".. test:: name"));
|
||||
EXPECT_EQ("<div class=\"\">test</div>", Parse(".. test::"));
|
||||
EXPECT_EQ("<div class=\"\">test</div>", Parse("..\ttest::"));
|
||||
|
||||
// Substitution definition: "|from-text| replace" is expected as the directive
// type and "to-text" as its name.
EXPECT_EQ("<div class=\"to-text\">|from-text| replace</div>", Parse(".. |from-text| replace:: to-text"));
|
||||
|
||||
// A directive followed by a body: the body is expected inside a <blockquote>
// right after the directive's <div>.
std::string rst =
|
||||
R"(.. code-block:: c++
|
||||
int main() {
|
||||
if (false)
|
||||
return 1;
|
||||
return 0;
|
||||
})";
|
||||
|
||||
std::string html =
|
||||
R"(<div class="c++">code-block</div><blockquote>int main() {
|
||||
if (false)
|
||||
return 1;
|
||||
return 0;
|
||||
}</blockquote>)";
|
||||
|
||||
EXPECT_EQ(html, Parse(rst.c_str()));
|
||||
|
||||
// ".. note::" may carry its text directly after the "::"; the directive name
// is then expected to be empty and the text to become the block quote.
rst =
|
||||
R"(.. note:: This is a cool
|
||||
note. Such a cool note.)";
|
||||
|
||||
html =
|
||||
R"(<div class="">note</div><blockquote>This is a cool
|
||||
note. Such a cool note.</blockquote>)";
|
||||
|
||||
EXPECT_EQ(html, Parse(rst.c_str()));
|
||||
}
|
||||
|
||||
// Reference links: ":type:`text`" inside a paragraph is reported through
// HandleReferenceLink, which the test handler renders as
// <a href="#type">text</a>; surrounding text stays in the paragraph.
TEST(ParserTest, ReferenceLinks) {
  const auto expect_html = [](const char *expected_html, const char *markup) {
    EXPECT_EQ(expected_html, Parse(markup));
  };

  expect_html("<p><a href=\"#ref\">info</a></p>", ":ref:`info`");
  expect_html("<p>some <a href=\"#ref\">info</a></p>", "some :ref:`info`");
  expect_html("<p>some <a href=\"#ref\">info</a> and more</p>",
              "some :ref:`info` and more");
  expect_html("<p><a href=\"#ref\">info</a>.</p>", ":ref:`info`.");
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
#ifdef _WIN32
|
||||
// Disable message boxes on assertion failures.
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
#include "rstparser.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
|
||||
@@ -55,15 +56,15 @@ void rst::Parser::SkipSpace() {
|
||||
|
||||
std::string rst::Parser::ParseDirectiveType() {
|
||||
const char *s = ptr_;
|
||||
if (!std::isalnum(*s))
|
||||
if (!std::isalnum(*s) && *s != '|')
|
||||
return std::string();
|
||||
for (;;) {
|
||||
++s;
|
||||
if (std::isalnum(*s))
|
||||
continue;
|
||||
switch (*s) {
|
||||
case '-': case '_': case '+': case ':': case '.':
|
||||
if (std::isalnum(s[1])) {
|
||||
case '-': case '_': case '+': case ':': case '.': case '|':
|
||||
if (std::isalnum(s[1]) || (*s == '|' && IsSpace(s[1]))) {
|
||||
++s;
|
||||
continue;
|
||||
}
|
||||
@@ -91,13 +92,28 @@ void rst::Parser::EnterBlock(rst::BlockType &prev_type, rst::BlockType type) {
|
||||
void rst::Parser::ParseBlock(
|
||||
rst::BlockType type, rst::BlockType &prev_type, int indent) {
|
||||
std::string text;
|
||||
|
||||
struct InlineTags {
|
||||
rst::BlockType type;
|
||||
std::size_t pos {};
|
||||
std::string text;
|
||||
std::string type_string;
|
||||
};
|
||||
std::vector<InlineTags> inline_tags;
|
||||
|
||||
bool have_h1 = false;
|
||||
for (bool first = true; ; first = false) {
|
||||
const char *line_start = ptr_;
|
||||
if (!first) {
|
||||
// Check indentation.
|
||||
SkipSpace();
|
||||
if (ptr_ - line_start != indent)
|
||||
const int new_indent = ptr_ - line_start;
|
||||
if (new_indent < indent)
|
||||
break;
|
||||
// Restore the indent
|
||||
if (new_indent > indent)
|
||||
std::advance(ptr_, indent - new_indent);
|
||||
|
||||
if (*ptr_ == '\n') {
|
||||
++ptr_;
|
||||
break; // Empty line ends the block.
|
||||
@@ -119,9 +135,17 @@ void rst::Parser::ParseBlock(
|
||||
|
||||
// Copy text converting all whitespace characters to spaces.
|
||||
text.reserve(end - line_start + 1);
|
||||
if (!first)
|
||||
if (!first && !have_h1)
|
||||
text.push_back('\n');
|
||||
enum {TAB_WIDTH = 8};
|
||||
|
||||
// Used the sections mapping from https://docs.anaconda.com/restructuredtext/index.html
|
||||
struct {
|
||||
BlockType type;
|
||||
int count = 0;
|
||||
char c = 0;
|
||||
} hx[] = { {H1, 0, '=' }, {H2, 0, '='}, {H3, 0, '-'}, {H4, 0, '^'}, {H5, 0, '\"'}};
|
||||
|
||||
for (const char *s = line_start; s != end; ++s) {
|
||||
char c = *s;
|
||||
if (c == '\t') {
|
||||
@@ -129,10 +153,60 @@ void rst::Parser::ParseBlock(
|
||||
TAB_WIDTH - ((indent + s - line_start) % TAB_WIDTH));
|
||||
} else if (IsSpace(c)) {
|
||||
text.push_back(' ');
|
||||
} else if (c == hx[0].c) {
|
||||
++hx[0].count;
|
||||
++hx[1].count;
|
||||
} else if (c == hx[2].c) {
|
||||
++hx[2].count;
|
||||
} else if (c == hx[3].c) {
|
||||
++hx[3].count;
|
||||
} else if (c == hx[4].c) {
|
||||
++hx[4].count;
|
||||
} else if (c == '`') {
|
||||
std::string code_tag_text;
|
||||
if (ParseCode(s, end - s, code_tag_text)) {
|
||||
InlineTags code;
|
||||
code.type = rst::CODE;
|
||||
code.pos = text.size();
|
||||
code.text = code_tag_text;
|
||||
inline_tags.push_back(code);
|
||||
const int tag_size = 4;
|
||||
s = s + code_tag_text.size() + tag_size - 1;
|
||||
} else {
|
||||
text.push_back(*s);
|
||||
}
|
||||
} else if (c == ':') {
|
||||
std::string link_type;
|
||||
std::string link_text;
|
||||
if (ParseReferenceLink(s, end - s, link_type, link_text)) {
|
||||
InlineTags link;
|
||||
link.type = rst::REFERENCE_LINK;
|
||||
link.pos = text.size();
|
||||
link.text = link_text;
|
||||
link.type_string = link_type;
|
||||
inline_tags.push_back(link);
|
||||
const int tag_size = 4;
|
||||
s = s + link_type.size() + link_text.size() + tag_size - 1;
|
||||
} else {
|
||||
text.push_back(*s);
|
||||
}
|
||||
} else {
|
||||
text.push_back(*s);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 5; ++i) {
|
||||
if (hx[i].count > 0 && hx[i].count == end - line_start) {
|
||||
// h1 and h2 have the same underline character
|
||||
// it is h1 only if the title also has an overline on top, otherwise it is h2
|
||||
if (i == 0 && first)
|
||||
have_h1 = true;
|
||||
if ((i == 0 && !have_h1) || (i == 1 && have_h1))
|
||||
continue;
|
||||
type = hx[i].type;
|
||||
}
|
||||
}
|
||||
|
||||
if (*ptr_ == '\n')
|
||||
++ptr_;
|
||||
}
|
||||
@@ -144,11 +218,35 @@ void rst::Parser::ParseBlock(
|
||||
bool literal = type == PARAGRAPH && EndsWith(text, "::");
|
||||
if (!literal || text.size() != 2) {
|
||||
std::size_t size = text.size();
|
||||
if (size == 0 && inline_tags.size() == 0)
|
||||
return;
|
||||
|
||||
if (literal)
|
||||
--size;
|
||||
EnterBlock(prev_type, type);
|
||||
handler_->StartBlock(type);
|
||||
|
||||
if (inline_tags.size() == 0) {
|
||||
handler_->HandleText(text.c_str(), size);
|
||||
} else {
|
||||
std::size_t start = 0;
|
||||
for (const InlineTags &in : inline_tags) {
|
||||
if (in.pos > start)
|
||||
handler_->HandleText(text.c_str() + start, in.pos - start);
|
||||
if (in.type == rst::REFERENCE_LINK) {
|
||||
handler_->HandleReferenceLink(in.type_string, in.text);
|
||||
} else {
|
||||
handler_->StartBlock(in.type);
|
||||
handler_->HandleText(in.text.c_str(), in.text.size());
|
||||
handler_->EndBlock();
|
||||
}
|
||||
start = in.pos;
|
||||
}
|
||||
|
||||
if (start < size)
|
||||
handler_->HandleText(text.c_str() + start, size - start);
|
||||
}
|
||||
|
||||
handler_->EndBlock();
|
||||
}
|
||||
if (literal) {
|
||||
@@ -191,6 +289,58 @@ void rst::Parser::ParseLineBlock(rst::BlockType &prev_type, int indent) {
|
||||
handler_->EndBlock();
|
||||
}
|
||||
|
||||
// Parses an inline literal of the form ``text`` that starts at s.
//
// s:    start of the candidate literal; only [s, s + size) is inspected.
// size: number of characters available, starting at s.
// code: receives the characters between the opening and the first closing
//       double backtick on success; it is left untouched on failure.
//
// Returns true if s starts with "``" and a closing "``" exists within the
// given range, false otherwise.
bool rst::Parser::ParseCode(const char *s, std::size_t size, std::string &code)
{
  static const char kDelimiter[] = {'`', '`'};
  const std::size_t delimiter_size = sizeof(kDelimiter);

  // The shortest literal is the empty one: four ticks. Checking the size
  // first guarantees that s[1] below is inside the range.
  if (size < 2 * delimiter_size)
    return false;

  if (s[0] != '`' || s[1] != '`')
    return false;

  const char *const content_begin = s + delimiter_size;
  const char *const range_end = s + size;

  // The first closing delimiter ends the literal.
  const char *const content_end = std::search(
      content_begin, range_end, kDelimiter, kDelimiter + delimiter_size);
  if (content_end == range_end)
    return false;

  code.assign(content_begin, content_end);
  return true;
}
|
||||
|
||||
// Parses a reference link of the form :type:`text` that starts at s.
//
// s:    start of the candidate link; the caller is expected to position it on
//       the leading ':' (s[0] itself is not inspected). Only [s, s + size) is
//       read.
// size: number of characters available, starting at s.
// type: receives the characters between the two colons on success.
// text: receives the characters between the backticks on success.
//
// Returns true if a complete :type:`text` construct was found. On failure
// neither type nor text is modified.
bool rst::Parser::ParseReferenceLink(const char *s, std::size_t size, std::string &type, std::string &text)
{
  // :type:`text`
  if (size < 4)
    return false;

  const char *const range_end = s + size;

  // The type runs from just after the leading ':' up to the next ':'.
  const char *const type_begin = s + 1;
  const char *const type_end = std::find(type_begin, range_end, ':');

  // The closing ':' must be followed, still inside the range, by the opening
  // backtick. Checking the bound first avoids reading past the range and
  // keeps the search range below valid.
  if (type_end == range_end || type_end + 1 == range_end || type_end[1] != '`')
    return false;

  const char *const text_begin = type_end + 2;
  const char *const text_end = std::find(text_begin, range_end, '`');
  if (text_end == range_end)
    return false;

  // Write the outputs only once the whole construct has been validated.
  type.assign(type_begin, type_end);
  text.assign(text_begin, text_end);
  return true;
}
|
||||
|
||||
void rst::Parser::Parse(const char *s) {
|
||||
BlockType prev_type = PARAGRAPH;
|
||||
ptr_ = s;
|
||||
@@ -214,7 +364,28 @@ void rst::Parser::Parse(const char *s) {
|
||||
std::string type = ParseDirectiveType();
|
||||
if (!type.empty() && ptr_[0] == ':' && ptr_[1] == ':') {
|
||||
ptr_ += 2;
|
||||
handler_->HandleDirective(type.c_str());
|
||||
|
||||
const char* after_directive = ptr_;
|
||||
|
||||
// Get the name of the directive
|
||||
std::string name;
|
||||
while (*ptr_ && *ptr_ != '\n') {
|
||||
c = *ptr_++;
|
||||
if (!IsSpace(c))
|
||||
name.push_back(c);
|
||||
}
|
||||
|
||||
// Special case for ".. note::" which can start directly after the ::
|
||||
if (type == "note" && name.size() > 0) {
|
||||
ptr_ = after_directive;
|
||||
SkipSpace();
|
||||
handler_->HandleDirective(type, "");
|
||||
|
||||
ParseBlock(BLOCK_QUOTE, prev_type, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
handler_->HandleDirective(type, name);
|
||||
}
|
||||
// Skip everything till the end of the line.
|
||||
while (*ptr_ && *ptr_ != '\n')
|
||||
|
||||
@@ -35,6 +35,13 @@
|
||||
namespace rst {
|
||||
|
||||
enum BlockType {
|
||||
H1,
|
||||
H2,
|
||||
H3,
|
||||
H4,
|
||||
H5,
|
||||
CODE,
|
||||
REFERENCE_LINK,
|
||||
PARAGRAPH,
|
||||
LINE_BLOCK,
|
||||
BLOCK_QUOTE,
|
||||
@@ -58,7 +65,10 @@ class ContentHandler {
|
||||
virtual void HandleText(const char *text, std::size_t size) = 0;
|
||||
|
||||
// Receives notification of a directive.
|
||||
virtual void HandleDirective(const char *type) = 0;
|
||||
virtual void HandleDirective(const std::string &type, const std::string &name) = 0;
|
||||
|
||||
// Receives notification of a link.
|
||||
virtual void HandleReferenceLink(const std::string &type, const std::string &text) = 0;
|
||||
};
|
||||
|
||||
// A parser for a subset of reStructuredText.
|
||||
@@ -85,6 +95,12 @@ class Parser {
|
||||
// Parses a line block.
|
||||
void ParseLineBlock(rst::BlockType &prev_type, int indent);
|
||||
|
||||
// Parses inline ``code``
|
||||
bool ParseCode(const char* s, std::size_t size, std::string &code);
|
||||
|
||||
// Parses :reference:`link`
|
||||
bool ParseReferenceLink(const char* s, std::size_t size, std::string &type, std::string &text);
|
||||
|
||||
public:
|
||||
explicit Parser(ContentHandler *h) : handler_(h), ptr_(0) {}
|
||||
|
||||
@@ -94,4 +110,3 @@ class Parser {
|
||||
}
|
||||
|
||||
#endif // RSTPARSER_H_
|
||||
|
||||
|
||||
Reference in New Issue
Block a user