Files
qt-creator/shared/cpaster/cgi.cpp

428 lines
23 KiB
C++
Raw Normal View History

2008-12-02 12:01:29 +01:00
/***************************************************************************
**
** This file is part of Qt Creator
**
** Copyright (c) 2008 Nokia Corporation and/or its subsidiary(-ies).
**
** Contact: Qt Software Information (qt-info@nokia.com)
**
**
** Non-Open Source Usage
**
** Licensees may use this file in accordance with the Qt Beta Version
** License Agreement, Agreement version 2.2 provided with the Software or,
** alternatively, in accordance with the terms contained in a written
** agreement between you and Nokia.
**
** GNU General Public License Usage
**
** Alternatively, this file may be used under the terms of the GNU General
** Public License versions 2.0 or 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the packaging
** of this file. Please review the following information to ensure GNU
** General Public Licensing requirements will be met:
**
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
** http://www.gnu.org/copyleft/gpl.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt GPL Exception
** version 1.2, included in the file GPL_EXCEPTION.txt in this package.
**
***************************************************************************/
2008-12-02 12:01:29 +01:00
#include "cgi.h"
#include <QByteArray>
// Hexadecimal digit table used by CGI::encodeURL(): the value of a nibble
// (0-15) indexes the lower-case digit written into a "%xx" escape.
// NOTE(review): the original remark credits RFC 1738 with preferring
// lower-case digits; this could not be confirmed from this file. Decoders
// are expected to accept either case - verify before changing the table.
const char *cgi_chars = "0123456789abcdef"; // RFC 1738 suggests lower-case to be optimal
2008-12-02 12:01:29 +01:00
/**
 * Percent-encodes \a rawText for use in a URL / form-encoded request.
 *
 * The text is converted to UTF-8 and processed byte by byte:
 *  - ASCII letters and digits, and the marks - _ ( ) . ! ~ * ' are copied
 *    unchanged,
 *  - a space becomes '+',
 *  - every other byte becomes "%xx" with two lower-case hex digits taken
 *    from cgi_chars.
 *
 * \param rawText the text to encode.
 * \return the encoded text; it contains ASCII characters only.
 */
QString CGI::encodeURL(const QString &rawText)
{
    const QByteArray utf = rawText.toUtf8();
    QString enc;
    // Lower bound only: the result grows by two characters per escaped byte.
    enc.reserve(utf.length());

    const QByteArray::const_iterator end = utf.constEnd();
    for (QByteArray::const_iterator it = utf.constBegin(); it != end; ++it) {
        const char ch = *it;
        const bool isAlphaNumeric = ('A' <= ch && ch <= 'Z')
                                 || ('a' <= ch && ch <= 'z')
                                 || ('0' <= ch && ch <= '9');
        if (isAlphaNumeric) {
            enc.append(QLatin1Char(ch));
        } else if (ch == ' ') {
            enc.append(QLatin1Char('+'));
        } else {
            switch (ch) {
            case '-': case '_':
            case '(': case ')':
            case '.': case '!':
            case '~': case '*':
            case '\'':
                enc.append(QLatin1Char(ch));
                break;
            default: {
                // Work on the unsigned byte so that UTF-8 lead/continuation
                // bytes (>= 0x80) never involve a negative value.
                const unsigned char byte = static_cast<unsigned char>(ch);
                enc.append(QLatin1Char('%'));
                enc.append(QLatin1Char(cgi_chars[byte >> 4]));
                enc.append(QLatin1Char(cgi_chars[byte & 0x0F]));
                break;
            }
            }
        }
    }
    return enc;
}
QString CGI::decodeURL(const QString &urlText)
{
QByteArray dec;
QString::const_iterator it = urlText.constBegin();
2008-12-09 11:07:24 +01:00
while (it != urlText.constEnd()) {
2008-12-02 12:01:29 +01:00
ushort ch = (*it).unicode();
2008-12-09 11:07:24 +01:00
switch (ch) {
2008-12-02 12:01:29 +01:00
case '%':
{
char c1 = char(0x00ff & (*(++it)).unicode());
char c2 = char(0x00ff & (*(++it)).unicode());
ushort v = 0;
2008-12-09 11:07:24 +01:00
if ('A' <= c1 && c1 <= 'Z')
2008-12-02 12:01:29 +01:00
v = c1 - 'A' + 10;
else if ('a' <= c1 && c1 <= 'z')
v = c1 - 'a' + 10;
else if ('0' <= c1 && c1 <= '9')
v = c1 - '0';
else
continue; // Malformed URL!
v <<= 4; // c1 was MSB half
2008-12-09 11:07:24 +01:00
if ('A' <= c2 && c2 <= 'Z')
2008-12-02 12:01:29 +01:00
v |= c2 - 'A' + 10;
else if ('a' <= c2 && c2 <= 'z')
v |= c2 - 'a' + 10;
else if ('0' <= c2 && c2 <= '9')
v |= c2 - '0';
else
continue; // Malformed URL!
dec.append((char)v);
}
break;
case '+':
dec.append(' ');
break;
default:
dec.append(*it);
break;
}
++it;
}
return QString::fromUtf8(dec.constData(), dec.length());
}
// -------------------------------------------------------------------------------------------------
/**
 * Looks up the HTML named character entity for a UTF-16 code unit.
 *
 * \param unicode_char the code unit to look up.
 * \return the entity name without the leading '&' and trailing ';'
 *         (e.g. "amp" for U+0026), or 0 when the table has no entry for
 *         \a unicode_char.
 *
 * The number in parentheses after each entry is the decimal code point.
 */
inline const char *unicodeToHTML(ushort unicode_char)
{
    switch (unicode_char) {
    // Latin -------------------------------
    case 0x0022: return "quot"; // (34) quotation mark = APL quote
    case 0x0026: return "amp"; // (38) ampersand
    case 0x003C: return "lt"; // (60) less-than sign
    case 0x003E: return "gt"; // (62) greater-than sign
    case 0x00A0: return "nbsp"; // (160) no-break space = non-breaking space
    case 0x00A1: return "iexcl"; // (161) inverted exclamation mark
    case 0x00A2: return "cent"; // (162) cent sign
    case 0x00A3: return "pound"; // (163) pound sign
    case 0x00A4: return "curren"; // (164) currency sign
    case 0x00A5: return "yen"; // (165) yen sign = yuan sign
    case 0x00A6: return "brvbar"; // (166) broken bar = broken vertical bar
    case 0x00A7: return "sect"; // (167) section sign
    case 0x00A8: return "uml"; // (168) diaeresis = spacing diaeresis
    case 0x00A9: return "copy"; // (169) copyright sign
    case 0x00AA: return "ordf"; // (170) feminine ordinal indicator
    case 0x00AB: return "laquo"; // (171) left-pointing double angle quotation mark = left pointing guillemet
    case 0x00AC: return "not"; // (172) not sign
    case 0x00AD: return "shy"; // (173) soft hyphen = discretionary hyphen
    case 0x00AE: return "reg"; // (174) registered sign = registered trade mark sign
    case 0x00AF: return "macr"; // (175) macron = spacing macron = overline = APL overbar
    case 0x00B0: return "deg"; // (176) degree sign
    case 0x00B1: return "plusmn"; // (177) plus-minus sign = plus-or-minus sign
    case 0x00B2: return "sup2"; // (178) superscript two = superscript digit two = squared
    case 0x00B3: return "sup3"; // (179) superscript three = superscript digit three = cubed
    case 0x00B4: return "acute"; // (180) acute accent = spacing acute
    case 0x00B5: return "micro"; // (181) micro sign
    case 0x00B6: return "para"; // (182) pilcrow sign = paragraph sign
    case 0x00B7: return "middot"; // (183) middle dot = Georgian comma = Greek middle dot
    case 0x00B8: return "cedil"; // (184) cedilla = spacing cedilla
    case 0x00B9: return "sup1"; // (185) superscript one = superscript digit one
    case 0x00BA: return "ordm"; // (186) masculine ordinal indicator
    case 0x00BB: return "raquo"; // (187) right-pointing double angle quotation mark = right pointing guillemet
    case 0x00BC: return "frac14"; // (188) vulgar fraction one quarter = fraction one quarter
    case 0x00BD: return "frac12"; // (189) vulgar fraction one half = fraction one half
    case 0x00BE: return "frac34"; // (190) vulgar fraction three quarters = fraction three quarters
    case 0x00BF: return "iquest"; // (191) inverted question mark = turned question mark
    case 0x00C0: return "Agrave"; // (192) capital letter A with grave = capital letter A grave
    case 0x00C1: return "Aacute"; // (193) capital letter A with acute
    case 0x00C2: return "Acirc"; // (194) capital letter A with circumflex
    case 0x00C3: return "Atilde"; // (195) capital letter A with tilde
    case 0x00C4: return "Auml"; // (196) capital letter A with diaeresis
    case 0x00C5: return "Aring"; // (197) capital letter A with ring above = capital letter A ring
    case 0x00C6: return "AElig"; // (198) capital letter AE = capital ligature AE
    case 0x00C7: return "Ccedil"; // (199) capital letter C with cedilla
    case 0x00C8: return "Egrave"; // (200) capital letter E with grave
    case 0x00C9: return "Eacute"; // (201) capital letter E with acute
    case 0x00CA: return "Ecirc"; // (202) capital letter E with circumflex
    case 0x00CB: return "Euml"; // (203) capital letter E with diaeresis
    case 0x00CC: return "Igrave"; // (204) capital letter I with grave
    case 0x00CD: return "Iacute"; // (205) capital letter I with acute
    case 0x00CE: return "Icirc"; // (206) capital letter I with circumflex
    case 0x00CF: return "Iuml"; // (207) capital letter I with diaeresis
    case 0x00D0: return "ETH"; // (208) capital letter ETH
    case 0x00D1: return "Ntilde"; // (209) capital letter N with tilde
    case 0x00D2: return "Ograve"; // (210) capital letter O with grave
    case 0x00D3: return "Oacute"; // (211) capital letter O with acute
    case 0x00D4: return "Ocirc"; // (212) capital letter O with circumflex
    case 0x00D5: return "Otilde"; // (213) capital letter O with tilde
    case 0x00D6: return "Ouml"; // (214) capital letter O with diaeresis
    case 0x00D7: return "times"; // (215) multiplication sign
    case 0x00D8: return "Oslash"; // (216) capital letter O with stroke = capital letter O slash
    case 0x00D9: return "Ugrave"; // (217) capital letter U with grave
    case 0x00DA: return "Uacute"; // (218) capital letter U with acute
    case 0x00DB: return "Ucirc"; // (219) capital letter U with circumflex
    case 0x00DC: return "Uuml"; // (220) capital letter U with diaeresis
    case 0x00DD: return "Yacute"; // (221) capital letter Y with acute
    case 0x00DE: return "THORN"; // (222) capital letter THORN
    case 0x00DF: return "szlig"; // (223) small letter sharp s = ess-zed
    case 0x00E0: return "agrave"; // (224) small letter a with grave = small letter a grave
    case 0x00E1: return "aacute"; // (225) small letter a with acute
    case 0x00E2: return "acirc"; // (226) small letter a with circumflex
    case 0x00E3: return "atilde"; // (227) small letter a with tilde
    case 0x00E4: return "auml"; // (228) small letter a with diaeresis
    case 0x00E5: return "aring"; // (229) small letter a with ring above = small letter a ring
    case 0x00E6: return "aelig"; // (230) small letter ae = small ligature ae
    case 0x00E7: return "ccedil"; // (231) small letter c with cedilla
    case 0x00E8: return "egrave"; // (232) small letter e with grave
    case 0x00E9: return "eacute"; // (233) small letter e with acute
    case 0x00EA: return "ecirc"; // (234) small letter e with circumflex
    case 0x00EB: return "euml"; // (235) small letter e with diaeresis
    case 0x00EC: return "igrave"; // (236) small letter i with grave
    case 0x00ED: return "iacute"; // (237) small letter i with acute
    case 0x00EE: return "icirc"; // (238) small letter i with circumflex
    case 0x00EF: return "iuml"; // (239) small letter i with diaeresis
    case 0x00F0: return "eth"; // (240) small letter eth
    case 0x00F1: return "ntilde"; // (241) small letter n with tilde
    case 0x00F2: return "ograve"; // (242) small letter o with grave
    case 0x00F3: return "oacute"; // (243) small letter o with acute
    case 0x00F4: return "ocirc"; // (244) small letter o with circumflex
    case 0x00F5: return "otilde"; // (245) small letter o with tilde
    case 0x00F6: return "ouml"; // (246) small letter o with diaeresis
    case 0x00F7: return "divide"; // (247) division sign
    case 0x00F8: return "oslash"; // (248) small letter o with stroke = small letter o slash
    case 0x00F9: return "ugrave"; // (249) small letter u with grave
    case 0x00FA: return "uacute"; // (250) small letter u with acute
    case 0x00FB: return "ucirc"; // (251) small letter u with circumflex
    case 0x00FC: return "uuml"; // (252) small letter u with diaeresis
    case 0x00FD: return "yacute"; // (253) small letter y with acute
    case 0x00FE: return "thorn"; // (254) small letter thorn
    case 0x00FF: return "yuml"; // (255) small letter y with diaeresis
    case 0x0152: return "OElig"; // (338) capital ligature OE
    case 0x0153: return "oelig"; // (339) small ligature oe
    case 0x0160: return "Scaron"; // (352) capital letter S with caron
    case 0x0161: return "scaron"; // (353) small letter s with caron
    case 0x0178: return "Yuml"; // (376) capital letter Y with diaeresis
    case 0x0192: return "fnof"; // (402) small f with hook = function = florin
    case 0x02C6: return "circ"; // (710) modifier letter circumflex accent
    case 0x02DC: return "tilde"; // (732) small tilde
    // Greek -------------------------------
    case 0x0391: return "Alpha"; // (913) capital letter alpha
    case 0x0392: return "Beta"; // (914) capital letter beta
    case 0x0393: return "Gamma"; // (915) capital letter gamma
    case 0x0394: return "Delta"; // (916) capital letter delta
    case 0x0395: return "Epsilon"; // (917) capital letter epsilon
    case 0x0396: return "Zeta"; // (918) capital letter zeta
    case 0x0397: return "Eta"; // (919) capital letter eta
    case 0x0398: return "Theta"; // (920) capital letter theta
    case 0x0399: return "Iota"; // (921) capital letter iota
    case 0x039A: return "Kappa"; // (922) capital letter kappa
    case 0x039B: return "Lambda"; // (923) capital letter lambda
    case 0x039C: return "Mu"; // (924) capital letter mu
    case 0x039D: return "Nu"; // (925) capital letter nu
    case 0x039E: return "Xi"; // (926) capital letter xi
    case 0x039F: return "Omicron"; // (927) capital letter omicron
    case 0x03A0: return "Pi"; // (928) capital letter pi
    case 0x03A1: return "Rho"; // (929) capital letter rho
    case 0x03A3: return "Sigma"; // (931) capital letter sigma
    case 0x03A4: return "Tau"; // (932) capital letter tau
    case 0x03A5: return "Upsilon"; // (933) capital letter upsilon
    case 0x03A6: return "Phi"; // (934) capital letter phi
    case 0x03A7: return "Chi"; // (935) capital letter chi
    case 0x03A8: return "Psi"; // (936) capital letter psi
    case 0x03A9: return "Omega"; // (937) capital letter omega
    case 0x03B1: return "alpha"; // (945) small letter alpha
    case 0x03B2: return "beta"; // (946) small letter beta
    case 0x03B3: return "gamma"; // (947) small letter gamma
    case 0x03B4: return "delta"; // (948) small letter delta
    case 0x03B5: return "epsilon"; // (949) small letter epsilon
    case 0x03B6: return "zeta"; // (950) small letter zeta
    case 0x03B7: return "eta"; // (951) small letter eta
    case 0x03B8: return "theta"; // (952) small letter theta
    case 0x03B9: return "iota"; // (953) small letter iota
    case 0x03BA: return "kappa"; // (954) small letter kappa
    case 0x03BB: return "lambda"; // (955) small letter lambda
    case 0x03BC: return "mu"; // (956) small letter mu
    case 0x03BD: return "nu"; // (957) small letter nu
    case 0x03BE: return "xi"; // (958) small letter xi
    case 0x03BF: return "omicron"; // (959) small letter omicron
    case 0x03C0: return "pi"; // (960) small letter pi
    case 0x03C1: return "rho"; // (961) small letter rho
    case 0x03C2: return "sigmaf"; // (962) small letter final sigma
    case 0x03C3: return "sigma"; // (963) small letter sigma
    case 0x03C4: return "tau"; // (964) small letter tau
    case 0x03C5: return "upsilon"; // (965) small letter upsilon
    case 0x03C6: return "phi"; // (966) small letter phi
    case 0x03C7: return "chi"; // (967) small letter chi
    case 0x03C8: return "psi"; // (968) small letter psi
    case 0x03C9: return "omega"; // (969) small letter omega
    case 0x03D1: return "thetasym"; // (977) small letter theta symbol
    case 0x03D2: return "upsih"; // (978) upsilon with hook symbol
    case 0x03D6: return "piv"; // (982) pi symbol
    // General Punctuation -----------------
    case 0x2002: return "ensp"; // (8194) en space
    case 0x2003: return "emsp"; // (8195) em space
    case 0x2009: return "thinsp"; // (8201) thin space
    case 0x200C: return "zwnj"; // (8204) zero width non-joiner
    case 0x200D: return "zwj"; // (8205) zero width joiner
    case 0x200E: return "lrm"; // (8206) left-to-right mark
    case 0x200F: return "rlm"; // (8207) right-to-left mark
    case 0x2013: return "ndash"; // (8211) en dash
    case 0x2014: return "mdash"; // (8212) em dash
    case 0x2018: return "lsquo"; // (8216) left single quotation mark
    case 0x2019: return "rsquo"; // (8217) right single quotation mark
    case 0x201A: return "sbquo"; // (8218) single low-9 quotation mark
    case 0x201C: return "ldquo"; // (8220) left double quotation mark
    case 0x201D: return "rdquo"; // (8221) right double quotation mark
    case 0x201E: return "bdquo"; // (8222) double low-9 quotation mark
    case 0x2020: return "dagger"; // (8224) dagger
    case 0x2021: return "Dagger"; // (8225) double dagger
    case 0x2022: return "bull"; // (8226) bullet = black small circle
    case 0x2026: return "hellip"; // (8230) horizontal ellipsis = three dot leader
    case 0x2030: return "permil"; // (8240) per mille sign
    case 0x2032: return "prime"; // (8242) prime = minutes = feet
    case 0x2033: return "Prime"; // (8243) double prime = seconds = inches
    case 0x2039: return "lsaquo"; // (8249) single left-pointing angle quotation mark
    case 0x203A: return "rsaquo"; // (8250) single right-pointing angle quotation mark
    case 0x203E: return "oline"; // (8254) overline = spacing overscore
    case 0x2044: return "frasl"; // (8260) fraction slash
    // Currency Symbols --------------------
    case 0x20AC: return "euro"; // (8364) euro sign
    // Letterlike Symbols ------------------
    case 0x2111: return "image"; // (8465) blackletter capital I = imaginary part
    case 0x2118: return "weierp"; // (8472) script capital P = power set = Weierstrass p
    case 0x211C: return "real"; // (8476) blackletter capital R = real part symbol
    case 0x2122: return "trade"; // (8482) trade mark sign
    case 0x2135: return "alefsym"; // (8501) alef symbol = first transfinite cardinal
    // Arrows ------------------------------
    case 0x2190: return "larr"; // (8592) leftwards arrow
    case 0x2191: return "uarr"; // (8593) upwards arrow
    case 0x2192: return "rarr"; // (8594) rightwards arrow
    case 0x2193: return "darr"; // (8595) downwards arrow
    case 0x2194: return "harr"; // (8596) left right arrow
    case 0x21B5: return "crarr"; // (8629) downwards arrow with corner leftwards = carriage return
    case 0x21D0: return "lArr"; // (8656) leftwards double arrow
    case 0x21D1: return "uArr"; // (8657) upwards double arrow
    case 0x21D2: return "rArr"; // (8658) rightwards double arrow
    case 0x21D3: return "dArr"; // (8659) downwards double arrow
    case 0x21D4: return "hArr"; // (8660) left right double arrow
    // Mathematical Operators --------------
    case 0x2200: return "forall"; // (8704) for all
    case 0x2202: return "part"; // (8706) partial differential
    case 0x2203: return "exist"; // (8707) there exists
    case 0x2205: return "empty"; // (8709) empty set = null set = diameter
    case 0x2207: return "nabla"; // (8711) nabla = backward difference
    case 0x2208: return "isin"; // (8712) element of
    case 0x2209: return "notin"; // (8713) not an element of
    case 0x220B: return "ni"; // (8715) contains as member
    case 0x220F: return "prod"; // (8719) n-ary product = product sign
    case 0x2211: return "sum"; // (8721) n-ary summation
    case 0x2212: return "minus"; // (8722) minus sign
    case 0x2217: return "lowast"; // (8727) asterisk operator
    case 0x221A: return "radic"; // (8730) square root = radical sign
    case 0x221D: return "prop"; // (8733) proportional to
    case 0x221E: return "infin"; // (8734) infinity
    case 0x2220: return "ang"; // (8736) angle
    case 0x2227: return "and"; // (8743) logical and = wedge
    case 0x2228: return "or"; // (8744) logical or = vee
    case 0x2229: return "cap"; // (8745) intersection = cap
    case 0x222A: return "cup"; // (8746) union = cup
    case 0x222B: return "int"; // (8747) integral
    case 0x2234: return "there4"; // (8756) therefore
    case 0x223C: return "sim"; // (8764) tilde operator = varies with = similar to
    case 0x2245: return "cong"; // (8773) approximately equal to
    case 0x2248: return "asymp"; // (8776) almost equal to = asymptotic to
    case 0x2260: return "ne"; // (8800) not equal to
    case 0x2261: return "equiv"; // (8801) identical to
    case 0x2264: return "le"; // (8804) less-than or equal to
    case 0x2265: return "ge"; // (8805) greater-than or equal to
    case 0x2282: return "sub"; // (8834) subset of
    case 0x2283: return "sup"; // (8835) superset of
    case 0x2284: return "nsub"; // (8836) not a subset of
    case 0x2286: return "sube"; // (8838) subset of or equal to
    case 0x2287: return "supe"; // (8839) superset of or equal to
    case 0x2295: return "oplus"; // (8853) circled plus = direct sum
    case 0x2297: return "otimes"; // (8855) circled times = vector product
    case 0x22A5: return "perp"; // (8869) up tack = orthogonal to = perpendicular
    case 0x22C5: return "sdot"; // (8901) dot operator
    // Miscellaneous Technical -------------
    case 0x2308: return "lceil"; // (8968) left ceiling = apl upstile
    case 0x2309: return "rceil"; // (8969) right ceiling
    case 0x230A: return "lfloor"; // (8970) left floor = apl downstile
    case 0x230B: return "rfloor"; // (8971) right floor
    case 0x2329: return "lang"; // (9001) left-pointing angle bracket = bra
    case 0x232A: return "rang"; // (9002) right-pointing angle bracket = ket
    // Geometric Shapes --------------------
    case 0x25CA: return "loz"; // (9674) lozenge
    // Miscellaneous Symbols ---------------
    case 0x2660: return "spades"; // (9824) black spade suit
    case 0x2663: return "clubs"; // (9827) black club suit = shamrock
    case 0x2665: return "hearts"; // (9829) black heart suit = valentine
    case 0x2666: return "diams"; // (9830) black diamond suit
    default: break;
    }
    return 0; // no named entity for this code unit
}
/**
 * Escapes \a rawText for inclusion in HTML.
 *
 * For each character, the first matching rule applies:
 *  1. characters with a named entity (see unicodeToHTML()) become "&name;",
 *  2. with CGI::LineBreaks set, '\n' becomes "<BR>\n",
 *  3. with CGI::Spaces set, ' ' becomes "&nbsp;",
 *  4. with CGI::Tabs set, '\t' becomes eight "&nbsp;",
 *  5. any other character above U+00FF becomes a decimal numeric
 *     reference "&#N;",
 *  6. everything else is copied unchanged.
 *
 * Characters outside the Basic Multilingual Plane are stored in a QString
 * as a surrogate pair; the pair is combined so that a single reference to
 * the real code point is written. An unpaired surrogate cannot be
 * referenced in HTML and is written as U+FFFD (replacement character).
 *
 * \param rawText         the text to escape.
 * \param conversionFlags bitwise OR of CGI::LineBreaks, CGI::Spaces and
 *                        CGI::Tabs selecting the optional rules 2-4.
 * \return the escaped text.
 */
QString CGI::encodeHTML(const QString &rawText, int conversionFlags)
{
    QString enc;
    enc.reserve(rawText.length()); // lower bound; escapes make it longer

    const QString::const_iterator end = rawText.constEnd();
    for (QString::const_iterator it = rawText.constBegin(); it != end; ++it) {
        const ushort code = it->unicode();
        if (const char *entity = unicodeToHTML(code)) {
            enc.append(QLatin1Char('&'));
            enc.append(QLatin1String(entity));
            enc.append(QLatin1Char(';'));
        } else if ((conversionFlags & CGI::LineBreaks) && code == '\n') {
            enc.append(QLatin1String("<BR>\n"));
        } else if ((conversionFlags & CGI::Spaces) && code == ' ') {
            enc.append(QLatin1String("&nbsp;"));
        } else if ((conversionFlags & CGI::Tabs) && code == '\t') {
            enc.append(QLatin1String("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"));
        } else if (code > 0x00FF) {
            unsigned int codePoint = code;
            if (code >= 0xD800 && code <= 0xDFFF) {
                // Surrogate range: a high surrogate (D800-DBFF) must be
                // followed by a low surrogate (DC00-DFFF).
                const QString::const_iterator next = it + 1;
                const bool isHigh = code <= 0xDBFF;
                if (isHigh && next != end
                    && next->unicode() >= 0xDC00 && next->unicode() <= 0xDFFF) {
                    codePoint = 0x10000u
                              + ((static_cast<unsigned int>(code) - 0xD800u) << 10)
                              + (static_cast<unsigned int>(next->unicode()) - 0xDC00u);
                    it = next; // the low surrogate has been consumed
                } else {
                    codePoint = 0xFFFDu;
                }
            }
            enc.append(QLatin1String("&#"));
            enc.append(QString::number(codePoint));
            enc.append(QLatin1Char(';'));
        } else {
            enc.append(*it);
        }
    }
    return enc;
}