• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.8.3 API Reference
  • KDE Home
  • Contact Us
 

KIOSlave

parsinghelpers.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002     Copyright (C) 2008 Andreas Hartmetz <ahartmetz@gmail.com>
00003     Copyright (C) 2010,2011 Rolf Eike Beer <kde@opensource.sf-tec.de>
00004 
00005     This library is free software; you can redistribute it and/or
00006     modify it under the terms of the GNU Library General Public
00007     License as published by the Free Software Foundation; either
00008     version 2 of the License, or (at your option) any later version.
00009 
00010     This library is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013     Library General Public License for more details.
00014 
00015     You should have received a copy of the GNU Library General Public License
00016     along with this library; see the file COPYING.LIB.  If not, write to
00017     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018     Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include <ctype.h>
00022 
00023 #include <QDir>
00024 #include <QMap>
00025 #include <QTextCodec>
00026 #include <QUrl>
00027 
00028 #include <kcodecs.h>
00029 #include <kdebug.h>
00030 
// Move *pos forward past a run of spaces and tabs.
// input: the character buffer being scanned
// pos:   in/out index into input; left unchanged if input[*pos] is no blank
// end:   index at which scanning stops at the latest
static void skipSpace(const char input[], int *pos, int end)
{
    int cursor = *pos;
    for (; cursor < end; ++cursor) {
        const char ch = input[cursor];
        if (ch != ' ' && ch != '\t') {
            break;
        }
    }
    *pos = cursor;
}
00041 
// Move *pos to the first character of the following line. Any mix of CR and LF
// is accepted as a line terminator.
// Returns true if another line follows; returns false once an empty line or the
// end of the input has been reached, i.e. the header is finished.
static bool nextLine(const char input[], int *pos, int end)
{
    int cursor = *pos;

    // run to the end of the current line
    for (; cursor < end; ++cursor) {
        if (input[cursor] == '\r' || input[cursor] == '\n') {
            break;
        }
    }

    // consume terminator characters until one kind has shown up twice
    int crSeen = 0;
    int lfSeen = 0;
    while (cursor < end && crSeen < 2 && lfSeen < 2) {
        const char ch = input[cursor];
        if (ch == '\r') {
            ++crSeen;
        } else if (ch == '\n') {
            ++lfSeen;
        } else {
            break;
        }
        ++cursor;
    }

    if (cursor < end) {
        // One kind was seen twice and the other only once: if the missing partner
        // comes next swallow it as well, so that a complete "\r\n\r\n" (and also
        // "\n\r\n\r") is consumed in one go.
        const bool lfMissing = (crSeen == 2 && lfSeen == 1 && input[cursor] == '\n');
        const bool crMissing = (lfSeen == 2 && crSeen == 1 && input[cursor] == '\r');
        if (lfMissing || crMissing) {
            ++cursor;
        }
    }

    *pos = cursor;
    if (cursor >= end) {
        return false;
    }
    return crSeen < 2 && lfSeen < 2;
}
00068 
00069 // QByteArray::fromPercentEncoding() does not notify us about encoding errors so we need
00070 // to check here if this is valid at all.
00071 static bool isValidPercentEncoding(const QByteArray &data)
00072 {
00073     int i = 0;
00074     const int last = data.length() - 1;
00075     const char *d = data.constData();
00076 
00077     while ( (i = data.indexOf('%', i)) != -1) {
00078         if ( i >= last - 2 )
00079             return false;
00080         if ( ! isxdigit(d[i + 1]) )
00081             return false;
00082         if ( ! isxdigit(d[i + 2]) )
00083             return false;
00084         i++;
00085     }
00086 
00087     return true;
00088 }
00089 
00090 QByteArray TokenIterator::next()
00091 {
00092     QPair<int, int> token = m_tokens[m_currentToken++];
00093     //fromRawData brings some speed advantage but also the requirement to keep the text buffer
00094     //around. this together with implicit sharing (you don't know where copies end up)
00095     //is dangerous!
00096     //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
00097     return QByteArray(&m_buffer[token.first], token.second - token.first);
00098 }
00099 
00100 QByteArray TokenIterator::current() const
00101 {
00102     QPair<int, int> token = m_tokens[m_currentToken - 1];
00103     //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
00104     return QByteArray(&m_buffer[token.first], token.second - token.first);
00105 }
00106 
00107 QList<QByteArray> TokenIterator::all() const
00108 {
00109     QList<QByteArray> ret;
00110     for (int i = 0; i < m_tokens.count(); i++) {
00111         QPair<int, int> token = m_tokens[i];
00112         ret.append(QByteArray(&m_buffer[token.first], token.second - token.first));
00113     }
00114     return ret;
00115 }
00116 
00117 
00118 HeaderTokenizer::HeaderTokenizer(char *buffer)
00119     : m_buffer(buffer)
00120 {
00121     // add information about available headers and whether they have one or multiple,
00122     // comma-separated values.
00123 
00124     //The following response header fields are from RFC 2616 unless otherwise specified.
00125     //Hint: search the web for e.g. 'http "accept-ranges header"' to find information about
00126     //a header field.
00127     static const HeaderFieldTemplate headerFieldTemplates[] = {
00128         {"accept-ranges", false},
00129         {"age", false},
00130         {"cache-control", true},
00131         {"connection", true},
00132         {"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
00133         {"content-encoding", true},
00134         {"content-language", true},
00135         {"content-length", false},
00136         {"content-location", false},
00137         {"content-md5", false},
00138         {"content-type", false},
00139         {"date", false},
00140         {"dav", true}, //RFC 2518
00141         {"etag", false},
00142         {"expires", false},
00143         {"keep-alive", true}, //RFC 2068
00144         {"last-modified", false},
00145         {"link", false}, //RFC 2068, multi-valued with ";" separator
00146         {"location", false},
00147         {"p3p", true}, // http://www.w3.org/TR/P3P/
00148         {"pragma", true},
00149         {"proxy-authenticate", false}, //complicated multi-valuedness: quoted commas don't separate
00150                                        //multiple values. we handle this at a higher level.
00151         {"proxy-connection", true}, //inofficial but well-known; to avoid misunderstandings
00152                                     //when using "connection" when talking to a proxy.
00153         {"refresh", false}, //not sure, only found some mailing list posts mentioning it
00154         {"set-cookie", false}, //RFC 2109; the multi-valuedness seems to be usually achieved
00155                                //by sending several instances of this field as opposed to
00156                                //usually comma-separated lists with maybe multiple instances.
00157         {"transfer-encoding", true},
00158         {"upgrade", true},
00159         {"warning", true},
00160         {"www-authenticate", false} //see proxy-authenticate
00161     };
00162 
00163     for (uint i = 0; i < sizeof(headerFieldTemplates) / sizeof(HeaderFieldTemplate); i++) {
00164         const HeaderFieldTemplate &ft = headerFieldTemplates[i];
00165         insert(QByteArray(ft.name), HeaderField(ft.isMultiValued));
00166     }
00167 }
00168 
00169 int HeaderTokenizer::tokenize(int begin, int end)
00170 {
00171     char *buf = m_buffer;  //keep line length in check :/
00172     int idx = begin;
00173     int startIdx = begin; //multi-purpose start of current token
00174     bool multiValuedEndedWithComma = false; //did the last multi-valued line end with a comma?
00175     QByteArray headerKey;
00176     do {
00177 
00178         if (buf[idx] == ' ' || buf [idx] == '\t') {
00179             // line continuation; preserve startIdx except (see below)
00180             if (headerKey.isEmpty()) {
00181                 continue;
00182             }
00183             // turn CR/LF into spaces for later parsing convenience
00184             int backIdx = idx - 1;
00185             while (backIdx >= begin && (buf[backIdx] == '\r' || buf[backIdx] == '\n')) {
00186                 buf[backIdx--] = ' ';
00187             }
00188 
00189             // multiple values, comma-separated: add new value or continue previous?
00190             if (operator[](headerKey).isMultiValued) {
00191                 if (multiValuedEndedWithComma) {
00192                     // start new value; this is almost like no line continuation
00193                     skipSpace(buf, &idx, end);
00194                     startIdx = idx;
00195                 } else {
00196                     // continue previous value; this is tricky. unit tests to the rescue!
00197                     if (operator[](headerKey).beginEnd.last().first == startIdx) {
00198                         // remove entry, it will be re-added because already idx != startIdx
00199                         operator[](headerKey).beginEnd.removeLast();
00200                     } else {
00201                         // no comma, no entry: the prev line was whitespace only - start new value
00202                         skipSpace(buf, &idx, end);
00203                         startIdx = idx;
00204                     }
00205                 }
00206             }
00207 
00208         } else {
00209             // new field
00210             startIdx = idx;
00211             // also make sure that there is at least one char after the colon
00212             while (idx < (end - 1) && buf[idx] != ':' && buf[idx] != '\r' && buf[idx] != '\n') {
00213                 buf[idx] = tolower(buf[idx]);
00214                 idx++;
00215             }
00216             if (buf[idx] != ':') {
00217                 //malformed line: no colon
00218                 headerKey.clear();
00219                 continue;
00220             }
00221             headerKey = QByteArray(&buf[startIdx], idx - startIdx);
00222             if (!contains(headerKey)) {
00223                 //we don't recognize this header line
00224                 headerKey.clear();
00225                 continue;
00226             }
00227             // skip colon & leading whitespace
00228             idx++;
00229             skipSpace(buf, &idx, end);
00230             startIdx = idx;
00231         }
00232 
00233         // we have the name/key of the field, now parse the value
00234         if (!operator[](headerKey).isMultiValued) {
00235 
00236             // scan to end of line
00237             while (idx < end && buf[idx] != '\r' && buf[idx] != '\n') {
00238                 idx++;
00239             }
00240             if (!operator[](headerKey).beginEnd.isEmpty()) {
00241                 // there already is an entry; are we just in a line continuation?
00242                 if (operator[](headerKey).beginEnd.last().first == startIdx) {
00243                     // line continuation: delete previous entry and later insert a new, longer one.
00244                     operator[](headerKey).beginEnd.removeLast();
00245                 }
00246             }
00247             operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
00248 
00249         } else {
00250 
00251             // comma-separated list
00252             while (true) {
00253                 //skip one value
00254                 while (idx < end && buf[idx] != '\r' && buf[idx] != '\n' && buf[idx] != ',') {
00255                     idx++;
00256                 }
00257                 if (idx != startIdx) {
00258                     operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
00259                 }
00260                 multiValuedEndedWithComma = buf[idx] == ',';
00261                 //skip comma(s) and leading whitespace, if any respectively
00262                 while (idx < end && buf[idx] == ',') {
00263                     idx++;
00264                 }
00265                 skipSpace(buf, &idx, end);
00266                 //next value or end-of-line / end of header?
00267                 if (buf[idx] >= end || buf[idx] == '\r' || buf[idx] == '\n') {
00268                     break;
00269                 }
00270                 //next value
00271                 startIdx = idx;
00272             }
00273         }
00274     } while (nextLine(buf, &idx, end));
00275     return idx;
00276 }
00277 
00278 
00279 TokenIterator HeaderTokenizer::iterator(const char *key) const
00280 {
00281     QByteArray keyBa = QByteArray::fromRawData(key, strlen(key));
00282     if (contains(keyBa)) {
00283         return TokenIterator(value(keyBa).beginEnd, m_buffer);
00284     } else {
00285         return TokenIterator(m_nullTokens, m_buffer);
00286     }
00287 }
00288 
00289 static void skipLWS(const QString &str, int &pos)
00290 {
00291     while (pos < str.length() && (str[pos] == QLatin1Char(' ') || str[pos] == QLatin1Char('\t'))) {
00292         ++pos;
00293     }
00294 }
00295 
// Characters that make an unquoted token invalid; the table to use is passed to
// specialChar(). Each table is a suffix of the one before it. Keep that common
// ending, it allows the compiler to merge the three strings into one.
static const char typeSpecials[] = "{}*'%()<>@,;:\\\"/[]?=";   // disposition type
static const char attrSpecials[] = "'%()<>@,;:\\\"/[]?=";      // parameter name
static const char valueSpecials[] = "()<>@,;:\\\"/[]?=";       // unquoted parameter value
00300 
00301 static bool specialChar(const QChar &ch, const char *specials)
00302 {
00303     // WORKAROUND: According to RFC 2616, any character other than ascii
00304     // characters should NOT be allowed in unquoted content-disposition file
00305     // names. However, since none of the major browsers follow this rule, we do
00306     // the same thing here and allow all printable unicode characters. See
00307     // https://bugs.kde.org/show_bug.cgi?id=261223 for the detials.
00308     if (!ch.isPrint()) {
00309         return true;
00310     }
00311 
00312     for (int i = qstrlen(specials) - 1; i >= 0; i--) {
00313         if (ch == QLatin1Char(specials[i])) {
00314             return true;
00315         }
00316     }
00317 
00318     return false;
00319 }
00320 
00336 static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
00337 {
00338     QString out;
00339     skipLWS(str, pos);
00340     bool valid = true;
00341 
00342     while (pos < str.length() && (str[pos] != term)) {
00343         out += str[pos];
00344         valid = (valid && !specialChar(str[pos], specials));
00345         ++pos;
00346     }
00347 
00348     if (pos < str.length()) { // Stopped due to finding term
00349         ++pos;
00350     }
00351 
00352     if (!valid) {
00353         return QString();
00354     }
00355 
00356     // Remove trailing linear whitespace...
00357     while (out.endsWith(QLatin1Char(' ')) || out.endsWith(QLatin1Char('\t'))) {
00358         out.chop(1);
00359     }
00360 
00361     if (out.contains(QLatin1Char(' '))) {
00362         out.clear();
00363     }
00364 
00365     return out;
00366 }
00367 
00368 // As above, but also handles quotes..
00369 // pos is set to -1 on parse error
00370 static QString extractMaybeQuotedUntil(const QString &str, int &pos)
00371 {
00372     const QChar term = QLatin1Char(';');
00373 
00374     skipLWS(str, pos);
00375 
00376     // Are we quoted?
00377     if (pos < str.length() && str[pos] == QLatin1Char('"')) {
00378         QString out;
00379 
00380         // Skip the quote...
00381         ++pos;
00382 
00383         // when quoted we also need an end-quote
00384         bool endquote = false;
00385 
00386         // Parse until trailing quote...
00387         while (pos < str.length()) {
00388             if (str[pos] == QLatin1Char('\\') && pos + 1 < str.length()) {
00389                 // quoted-pair = "\" CHAR
00390                 out += str[pos + 1];
00391                 pos += 2; // Skip both...
00392             } else if (str[pos] == QLatin1Char('"')) {
00393                 ++pos;
00394                 endquote = true;
00395                 break;
00396             } else if (!str[pos].isPrint()) { // Don't allow CTL's RFC 2616 sec 2.2
00397                 break;
00398             } else {
00399                 out += str[pos];
00400                 ++pos;
00401             }
00402         }
00403 
00404         if (!endquote) {
00405             pos = -1;
00406             return QString();
00407         }
00408 
00409         // Skip until term..
00410         while (pos < str.length() && (str[pos] != term)) {
00411             if ((str[pos] != QLatin1Char(' ')) && (str[pos] != QLatin1Char('\t'))) {
00412                 pos = -1;
00413                 return QString();
00414             }
00415             ++pos;
00416         }
00417 
00418         if (pos < str.length()) {  // Stopped due to finding term
00419             ++pos;
00420         }
00421 
00422         return out;
00423     } else {
00424         return extractUntil(str, term, pos, valueSpecials);
00425     }
00426 }
00427 
00428 static QMap<QString, QString> contentDispositionParserInternal(const QString &disposition)
00429 {
00430     kDebug(7113) << "disposition: " << disposition;
00431     int pos = 0;
00432     const QString strDisposition = extractUntil(disposition, QLatin1Char(';'), pos, typeSpecials).toLower();
00433 
00434     QMap<QString, QString> parameters;
00435     QMap<QString, QString> contparams;   // all parameters that contain continuations
00436     QMap<QString, QString> encparams;    // all parameters that have character encoding
00437 
00438     // the type is invalid, the complete header is junk
00439     if (strDisposition.isEmpty()) {
00440         return parameters;
00441     }
00442 
00443     parameters.insert(QLatin1String("type"), strDisposition);
00444 
00445     while (pos < disposition.length()) {
00446         QString key = extractUntil(disposition, QLatin1Char('='), pos, attrSpecials).toLower();
00447 
00448         if (key.isEmpty()) {
00449             // parse error in this key: do not parse more, but add up
00450             // everything we already got
00451             kDebug(7113) << "parse error in key, abort parsing";
00452             break;
00453         }
00454 
00455         QString val;
00456         if (key.endsWith(QLatin1Char('*'))) {
00457             val = extractUntil(disposition, QLatin1Char(';'), pos, valueSpecials);
00458         } else {
00459             val = extractMaybeQuotedUntil(disposition, pos);
00460         }
00461 
00462         if (val.isEmpty()) {
00463             if (pos == -1) {
00464                 kDebug(7113) << "parse error in value, abort parsing";
00465                 break;
00466             }
00467             continue;
00468         }
00469 
00470         const int spos = key.indexOf(QLatin1Char('*'));
00471         if (spos == key.length() - 1) {
00472             key.chop(1);
00473             encparams.insert(key, val);
00474         } else if (spos >= 0) {
00475             contparams.insert(key, val);
00476         } else if (parameters.contains(key)) {
00477             kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
00478             parameters.remove(key);
00479             return parameters;
00480         } else {
00481             parameters.insert(key, val);
00482         }
00483     }
00484 
00485     QMap<QString, QString>::iterator i = contparams.begin();
00486     while (i != contparams.end()) {
00487         QString key = i.key();
00488         int spos = key.indexOf(QLatin1Char('*'));
00489         bool hasencoding = false;
00490 
00491         if (key.at(spos + 1) != QLatin1Char('0')) {
00492             ++i;
00493             continue;
00494         }
00495 
00496         // no leading zeros allowed, so delete the junk
00497         int klen = key.length();
00498         if (klen > spos + 2) {
00499             // nothing but continuations and encodings may insert * into parameter name
00500             if ((klen > spos + 3) || ((klen == spos + 3) && (key.at(spos + 2) != QLatin1Char('*')))) {
00501                 kDebug(7113) << "removing invalid key " << key << "with val" << i.value() << key.at(spos + 2);
00502                 i = contparams.erase(i);
00503                 continue;
00504             }
00505             hasencoding = true;
00506         }
00507 
00508         int seqnum = 1;
00509         QMap<QString, QString>::iterator partsi;
00510         // we do not need to care about encoding specifications: only the first
00511         // part is allowed to have one
00512         QString val = i.value();
00513 
00514         key.chop(hasencoding ? 2 : 1);
00515 
00516         while ((partsi = contparams.find(key + QString::number(seqnum))) != contparams.end()) {
00517             val += partsi.value();
00518             contparams.erase(partsi);
00519         }
00520 
00521         i = contparams.erase(i);
00522 
00523         key.chop(1);
00524         if (hasencoding) {
00525             encparams.insert(key, val);
00526         } else {
00527             if (parameters.contains(key)) {
00528                 kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
00529                 parameters.remove(key);
00530                 return parameters;
00531             }
00532 
00533             parameters.insert(key, val);
00534         }
00535     }
00536 
00537     for (QMap<QString, QString>::iterator i = encparams.begin(); i != encparams.end(); ++i) {
00538         QString val = i.value();
00539 
00540         // RfC 2231 encoded character set in filename
00541         int spos = val.indexOf(QLatin1Char('\''));
00542         if (spos == -1) {
00543             continue;
00544         }
00545         int npos = val.indexOf(QLatin1Char('\''), spos + 1);
00546         if (npos == -1) {
00547             continue;
00548         }
00549 
00550         const QString charset = val.left(spos);
00551         const QString lang = val.mid(spos + 1, npos - spos - 1);
00552         const QByteArray encodedVal = val.mid(npos + 1).toAscii();
00553 
00554         if ( ! isValidPercentEncoding(encodedVal) )
00555             continue;
00556 
00557         const QByteArray rawval = QByteArray::fromPercentEncoding(encodedVal);
00558 
00559         if (charset.isEmpty() || (charset == QLatin1String("us-ascii"))) {
00560             bool valid = true;
00561             for (int j = rawval.length() - 1; (j >= 0) && valid; j--) {
00562                 valid = (rawval.at(j) >= 32);
00563             }
00564 
00565             if (!valid)
00566                 continue;
00567             val = QString::fromAscii(rawval.constData());
00568         } else {
00569             QTextCodec *codec = QTextCodec::codecForName(charset.toAscii());
00570             if (!codec)
00571                 continue;
00572             val = codec->toUnicode(rawval);
00573         }
00574 
00575         parameters.insert(i.key(), val);
00576     }
00577 
00578     return parameters;
00579 }
00580 
00581 static QMap<QString, QString> contentDispositionParser(const QString &disposition)
00582 {
00583     QMap<QString, QString> parameters = contentDispositionParserInternal(disposition);
00584 
00585     const QLatin1String fn("filename");
00586     if (parameters.contains(fn)) {
00587         // Content-Disposition is not allowed to dictate directory
00588         // path, thus we extract the filename only.
00589         const QString val = QDir::toNativeSeparators(parameters[fn]);
00590         int slpos = val.lastIndexOf(QDir::separator());
00591 
00592         if (slpos > -1) {
00593             parameters.insert(fn, val.mid(slpos + 1));
00594         }
00595     }
00596 
00597     return parameters;
00598 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Thu May 10 2012 20:57:54 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KIOSlave

Skip menu "KIOSlave"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdelibs-4.8.3 API Reference

Skip menu "kdelibs-4.8.3 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal