• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KIOSlave

  • kioslave
  • http
parsinghelpers.cpp
Go to the documentation of this file.
1/* This file is part of the KDE libraries
2 Copyright (C) 2008 Andreas Hartmetz <ahartmetz@gmail.com>
3 Copyright (C) 2010,2011 Rolf Eike Beer <kde@opensource.sf-tec.de>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public License
16 along with this library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19*/
20
21#include <ctype.h>
22
23#include <QDir>
24#include <QMap>
25#include <QTextCodec>
26#include <QUrl>
27
28#include <kcodecs.h>
29#include <kdebug.h>
30
// Move *pos forward past any run of space / tab characters, never going
// beyond end. *pos is left unchanged if it does not point at such a character.
static void skipSpace(const char input[], int *pos, int end)
{
    int cursor = *pos;
    for (; cursor < end; ++cursor) {
        const char c = input[cursor];
        if (c != ' ' && c != '\t') {
            break;
        }
    }
    *pos = cursor;
}
41
42// Advance *pos to start of next line while being forgiving about line endings.
43// Return false if the end of the header has been reached, true otherwise.
44static bool nextLine(const char input[], int *pos, int end)
45{
46 int idx = *pos;
47 while (idx < end && input[idx] != '\r' && input[idx] != '\n') {
48 idx++;
49 }
50 int rCount = 0;
51 int nCount = 0;
52 while (idx < end && qMax(rCount, nCount) < 2 && (input[idx] == '\r' || input[idx] == '\n')) {
53 input[idx] == '\r' ? rCount++ : nCount++;
54 idx++;
55 }
56 if (idx < end && qMax(rCount, nCount) == 2 && qMin(rCount, nCount) == 1) {
57 // if just one of the others is missing eat it too.
58 // this ensures that conforming headers using the proper
59 // \r\n sequence (and also \n\r) will be parsed correctly.
60 if ((rCount == 1 && input[idx] == '\r') || (nCount == 1 && input[idx] == '\n')) {
61 idx++;
62 }
63 }
64
65 *pos = idx;
66 return idx < end && rCount < 2 && nCount < 2;
67}
68
69// QByteArray::fromPercentEncoding() does not notify us about encoding errors so we need
70// to check here if this is valid at all.
71static bool isValidPercentEncoding(const QByteArray &data)
72{
73 int i = 0;
74 const int last = data.length() - 1;
75 const char *d = data.constData();
76
77 while ( (i = data.indexOf('%', i)) != -1) {
78 if ( i >= last - 2 )
79 return false;
80 if ( ! isxdigit(d[i + 1]) )
81 return false;
82 if ( ! isxdigit(d[i + 2]) )
83 return false;
84 i++;
85 }
86
87 return true;
88}
89
90QByteArray TokenIterator::next()
91{
92 QPair<int, int> token = m_tokens[m_currentToken++];
93 //fromRawData brings some speed advantage but also the requirement to keep the text buffer
94 //around. this together with implicit sharing (you don't know where copies end up)
95 //is dangerous!
96 //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
97 return QByteArray(&m_buffer[token.first], token.second - token.first);
98}
99
100QByteArray TokenIterator::current() const
101{
102 QPair<int, int> token = m_tokens[m_currentToken - 1];
103 //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
104 return QByteArray(&m_buffer[token.first], token.second - token.first);
105}
106
107QList<QByteArray> TokenIterator::all() const
108{
109 QList<QByteArray> ret;
110 for (int i = 0; i < m_tokens.count(); i++) {
111 QPair<int, int> token = m_tokens[i];
112 ret.append(QByteArray(&m_buffer[token.first], token.second - token.first));
113 }
114 return ret;
115}
116
117
118HeaderTokenizer::HeaderTokenizer(char *buffer)
119 : m_buffer(buffer)
120{
121 // add information about available headers and whether they have one or multiple,
122 // comma-separated values.
123
124 //The following response header fields are from RFC 2616 unless otherwise specified.
125 //Hint: search the web for e.g. 'http "accept-ranges header"' to find information about
126 //a header field.
127 static const HeaderFieldTemplate headerFieldTemplates[] = {
128 {"accept-ranges", false},
129 {"age", false},
130 {"cache-control", true},
131 {"connection", true},
132 {"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
133 {"content-encoding", true},
134 {"content-language", true},
135 {"content-length", false},
136 {"content-location", false},
137 {"content-md5", false},
138 {"content-type", false},
139 {"date", false},
140 {"dav", true}, //RFC 2518
141 {"etag", false},
142 {"expires", false},
143 {"keep-alive", true}, //RFC 2068
144 {"last-modified", false},
145 {"link", false}, //RFC 2068, multi-valued with ";" separator
146 {"location", false},
147 {"p3p", true}, // http://www.w3.org/TR/P3P/
148 {"pragma", true},
149 {"proxy-authenticate", false}, //complicated multi-valuedness: quoted commas don't separate
150 //multiple values. we handle this at a higher level.
151 {"proxy-connection", true}, //inofficial but well-known; to avoid misunderstandings
152 //when using "connection" when talking to a proxy.
153 {"refresh", false}, //not sure, only found some mailing list posts mentioning it
154 {"set-cookie", false}, //RFC 2109; the multi-valuedness seems to be usually achieved
155 //by sending several instances of this field as opposed to
156 //usually comma-separated lists with maybe multiple instances.
157 {"transfer-encoding", true},
158 {"upgrade", true},
159 {"warning", true},
160 {"www-authenticate", false} //see proxy-authenticate
161 };
162
163 for (uint i = 0; i < sizeof(headerFieldTemplates) / sizeof(HeaderFieldTemplate); i++) {
164 const HeaderFieldTemplate &ft = headerFieldTemplates[i];
165 insert(QByteArray(ft.name), HeaderField(ft.isMultiValued));
166 }
167}
168
169int HeaderTokenizer::tokenize(int begin, int end)
170{
171 char *buf = m_buffer; //keep line length in check :/
172 int idx = begin;
173 int startIdx = begin; //multi-purpose start of current token
174 bool multiValuedEndedWithComma = false; //did the last multi-valued line end with a comma?
175 QByteArray headerKey;
176 do {
177
178 if (buf[idx] == ' ' || buf [idx] == '\t') {
179 // line continuation; preserve startIdx except (see below)
180 if (headerKey.isEmpty()) {
181 continue;
182 }
183 // turn CR/LF into spaces for later parsing convenience
184 int backIdx = idx - 1;
185 while (backIdx >= begin && (buf[backIdx] == '\r' || buf[backIdx] == '\n')) {
186 buf[backIdx--] = ' ';
187 }
188
189 // multiple values, comma-separated: add new value or continue previous?
190 if (operator[](headerKey).isMultiValued) {
191 if (multiValuedEndedWithComma) {
192 // start new value; this is almost like no line continuation
193 skipSpace(buf, &idx, end);
194 startIdx = idx;
195 } else {
196 // continue previous value; this is tricky. unit tests to the rescue!
197 if (operator[](headerKey).beginEnd.last().first == startIdx) {
198 // remove entry, it will be re-added because already idx != startIdx
199 operator[](headerKey).beginEnd.removeLast();
200 } else {
201 // no comma, no entry: the prev line was whitespace only - start new value
202 skipSpace(buf, &idx, end);
203 startIdx = idx;
204 }
205 }
206 }
207
208 } else {
209 // new field
210 startIdx = idx;
211 // also make sure that there is at least one char after the colon
212 while (idx < (end - 1) && buf[idx] != ':' && buf[idx] != '\r' && buf[idx] != '\n') {
213 buf[idx] = tolower(buf[idx]);
214 idx++;
215 }
216 if (buf[idx] != ':') {
217 //malformed line: no colon
218 headerKey.clear();
219 continue;
220 }
221 headerKey = QByteArray(&buf[startIdx], idx - startIdx);
222 if (!contains(headerKey)) {
223 //we don't recognize this header line
224 headerKey.clear();
225 continue;
226 }
227 // skip colon & leading whitespace
228 idx++;
229 skipSpace(buf, &idx, end);
230 startIdx = idx;
231 }
232
233 // we have the name/key of the field, now parse the value
234 if (!operator[](headerKey).isMultiValued) {
235
236 // scan to end of line
237 while (idx < end && buf[idx] != '\r' && buf[idx] != '\n') {
238 idx++;
239 }
240 if (!operator[](headerKey).beginEnd.isEmpty()) {
241 // there already is an entry; are we just in a line continuation?
242 if (operator[](headerKey).beginEnd.last().first == startIdx) {
243 // line continuation: delete previous entry and later insert a new, longer one.
244 operator[](headerKey).beginEnd.removeLast();
245 }
246 }
247 operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
248
249 } else {
250
251 // comma-separated list
252 while (true) {
253 //skip one value
254 while (idx < end && buf[idx] != '\r' && buf[idx] != '\n' && buf[idx] != ',') {
255 idx++;
256 }
257 if (idx != startIdx) {
258 operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
259 }
260 multiValuedEndedWithComma = buf[idx] == ',';
261 //skip comma(s) and leading whitespace, if any respectively
262 while (idx < end && buf[idx] == ',') {
263 idx++;
264 }
265 skipSpace(buf, &idx, end);
266 //next value or end-of-line / end of header?
267 if (buf[idx] >= end || buf[idx] == '\r' || buf[idx] == '\n') {
268 break;
269 }
270 //next value
271 startIdx = idx;
272 }
273 }
274 } while (nextLine(buf, &idx, end));
275 return idx;
276}
277
278
279TokenIterator HeaderTokenizer::iterator(const char *key) const
280{
281 QByteArray keyBa = QByteArray::fromRawData(key, strlen(key));
282 if (contains(keyBa)) {
283 return TokenIterator(value(keyBa).beginEnd, m_buffer);
284 } else {
285 return TokenIterator(m_nullTokens, m_buffer);
286 }
287}
288
289static void skipLWS(const QString &str, int &pos)
290{
291 while (pos < str.length() && (str[pos] == QLatin1Char(' ') || str[pos] == QLatin1Char('\t'))) {
292 ++pos;
293 }
294}
295
296// keep the common ending, this allows the compiler to join them
297static const char typeSpecials[] = "{}*'%()<>@,;:\\\"/[]?=";
298static const char attrSpecials[] = "'%()<>@,;:\\\"/[]?=";
299static const char valueSpecials[] = "()<>@,;:\\\"/[]?=";
300
301static bool specialChar(const QChar &ch, const char *specials)
302{
303 // WORKAROUND: According to RFC 2616, any character other than ascii
304 // characters should NOT be allowed in unquoted content-disposition file
305 // names. However, since none of the major browsers follow this rule, we do
306 // the same thing here and allow all printable unicode characters. See
307 // https://bugs.kde.org/show_bug.cgi?id=261223 for the detials.
308 if (!ch.isPrint()) {
309 return true;
310 }
311
312 for (int i = qstrlen(specials) - 1; i >= 0; i--) {
313 if (ch == QLatin1Char(specials[i])) {
314 return true;
315 }
316 }
317
318 return false;
319}
320
336static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
337{
338 QString out;
339 skipLWS(str, pos);
340 bool valid = true;
341
342 while (pos < str.length() && (str[pos] != term)) {
343 out += str[pos];
344 valid = (valid && !specialChar(str[pos], specials));
345 ++pos;
346 }
347
348 if (pos < str.length()) { // Stopped due to finding term
349 ++pos;
350 }
351
352 if (!valid) {
353 return QString();
354 }
355
356 // Remove trailing linear whitespace...
357 while (out.endsWith(QLatin1Char(' ')) || out.endsWith(QLatin1Char('\t'))) {
358 out.chop(1);
359 }
360
361 if (out.contains(QLatin1Char(' '))) {
362 out.clear();
363 }
364
365 return out;
366}
367
368// As above, but also handles quotes..
369// pos is set to -1 on parse error
370static QString extractMaybeQuotedUntil(const QString &str, int &pos)
371{
372 const QChar term = QLatin1Char(';');
373
374 skipLWS(str, pos);
375
376 // Are we quoted?
377 if (pos < str.length() && str[pos] == QLatin1Char('"')) {
378 QString out;
379
380 // Skip the quote...
381 ++pos;
382
383 // when quoted we also need an end-quote
384 bool endquote = false;
385
386 // Parse until trailing quote...
387 while (pos < str.length()) {
388 if (str[pos] == QLatin1Char('\\') && pos + 1 < str.length()) {
389 // quoted-pair = "\" CHAR
390 out += str[pos + 1];
391 pos += 2; // Skip both...
392 } else if (str[pos] == QLatin1Char('"')) {
393 ++pos;
394 endquote = true;
395 break;
396 } else if (!str[pos].isPrint()) { // Don't allow CTL's RFC 2616 sec 2.2
397 break;
398 } else {
399 out += str[pos];
400 ++pos;
401 }
402 }
403
404 if (!endquote) {
405 pos = -1;
406 return QString();
407 }
408
409 // Skip until term..
410 while (pos < str.length() && (str[pos] != term)) {
411 if ((str[pos] != QLatin1Char(' ')) && (str[pos] != QLatin1Char('\t'))) {
412 pos = -1;
413 return QString();
414 }
415 ++pos;
416 }
417
418 if (pos < str.length()) { // Stopped due to finding term
419 ++pos;
420 }
421
422 return out;
423 } else {
424 return extractUntil(str, term, pos, valueSpecials);
425 }
426}
427
428static QMap<QString, QString> contentDispositionParserInternal(const QString &disposition)
429{
430 kDebug(7113) << "disposition: " << disposition;
431 int pos = 0;
432 const QString strDisposition = extractUntil(disposition, QLatin1Char(';'), pos, typeSpecials).toLower();
433
434 QMap<QString, QString> parameters;
435 QMap<QString, QString> contparams; // all parameters that contain continuations
436 QMap<QString, QString> encparams; // all parameters that have character encoding
437
438 // the type is invalid, the complete header is junk
439 if (strDisposition.isEmpty()) {
440 return parameters;
441 }
442
443 parameters.insert(QLatin1String("type"), strDisposition);
444
445 while (pos < disposition.length()) {
446 QString key = extractUntil(disposition, QLatin1Char('='), pos, attrSpecials).toLower();
447
448 if (key.isEmpty()) {
449 // parse error in this key: do not parse more, but add up
450 // everything we already got
451 kDebug(7113) << "parse error in key, abort parsing";
452 break;
453 }
454
455 QString val;
456 if (key.endsWith(QLatin1Char('*'))) {
457 val = extractUntil(disposition, QLatin1Char(';'), pos, valueSpecials);
458 } else {
459 val = extractMaybeQuotedUntil(disposition, pos);
460 }
461
462 if (val.isEmpty()) {
463 if (pos == -1) {
464 kDebug(7113) << "parse error in value, abort parsing";
465 break;
466 }
467 continue;
468 }
469
470 const int spos = key.indexOf(QLatin1Char('*'));
471 if (spos == key.length() - 1) {
472 key.chop(1);
473 encparams.insert(key, val);
474 } else if (spos >= 0) {
475 contparams.insert(key, val);
476 } else if (parameters.contains(key)) {
477 kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
478 parameters.remove(key);
479 return parameters;
480 } else {
481 parameters.insert(key, val);
482 }
483 }
484
485 QMap<QString, QString>::iterator i = contparams.begin();
486 while (i != contparams.end()) {
487 QString key = i.key();
488 int spos = key.indexOf(QLatin1Char('*'));
489 bool hasencoding = false;
490
491 if (key.at(spos + 1) != QLatin1Char('0')) {
492 ++i;
493 continue;
494 }
495
496 // no leading zeros allowed, so delete the junk
497 int klen = key.length();
498 if (klen > spos + 2) {
499 // nothing but continuations and encodings may insert * into parameter name
500 if ((klen > spos + 3) || ((klen == spos + 3) && (key.at(spos + 2) != QLatin1Char('*')))) {
501 kDebug(7113) << "removing invalid key " << key << "with val" << i.value() << key.at(spos + 2);
502 i = contparams.erase(i);
503 continue;
504 }
505 hasencoding = true;
506 }
507
508 int seqnum = 1;
509 QMap<QString, QString>::iterator partsi;
510 // we do not need to care about encoding specifications: only the first
511 // part is allowed to have one
512 QString val = i.value();
513
514 key.chop(hasencoding ? 2 : 1);
515
516 while ((partsi = contparams.find(key + QString::number(seqnum))) != contparams.end()) {
517 val += partsi.value();
518 contparams.erase(partsi);
519 }
520
521 i = contparams.erase(i);
522
523 key.chop(1);
524 if (hasencoding) {
525 encparams.insert(key, val);
526 } else {
527 if (parameters.contains(key)) {
528 kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
529 parameters.remove(key);
530 return parameters;
531 }
532
533 parameters.insert(key, val);
534 }
535 }
536
537 for (QMap<QString, QString>::iterator i = encparams.begin(); i != encparams.end(); ++i) {
538 QString val = i.value();
539
540 // RfC 2231 encoded character set in filename
541 int spos = val.indexOf(QLatin1Char('\''));
542 if (spos == -1) {
543 continue;
544 }
545 int npos = val.indexOf(QLatin1Char('\''), spos + 1);
546 if (npos == -1) {
547 continue;
548 }
549
550 const QString charset = val.left(spos);
551 const QString lang = val.mid(spos + 1, npos - spos - 1);
552 const QByteArray encodedVal = val.mid(npos + 1).toLatin1();
553
554 if ( ! isValidPercentEncoding(encodedVal) )
555 continue;
556
557 const QByteArray rawval = QByteArray::fromPercentEncoding(encodedVal);
558
559 if (charset.isEmpty() || (charset == QLatin1String("us-ascii"))) {
560 bool valid = true;
561 for (int j = rawval.length() - 1; (j >= 0) && valid; j--) {
562 valid = (rawval.at(j) >= 32);
563 }
564
565 if (!valid)
566 continue;
567 val = QString::fromLatin1(rawval.constData());
568 } else {
569 QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1());
570 if (!codec)
571 continue;
572 val = codec->toUnicode(rawval);
573 }
574
575 parameters.insert(i.key(), val);
576 }
577
578 return parameters;
579}
580
581static QMap<QString, QString> contentDispositionParser(const QString &disposition)
582{
583 QMap<QString, QString> parameters = contentDispositionParserInternal(disposition);
584
585 const QLatin1String fn("filename");
586 if (parameters.contains(fn)) {
587 // Content-Disposition is not allowed to dictate directory
588 // path, thus we extract the filename only.
589 const QString val = QDir::toNativeSeparators(parameters[fn]);
590 int slpos = val.lastIndexOf(QDir::separator());
591
592 if (slpos > -1) {
593 parameters.insert(fn, val.mid(slpos + 1));
594 }
595 }
596
597 return parameters;
598}
HeaderTokenizer::tokenize
int tokenize(int begin, int end)
Definition: parsinghelpers.cpp:169
HeaderTokenizer::HeaderTokenizer
HeaderTokenizer(char *buffer)
Definition: parsinghelpers.cpp:118
QList
QMap
QPair
TokenIterator
Definition: parsinghelpers.h:40
TokenIterator::current
QByteArray current() const
Definition: parsinghelpers.cpp:100
TokenIterator::next
QByteArray next()
Definition: parsinghelpers.cpp:90
TokenIterator::all
QList< QByteArray > all() const
Definition: parsinghelpers.cpp:107
isxdigit
#define isxdigit(c)
kDebug
#define kDebug
kcodecs.h
kdebug.h
insert
const KShortcut & insert()
begin
const KShortcut & begin()
end
const KShortcut & end()
isValidPercentEncoding
static bool isValidPercentEncoding(const QByteArray &data)
Definition: parsinghelpers.cpp:71
skipSpace
static void skipSpace(const char input[], int *pos, int end)
Definition: parsinghelpers.cpp:32
skipLWS
static void skipLWS(const QString &str, int &pos)
Definition: parsinghelpers.cpp:289
attrSpecials
static const char attrSpecials[]
Definition: parsinghelpers.cpp:298
specialChar
static bool specialChar(const QChar &ch, const char *specials)
Definition: parsinghelpers.cpp:301
extractMaybeQuotedUntil
static QString extractMaybeQuotedUntil(const QString &str, int &pos)
Definition: parsinghelpers.cpp:370
typeSpecials
static const char typeSpecials[]
Definition: parsinghelpers.cpp:297
valueSpecials
static const char valueSpecials[]
Definition: parsinghelpers.cpp:299
contentDispositionParserInternal
static QMap< QString, QString > contentDispositionParserInternal(const QString &disposition)
Definition: parsinghelpers.cpp:428
contentDispositionParser
static QMap< QString, QString > contentDispositionParser(const QString &disposition)
Definition: parsinghelpers.cpp:581
nextLine
static bool nextLine(const char input[], int *pos, int end)
Definition: parsinghelpers.cpp:44
extractUntil
static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
read and parse the input until the given terminator
Definition: parsinghelpers.cpp:336
HeaderField
Definition: parsinghelpers.h:27
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KIOSlave

Skip menu "KIOSlave"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal