From 668022ed649d9d347d993a346654853fd2bf33b9 Mon Sep 17 00:00:00 2001
From: nowrep
Date: Mon, 4 Feb 2013 22:48:34 +0100
Subject: [PATCH] Improved parsing of Content-Disposition header.

It now tries to detect whether UTF-8 encoding is used.

Closes #745
---
 CHANGELOG                                |   1 +
 src/lib/downloads/downloadfilehelper.cpp | 165 ++++++++++++++++++++---
 src/lib/downloads/downloadfilehelper.h   |   4 +-
 tests/autotests/autotests.pro            |   6 +-
 tests/autotests/downloadstest.cpp        |  70 ++++++++++
 tests/autotests/downloadstest.h          |  33 +++++
 tests/autotests/main.cpp                 |   4 +
 7 files changed, 260 insertions(+), 23 deletions(-)
 create mode 100644 tests/autotests/downloadstest.cpp
 create mode 100644 tests/autotests/downloadstest.h

diff --git a/CHANGELOG b/CHANGELOG
index afa32791c..f2e529cde 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,7 @@ Version 1.4.0
 	* use .qupzilla/tmp instead of /tmp for temporary data
 	* saving passwords should now work for much more sites
 	* don't steal Ctrl+B/U/I shortcuts from page
+	* fixed parsing UTF-8 filenames in Content-Disposition header
 	* fixed crash with context menu in websearchbar and locationbar
 	* fixed loading NYTimes skimmer page
 	* fixed cookie domain handling according to RFC 6265
diff --git a/src/lib/downloads/downloadfilehelper.cpp b/src/lib/downloads/downloadfilehelper.cpp
index 8765399f1..34000be48 100644
--- a/src/lib/downloads/downloadfilehelper.cpp
+++ b/src/lib/downloads/downloadfilehelper.cpp
@@ -111,6 +111,150 @@ void DownloadFileHelper::handleUnsupportedContent(QNetworkReply* reply, const Do
     }
 }
 
+// Returns true when the NUL-terminated byte string is well-formed UTF-8.
+// Accepts tab, LF, CR and the bytes 0x20-0x7F as single-byte characters and
+// rejects overlong encodings, surrogates and code points above U+10FFFF.
+// A null pointer is treated as not UTF-8. Every continuation-byte test fails
+// on the terminating NUL, so the scan never reads past the end of the string.
+// Based on http://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
+static bool isUtf8(const char* string)
+{
+    if (!string) {
+        return false;
+    }
+
+    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(string);
+    while (*bytes) {
+        if ((// ASCII
+                    bytes[0] == 0x09 ||
+                    bytes[0] == 0x0A ||
+                    bytes[0] == 0x0D ||
+                    (0x20 <= bytes[0] && bytes[0] <= 0x7F)
+                )
+           ) {
+            bytes += 1;
+            continue;
+        }
+
+        if ((// non-overlong 2-byte
+                    (0xC2 <= bytes[0] && bytes[0] <= 0xDF) &&
+                    (0x80 <= bytes[1] && bytes[1] <= 0xBF)
+                )
+           ) {
+            bytes += 2;
+            continue;
+        }
+
+        if ((// excluding overlongs
+                    bytes[0] == 0xE0 &&
+                    (0xA0 <= bytes[1] && bytes[1] <= 0xBF) &&
+                    (0x80 <= bytes[2] && bytes[2] <= 0xBF)
+                ) ||
+                (// straight 3-byte
+                    ((0xE1 <= bytes[0] && bytes[0] <= 0xEC) ||
+                     bytes[0] == 0xEE ||
+                     bytes[0] == 0xEF) &&
+                    (0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
+                    (0x80 <= bytes[2] && bytes[2] <= 0xBF)
+                ) ||
+                (// excluding surrogates
+                    bytes[0] == 0xED &&
+                    (0x80 <= bytes[1] && bytes[1] <= 0x9F) &&
+                    (0x80 <= bytes[2] && bytes[2] <= 0xBF)
+                )
+           ) {
+            bytes += 3;
+            continue;
+        }
+
+        if ((// planes 1-3
+                    bytes[0] == 0xF0 &&
+                    (0x90 <= bytes[1] && bytes[1] <= 0xBF) &&
+                    (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
+                    (0x80 <= bytes[3] && bytes[3] <= 0xBF)
+                ) ||
+                (// planes 4-15
+                    (0xF1 <= bytes[0] && bytes[0] <= 0xF3) &&
+                    (0x80 <= bytes[1] && bytes[1] <= 0xBF) &&
+                    (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
+                    (0x80 <= bytes[3] && bytes[3] <= 0xBF)
+                ) ||
+                (// plane 16
+                    bytes[0] == 0xF4 &&
+                    (0x80 <= bytes[1] && bytes[1] <= 0x8F) &&
+                    (0x80 <= bytes[2] && bytes[2] <= 0xBF) &&
+                    (0x80 <= bytes[3] && bytes[3] <= 0xBF)
+                )
+           ) {
+            bytes += 4;
+            continue;
+        }
+
+        return false;
+    }
+
+    return true;
+}
+
+// Extracts the file name from a raw Content-Disposition header value.
+// The header is decoded as UTF-8 when it is valid UTF-8, otherwise as Latin-1.
+// A percent-encoded "filename*=UTF-8''..." parameter takes precedence over a
+// plain "filename=" parameter. Returns an empty string when no name is found.
+QString DownloadFileHelper::parseContentDisposition(const QByteArray &header)
+{
+    QString path;
+
+    if (header.isEmpty()) {
+        return path;
+    }
+
+    QString value;
+
+    if (isUtf8(header.constData())) {
+        value = QString::fromUtf8(header);
+    }
+    else {
+        value = QString::fromLatin1(header);
+    }
+
+    // We try to use UTF-8 encoded filename first if present
+    if (value.contains(QRegExp("[ ;]{1,}filename\\*\\s*=\\s*UTF-8''", Qt::CaseInsensitive))) {
+        QRegExp reg("filename\\s*\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
+        reg.indexIn(value);
+        path = QUrl::fromPercentEncoding(reg.cap(1).toUtf8()).trimmed();
+    }
+    else if (value.contains(QRegExp("[ ;]{1,}filename\\s*=", Qt::CaseInsensitive))) {
+        QRegExp reg("[ ;]{1,}filename\\s*=(.*)", Qt::CaseInsensitive);
+        reg.indexIn(value);
+        path = reg.cap(1).trimmed();
+
+        // Parse filename in quotes (to support semicolon inside filename)
+        if (path.startsWith(QLatin1Char('"')) && path.count(QLatin1Char('"')) > 1) {
+            int pos = path.indexOf(QLatin1Char('"'), 1);
+            while (pos != -1) {
+                // A quote preceded by a backslash is escaped and does not end the value
+                if (path[pos - 1] != QLatin1Char('\\')) {
+                    // We also need to strip starting quote
+                    path = path.left(pos).mid(1);
+                    break;
+                }
+                pos = path.indexOf(QLatin1Char('"'), pos + 1);
+            }
+        }
+        else {
+            QRegExp reg("([^;]*)", Qt::CaseInsensitive);
+            reg.indexIn(path);
+            path = reg.cap(1).trimmed();
+        }
+
+        if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
+            path = path.mid(1, path.length() - 2);
+        }
+    }
+
+    return path;
+}
+
 void DownloadFileHelper::optionsDialogAccepted(int finish)
 {
     bool forceChoosingPath = false;
@@ -248,26 +392,7 @@ void DownloadFileHelper::fileNameChoosed(const QString &name, bool fileNameAutoG
 
 QString DownloadFileHelper::getFileName(QNetworkReply* reply)
 {
-    QString path;
-    if (reply->hasRawHeader("Content-Disposition")) {
-        QString value = QString::fromLatin1(reply->rawHeader("Content-Disposition"));
-
-        // We try to use UTF-8 encoded filename first if present
-        if (value.contains(QRegExp("filename\\s*\\*\\s*=\\s*UTF-8", Qt::CaseInsensitive))) {
-            QRegExp reg("filename\\s*\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
-            reg.indexIn(value);
-            path = QUrl::fromPercentEncoding(reg.cap(1).toUtf8()).trimmed();
-        }
-        else if (value.contains(QRegExp("filename\\s*=", Qt::CaseInsensitive))) {
-            QRegExp reg("filename\\s*=([^;]*)", Qt::CaseInsensitive);
-            reg.indexIn(value);
-            path = reg.cap(1).trimmed();
-
-            if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
-                path = path.mid(1, path.length() - 2);
-            }
-        }
-    }
+    QString path = parseContentDisposition(reply->rawHeader("Content-Disposition"));
 
     if (path.isEmpty()) {
         path = reply->url().path();
diff --git 
a/src/lib/downloads/downloadfilehelper.h b/src/lib/downloads/downloadfilehelper.h index 3075d68c5..92c4682cd 100644 --- a/src/lib/downloads/downloadfilehelper.h +++ b/src/lib/downloads/downloadfilehelper.h @@ -1,6 +1,6 @@ /* ============================================================ * QupZilla - WebKit based browser -* Copyright (C) 2010-2012 David Rosca +* Copyright (C) 2010-2013 David Rosca * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,6 +45,8 @@ public: void handleUnsupportedContent(QNetworkReply* reply, const DownloadManager::DownloadInfo &info); + static QString parseContentDisposition(const QByteArray &header); + signals: void itemCreated(QListWidgetItem* item, DownloadItem* downItem); diff --git a/tests/autotests/autotests.pro b/tests/autotests/autotests.pro index fc493fc61..2e9961aa4 100644 --- a/tests/autotests/autotests.pro +++ b/tests/autotests/autotests.pro @@ -48,10 +48,12 @@ INCLUDEPATH += $$PWD/../../src/lib/3rdparty\ HEADERS += \ qztoolstest.h \ formcompletertest.h \ - cookiestest.h + cookiestest.h \ + downloadstest.h SOURCES += \ qztoolstest.cpp \ main.cpp \ formcompletertest.cpp \ - cookiestest.cpp + cookiestest.cpp \ + downloadstest.cpp diff --git a/tests/autotests/downloadstest.cpp b/tests/autotests/downloadstest.cpp new file mode 100644 index 000000000..601792ec1 --- /dev/null +++ b/tests/autotests/downloadstest.cpp @@ -0,0 +1,70 @@ +/* ============================================================ +* QupZilla - WebKit based browser +* Copyright (C) 2013 David Rosca +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
+* ============================================================ */
+#include "downloadstest.h"
+#include "downloadfilehelper.h"
+
+#include <QtTest/QtTest>
+#include <QDebug>
+
+void DownloadsTest::parseContentDispositionTest_data()
+{
+    QTest::addColumn<QByteArray>("header");
+    QTest::addColumn<QString>("result");
+    // Each row: raw Content-Disposition header -> file name parseContentDisposition() should return
+    QTest::newRow("filename") << QByteArray("attachment; filename=\"foo.html\"") << "foo.html";
+    QTest::newRow("filename25") << QByteArray("attachment; filename=\"0000000000111111111122222\"") << "0000000000111111111122222";
+    QTest::newRow("filename35") << QByteArray("attachment; filename=\"00000000001111111111222222222233333\"") << "00000000001111111111222222222233333";
+    QTest::newRow("semicolon") << QByteArray("attachment; filename=\"Here's a semicolon;.html\"") << "Here's a semicolon;.html";
+    QTest::newRow("semicolon2") << QByteArray("attachment; filename=\"Here's a semi\\\"colon;.html\"") << "Here's a semi\\\"colon;.html";
+    QTest::newRow("semicolon3") << QByteArray("attachment; filename=\"Here's a\\\" semi\\\"colon;.html\"") << "Here's a\\\" semi\\\"colon;.html";
+    QTest::newRow("invalidParameter") << QByteArray("attachment; foo=\"bar\"; filename=\"foo.html\"") << "foo.html";
+    QTest::newRow("filenameUpper") << QByteArray("attachment; FILENAME=\"foo.html\"") << "foo.html";
+    QTest::newRow("noQuotes") << QByteArray("attachment; filename=foo.html") << "foo.html";
+    QTest::newRow("singleQuotesFileame") << QByteArray("attachment; filename='foo.bar'") << "'foo.bar'";
+    QTest::newRow("filenamePlain") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
+    QTest::newRow("percent") << QByteArray("attachment; filename=\"foo-%41.html\"") << "foo-%41.html";
+    QTest::newRow("percent2") << QByteArray("attachment; filename=\"foo-%c3%a4-%e2%82%ac.html\"") << "foo-%c3%a4-%e2%82%ac.html";
+    QTest::newRow("withSpace") << QByteArray("attachment; filename =\"foo.html\"") << "foo.html";
+    QTest::newRow("filenameInside") << QByteArray("attachment; example=\"filename=example.txt\"") << "";
+    QTest::newRow("xfilename") << QByteArray("attachment; xfilename=\"example.txt\"") << "";
+    QTest::newRow("withSpaceBefore") << QByteArray("attachment; filename *=UTF-8''foo-%c3%a4.html") << "";
+    QTest::newRow("withSpaceAfter") << QByteArray("attachment; filename*= UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
+    QTest::newRow("withSpaceInside") << QByteArray("attachment; filename* =UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
+    QTest::newRow("withDoubleQuotes") << QByteArray("attachment; filename*=\"UTF-8''foo-%c3%a4.html\"") << "";
+    QTest::newRow("multiTypes") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4.html; filename=\"foo-ae.html\"") << QString::fromUtf8("foo-ä.html");
+
+    // Ignored, but passing in browser
+    // QTest::newRow("filenameUtf8") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
+    // QTest::newRow("*utf8") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html") << QString::fromUtf8("foo-ä-€.html");
+    // QTest::newRow("rfc2231") << QByteArray("attachment; filename*=UTF-8''foo-a%cc%88.html") << QString::fromUtf8("foo-ä.html");
+
+    // ISO-8859-1 decoding not supported
+    // QTest::newRow("*iso") << QByteArray("attachment; filename*=iso-8859-1''foo-%E4.html") << QString::fromUtf8("foo-ä.html");
+    // QTest::newRow("multiTypes2") << QByteArray("attachment; filename*=ISO-8859-1''currency-sign%3d%a4; filename=\"foo-ae.html\"") << QString::fromUtf8("currency-sign=¤");
+
+    // Not yet supported
+    // QTest::newRow("multiType2") << QByteArray("attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4") << QString::fromUtf8("euro-sign=€");
+}
+
+void DownloadsTest::parseContentDispositionTest()
+{
+    QFETCH(QByteArray, header);
+    QFETCH(QString, result);
+    // The parser must return exactly the file name expected for the current data row
+    QCOMPARE(DownloadFileHelper::parseContentDisposition(header), result);
+}
diff --git a/tests/autotests/downloadstest.h b/tests/autotests/downloadstest.h
new file mode 100644
index 000000000..6b1ee33b3
--- /dev/null
+++ b/tests/autotests/downloadstest.h
@@ -0,0 +1,33 @@
+/* ============================================================
+* QupZilla - WebKit based browser
+* Copyright (C) 2013  David Rosca <nowrep@gmail.com>
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
+* ============================================================ */
+#ifndef DOWNLOADSTEST_H
+#define DOWNLOADSTEST_H
+
+#include <QObject>
+// Autotests for DownloadFileHelper::parseContentDisposition().
+class DownloadsTest : public QObject
+{
+    Q_OBJECT
+
+private slots:
+    void parseContentDispositionTest_data();
+    void parseContentDispositionTest();
+
+};
+
+#endif // DOWNLOADSTEST_H
diff --git a/tests/autotests/main.cpp b/tests/autotests/main.cpp
index 489a9d8af..42829f56e 100644
--- a/tests/autotests/main.cpp
+++ b/tests/autotests/main.cpp
@@ -18,6 +18,7 @@
 #include "qztoolstest.h"
 #include "formcompletertest.h"
 #include "cookiestest.h"
+#include "downloadstest.h"
 
 #include <QtTest/QtTest>
 
@@ -35,5 +36,8 @@ int main(int argc, char *argv[])
     CookiesTest cookiesTest;
     QTest::qExec(&cookiesTest, argc, argv);
 
+    DownloadsTest downloadsTest;
+    QTest::qExec(&downloadsTest, argc, argv);
+
     return 0;
 }