1
mirror of https://invent.kde.org/network/falkon.git synced 2024-12-19 10:16:34 +01:00

Improved parsing of Content-Disposition header.

It now tries to detect whether UTF-8 encoding is used.
Closes #745
This commit is contained in:
nowrep 2013-02-04 22:48:34 +01:00
parent 37a81cdf30
commit 668022ed64
7 changed files with 250 additions and 23 deletions

View File

@ -15,6 +15,7 @@ Version 1.4.0
* use .qupzilla/tmp instead of /tmp for temporary data
* saving passwords should now work for much more sites
* don't steal Ctrl+B/U/I shortcuts from page
* fixed parsing UTF-8 filenames in Content-Disposition header
* fixed crash with context menu in websearchbar and locationbar
* fixed loading NYTimes skimmer page
* fixed cookie domain handling according to RFC 6265

View File

@ -111,6 +111,140 @@ void DownloadFileHelper::handleUnsupportedContent(QNetworkReply* reply, const Do
}
}
// http://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
// Checks whether a NUL-terminated byte string is well-formed UTF-8.
//
// Accepted single bytes are TAB, LF, CR and 0x20-0x7F; every other byte must
// belong to a valid multi-byte sequence. Overlong encodings, UTF-16 surrogates
// (U+D800-U+DFFF) and code points above U+10FFFF are rejected.
// Returns false for a null pointer and true for an empty string.
static bool isUtf8(const char* string)
{
    if (!string) {
        return false;
    }

    const unsigned char* cursor = reinterpret_cast<const unsigned char*>(string);

    while (*cursor != 0) {
        const unsigned char lead = cursor[0];

        // ASCII
        if (lead == 0x09 || lead == 0x0A || lead == 0x0D || (lead >= 0x20 && lead <= 0x7F)) {
            ++cursor;
            continue;
        }

        // The lead byte determines how many continuation bytes follow and
        // which range the first of them may take.
        int continuationCount = 0;
        unsigned char secondMin = 0x80;
        unsigned char secondMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            // non-overlong 2-byte
            continuationCount = 1;
        }
        else if (lead >= 0xE0 && lead <= 0xEF) {
            continuationCount = 2;
            if (lead == 0xE0) {
                // excluding overlongs
                secondMin = 0xA0;
            }
            else if (lead == 0xED) {
                // excluding surrogates
                secondMax = 0x9F;
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4) {
            continuationCount = 3;
            if (lead == 0xF0) {
                // planes 1-3
                secondMin = 0x90;
            }
            else if (lead == 0xF4) {
                // plane 16
                secondMax = 0x8F;
            }
        }
        else {
            return false;
        }

        if (cursor[1] < secondMin || cursor[1] > secondMax) {
            return false;
        }

        // Bytes are inspected strictly in order and the scan stops at the first
        // invalid one, so a terminating NUL is never read past.
        for (int i = 2; i <= continuationCount; ++i) {
            if (cursor[i] < 0x80 || cursor[i] > 0xBF) {
                return false;
            }
        }

        cursor += continuationCount + 1;
    }

    return true;
}
/**
 * Extracts the suggested file name from a raw Content-Disposition header.
 *
 * The header bytes are decoded as UTF-8 when they form valid UTF-8 and as
 * Latin-1 otherwise. An extended "filename*=UTF-8''..." parameter takes
 * precedence and is percent-decoded. Otherwise the plain "filename="
 * parameter is used: a quoted value may contain semicolons (backslash-escaped
 * quotes are kept verbatim, not unescaped), an unquoted value ends at the
 * first semicolon.
 *
 * @param header raw value of the Content-Disposition header
 * @return the file name, or an empty string when the header is empty or
 *         carries no supported filename parameter
 */
QString DownloadFileHelper::parseContentDisposition(const QByteArray &header)
{
    QString path;

    if (header.isEmpty()) {
        return path;
    }

    QString value;

    if (isUtf8(header.constData())) {
        value = QString::fromUtf8(header);
    }
    else {
        value = QString::fromLatin1(header);
    }

    // We try to use UTF-8 encoded filename first if present.
    // A single anchored expression both detects and captures the value, so the
    // check and the extraction can never pick different parameters. The '*' is
    // escaped right after "filename": an unescaped one would quantify the 'e'
    // and accept names such as "filenam*" or "filenamee*".
    QRegExp extendedReg("[ ;]{1,}filename\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
    if (extendedReg.indexIn(value) != -1) {
        return QUrl::fromPercentEncoding(extendedReg.cap(1).toUtf8()).trimmed();
    }

    // The leading [ ;] keeps names that merely end in "filename" (xfilename=)
    // from matching.
    QRegExp plainReg("[ ;]{1,}filename\\s*=(.*)", Qt::CaseInsensitive);
    if (plainReg.indexIn(value) == -1) {
        return path;
    }

    path = plainReg.cap(1).trimmed();

    if (path.startsWith(QLatin1Char('"')) && path.count(QLatin1Char('"')) > 1) {
        // Parse filename in quotes (to support semicolon inside filename):
        // cut at the first quote that is not preceded by a backslash.
        int pos = path.indexOf(QLatin1Char('"'), 1);
        while (pos != -1) {
            if (path[pos - 1] != QLatin1Char('\\')) {
                // We also need to strip starting quote
                path = path.left(pos).mid(1);
                break;
            }
            pos = path.indexOf(QLatin1Char('"'), pos + 1);
        }
    }
    else {
        // Unquoted value: everything up to the next parameter separator
        QRegExp unquotedReg("([^;]*)", Qt::CaseInsensitive);
        unquotedReg.indexIn(path);
        path = unquotedReg.cap(1).trimmed();
    }

    // Strip a remaining pair of enclosing quotes (e.g. when every inner quote
    // was escaped and the loop above left the value untouched).
    if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
        path = path.mid(1, path.length() - 2);
    }

    return path;
}
void DownloadFileHelper::optionsDialogAccepted(int finish)
{
bool forceChoosingPath = false;
@ -248,26 +382,7 @@ void DownloadFileHelper::fileNameChoosed(const QString &name, bool fileNameAutoG
QString DownloadFileHelper::getFileName(QNetworkReply* reply)
{
QString path;
if (reply->hasRawHeader("Content-Disposition")) {
QString value = QString::fromLatin1(reply->rawHeader("Content-Disposition"));
// We try to use UTF-8 encoded filename first if present
if (value.contains(QRegExp("filename\\s*\\*\\s*=\\s*UTF-8", Qt::CaseInsensitive))) {
QRegExp reg("filename\\s*\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
reg.indexIn(value);
path = QUrl::fromPercentEncoding(reg.cap(1).toUtf8()).trimmed();
}
else if (value.contains(QRegExp("filename\\s*=", Qt::CaseInsensitive))) {
QRegExp reg("filename\\s*=([^;]*)", Qt::CaseInsensitive);
reg.indexIn(value);
path = reg.cap(1).trimmed();
if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
path = path.mid(1, path.length() - 2);
}
}
}
QString path = parseContentDisposition(reply->rawHeader("Content-Disposition"));
if (path.isEmpty()) {
path = reply->url().path();

View File

@ -1,6 +1,6 @@
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2010-2012 David Rosca <nowrep@gmail.com>
* Copyright (C) 2010-2013 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -45,6 +45,8 @@ public:
void handleUnsupportedContent(QNetworkReply* reply, const DownloadManager::DownloadInfo &info);
static QString parseContentDisposition(const QByteArray &header);
signals:
void itemCreated(QListWidgetItem* item, DownloadItem* downItem);

View File

@ -48,10 +48,12 @@ INCLUDEPATH += $$PWD/../../src/lib/3rdparty\
HEADERS += \
qztoolstest.h \
formcompletertest.h \
cookiestest.h
cookiestest.h \
downloadstest.h
SOURCES += \
qztoolstest.cpp \
main.cpp \
formcompletertest.cpp \
cookiestest.cpp
cookiestest.cpp \
downloadstest.cpp

View File

@ -0,0 +1,70 @@
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
#include "downloadstest.h"
#include "downloadfilehelper.h"
#include <QtTest/QtTest>
#include <QNetworkReply>
// Supplies the data rows for parseContentDispositionTest(): each row pairs a raw
// Content-Disposition header value ("header") with the file name that
// DownloadFileHelper::parseContentDisposition() is expected to return ("result").
void DownloadsTest::parseContentDispositionTest_data()
{
QTest::addColumn<QByteArray>("header");
QTest::addColumn<QString>("result");
// Quoted filename parameter, including names of 25 and 35 characters
QTest::newRow("filename") << QByteArray("attachment; filename=\"foo.html\"") << "foo.html";
QTest::newRow("filename25") << QByteArray("attachment; filename=\"0000000000111111111122222\"") << "0000000000111111111122222";
QTest::newRow("filename35") << QByteArray("attachment; filename=\"00000000001111111111222222222233333\"") << "00000000001111111111222222222233333";
// Semicolons and backslash-escaped quotes inside a quoted filename; the
// expected result keeps the backslashes
QTest::newRow("semicolon") << QByteArray("attachment; filename=\"Here's a semicolon;.html\"") << "Here's a semicolon;.html";
QTest::newRow("semicolon2") << QByteArray("attachment; filename=\"Here's a semi\\\"colon;.html\"") << "Here's a semi\\\"colon;.html";
QTest::newRow("semicolon3") << QByteArray("attachment; filename=\"Here's a\\\" semi\\\"colon;.html\"") << "Here's a\\\" semi\\\"colon;.html";
// Another parameter before filename, upper-case parameter name, unquoted
// value, and single quotes (expected to stay part of the name)
QTest::newRow("invalidParameter") << QByteArray("attachment; foo=\"bar\"; filename=\"foo.html\"") << "foo.html";
QTest::newRow("filenameUpper") << QByteArray("attachment; FILENAME=\"foo.html\"") << "foo.html";
QTest::newRow("noQuotes") << QByteArray("attachment; filename=foo.html") << "foo.html";
QTest::newRow("singleQuotesFileame") << QByteArray("attachment; filename='foo.bar'") << "'foo.bar'";
// Non-ASCII character written directly in the header
QTest::newRow("filenamePlain") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
// Percent sequences in a plain filename parameter are expected back undecoded
QTest::newRow("percent") << QByteArray("attachment; filename=\"foo-%41.html\"") << "foo-%41.html";
QTest::newRow("percent2") << QByteArray("attachment; filename=\"foo-%c3%a4-%e2%82%ac.html\"") << "foo-%c3%a4-%e2%82%ac.html";
// Whitespace between the parameter name and '='
QTest::newRow("withSpace") << QByteArray("attachment; filename =\"foo.html\"") << "foo.html";
// "filename" only inside another parameter's value, or as the tail of another
// parameter's name, is expected to give an empty result
QTest::newRow("filenameInside") << QByteArray("attachment; example=\"filename=example.txt\"") << "";
QTest::newRow("xfilename") << QByteArray("attachment; xfilename=\"example.txt\"") << "";
// filename*=UTF-8'' form: whitespace on either side of '=' is accepted, while
// whitespace before '*' or a quoted value is expected to give an empty result;
// filename* is expected to win over filename when both are present
QTest::newRow("withSpaceBefore") << QByteArray("attachment; filename *=UTF-8''foo-%c3%a4.html") << "";
QTest::newRow("withSpaceAfter") << QByteArray("attachment; filename*= UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
QTest::newRow("withSpaceInside") << QByteArray("attachment; filename* =UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
QTest::newRow("withDoubleQuotes") << QByteArray("attachment; filename*=\"UTF-8''foo-%c3%a4.html\"") << "";
QTest::newRow("multiTypes") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4.html; filename=\"foo-ae.html\"") << QString::fromUtf8("foo-ä.html");
// Ignored, but passing in browser
// QTest::newRow("filenameUtf8") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
// QTest::newRow("*utf8") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html") << QString::fromUtf8("foo-ä-€.html");
// QTest::newRow("rfc2231") << QByteArray("attachment; filename*=UTF-8''foo-a%cc%88.html") << QString::fromUtf8("foo-ä.html");
// ISO-8859-1 decoding not supported
// QTest::newRow("*iso") << QByteArray("attachment; filename*=iso-8859-1''foo-%E4.html") << QString::fromUtf8("foo-ä.html");
// QTest::newRow("multiTypes2") << QByteArray("attachment; filename*=ISO-8859-1''currency-sign%3d%a4; filename=\"foo-ae.html\"") << QString::fromUtf8("currency-sign=¤");
// Not yet supported
// QTest::newRow("multiType2") << QByteArray("attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4") << QString::fromUtf8("euro-sign=€");
}
// Data-driven test: fetches the "header" and "result" columns of the current
// data row and checks that DownloadFileHelper::parseContentDisposition(header)
// equals result.
void DownloadsTest::parseContentDispositionTest()
{
// QFETCH declares locals named after the columns, so the names must match
// those registered in parseContentDispositionTest_data().
QFETCH(QByteArray, header);
QFETCH(QString, result);
QCOMPARE(DownloadFileHelper::parseContentDisposition(header), result);
}

View File

@ -0,0 +1,33 @@
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
#ifndef DOWNLOADSTEST_H
#define DOWNLOADSTEST_H
#include <QObject>
// Qt Test object for DownloadFileHelper::parseContentDisposition().
class DownloadsTest : public QObject
{
Q_OBJECT
private slots:
// Registers the "header"/"result" data rows used by parseContentDispositionTest()
void parseContentDispositionTest_data();
// Compares the parsed file name of each data row with its expected result
void parseContentDispositionTest();
};
#endif // DOWNLOADSTEST_H

View File

@ -18,6 +18,7 @@
#include "qztoolstest.h"
#include "formcompletertest.h"
#include "cookiestest.h"
#include "downloadstest.h"
#include <QtTest/QtTest>
@ -35,5 +36,8 @@ int main(int argc, char *argv[])
CookiesTest cookiesTest;
QTest::qExec(&cookiesTest, argc, argv);
DownloadsTest downloadsTest;
QTest::qExec(&downloadsTest, argc, argv);
return 0;
}