mirror of
https://invent.kde.org/network/falkon.git
synced 2024-12-19 10:16:34 +01:00
Improved parsing of Content-Disposition header.
It now tries to detect whether UTF-8 encoding is used. Closes #745
This commit is contained in:
parent
37a81cdf30
commit
668022ed64
@ -15,6 +15,7 @@ Version 1.4.0
|
||||
* use .qupzilla/tmp instead of /tmp for temporary data
|
||||
* saving passwords should now work for many more sites
|
||||
* don't steal Ctrl+B/U/I shortcuts from page
|
||||
* fixed parsing UTF-8 filenames in Content-Disposition header
|
||||
* fixed crash with context menu in websearchbar and locationbar
|
||||
* fixed loading NYTimes skimmer page
|
||||
* fixed cookie domain handling according to RFC 6265
|
||||
|
@ -111,6 +111,140 @@ void DownloadFileHelper::handleUnsupportedContent(QNetworkReply* reply, const Do
|
||||
}
|
||||
}
|
||||
|
||||
// http://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c

// Returns true when byte lies in the inclusive range [low, high].
static bool utf8InRange(unsigned char byte, unsigned char low, unsigned char high)
{
    return low <= byte && byte <= high;
}

// Returns true when byte is a UTF-8 continuation byte (0x80..0xBF).
static bool utf8IsTail(unsigned char byte)
{
    return utf8InRange(byte, 0x80, 0xBF);
}

// Checks whether a NUL-terminated buffer consists only of well-formed UTF-8.
//
// Accepted single bytes are TAB, LF, CR and 0x20..0x7F; any other byte below
// 0x20 makes the check fail. Multi-byte sequences must be non-overlong, must
// not encode surrogates (ED A0..BF xx) and must not exceed plane 16 (F4 8F BF BF).
//
// Returns false for a null pointer and true for an empty string.
static bool isUtf8(const char* string)
{
    if (!string) {
        return false;
    }

    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(string);

    while (*bytes) {
        const unsigned char lead = bytes[0];

        // ASCII
        if (lead == 0x09 || lead == 0x0A || lead == 0x0D || utf8InRange(lead, 0x20, 0x7F)) {
            bytes += 1;
            continue;
        }

        // non-overlong 2-byte
        if (utf8InRange(lead, 0xC2, 0xDF) && utf8IsTail(bytes[1])) {
            bytes += 2;
            continue;
        }

        // bytes[1] is always readable here: lead is non-zero, so at worst
        // bytes[1] is the terminator, which fails every range test below.
        const bool threeByteHead =
            // excluding overlongs
            (lead == 0xE0 && utf8InRange(bytes[1], 0xA0, 0xBF)) ||
            // straight 3-byte
            ((utf8InRange(lead, 0xE1, 0xEC) || lead == 0xEE || lead == 0xEF) && utf8IsTail(bytes[1])) ||
            // excluding surrogates
            (lead == 0xED && utf8InRange(bytes[1], 0x80, 0x9F));

        // bytes[2] is only inspected once bytes[1] is known to be non-zero,
        // so the read never goes past the terminator.
        if (threeByteHead && utf8IsTail(bytes[2])) {
            bytes += 3;
            continue;
        }

        const bool fourByteHead =
            // planes 1-3
            (lead == 0xF0 && utf8InRange(bytes[1], 0x90, 0xBF)) ||
            // planes 4-15
            (utf8InRange(lead, 0xF1, 0xF3) && utf8IsTail(bytes[1])) ||
            // plane 16
            (lead == 0xF4 && utf8InRange(bytes[1], 0x80, 0x8F));

        // Short-circuit order matters: each byte is read only after the
        // previous one was confirmed to be a (non-zero) continuation byte.
        if (fourByteHead && utf8IsTail(bytes[2]) && utf8IsTail(bytes[3])) {
            bytes += 4;
            continue;
        }

        return false;
    }

    return true;
}
|
||||
|
||||
/**
 * Extracts the suggested file name from a raw Content-Disposition header value.
 *
 * The raw bytes are decoded as UTF-8 when isUtf8() accepts them and as Latin-1
 * otherwise. An extended "filename*=UTF-8''..." parameter takes precedence and
 * its value is percent-decoded. Otherwise the plain "filename=" parameter is
 * used; surrounding double quotes are removed, while backslash-escaped quotes
 * inside the value are left as they are. Both parameters are only recognised
 * when preceded by a space or a semicolon.
 *
 * @param header raw header value
 * @return the file name, or an empty string when the header is empty or has
 *         no usable filename parameter
 */
QString DownloadFileHelper::parseContentDisposition(const QByteArray &header)
{
    QString path;

    if (header.isEmpty()) {
        return path;
    }

    QString value;

    if (isUtf8(header.constData())) {
        value = QString::fromUtf8(header);
    }
    else {
        value = QString::fromLatin1(header);
    }

    // One anchored expression is used for both detection and capture, so the
    // captured value always belongs to the parameter that was detected. The
    // '*' must follow "filename" directly; "filename *=" is not accepted.
    QRegExp extended("[ ;]{1,}filename\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
    QRegExp plain("[ ;]{1,}filename\\s*=(.*)", Qt::CaseInsensitive);

    // We try to use UTF-8 encoded filename first if present
    if (extended.indexIn(value) != -1) {
        path = QUrl::fromPercentEncoding(extended.cap(1).toUtf8()).trimmed();
    }
    else if (plain.indexIn(value) != -1) {
        path = plain.cap(1).trimmed();

        // Parse filename in quotes (to support semicolon inside filename)
        if (path.startsWith(QLatin1Char('"')) && path.count(QLatin1Char('"')) > 1) {
            // Look for the first closing quote that is not backslash-escaped
            int pos = path.indexOf(QLatin1Char('"'), 1);
            while (pos != -1) {
                if (path[pos - 1] != QLatin1Char('\\')) {
                    // We also need to strip starting quote
                    path = path.left(pos).mid(1);
                    break;
                }
                pos = path.indexOf(QLatin1Char('"'), pos + 1);
            }
        }
        else {
            // Unquoted value ends at the next parameter separator
            const int semicolon = path.indexOf(QLatin1Char(';'));
            if (semicolon != -1) {
                path = path.left(semicolon);
            }
            path = path.trimmed();
        }

        if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
            path = path.mid(1, path.length() - 2);
        }
    }

    return path;
}
|
||||
|
||||
void DownloadFileHelper::optionsDialogAccepted(int finish)
|
||||
{
|
||||
bool forceChoosingPath = false;
|
||||
@ -248,26 +382,7 @@ void DownloadFileHelper::fileNameChoosed(const QString &name, bool fileNameAutoG
|
||||
|
||||
QString DownloadFileHelper::getFileName(QNetworkReply* reply)
|
||||
{
|
||||
QString path;
|
||||
if (reply->hasRawHeader("Content-Disposition")) {
|
||||
QString value = QString::fromLatin1(reply->rawHeader("Content-Disposition"));
|
||||
|
||||
// We try to use UTF-8 encoded filename first if present
|
||||
if (value.contains(QRegExp("filename\\s*\\*\\s*=\\s*UTF-8", Qt::CaseInsensitive))) {
|
||||
QRegExp reg("filename\\s*\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
|
||||
reg.indexIn(value);
|
||||
path = QUrl::fromPercentEncoding(reg.cap(1).toUtf8()).trimmed();
|
||||
}
|
||||
else if (value.contains(QRegExp("filename\\s*=", Qt::CaseInsensitive))) {
|
||||
QRegExp reg("filename\\s*=([^;]*)", Qt::CaseInsensitive);
|
||||
reg.indexIn(value);
|
||||
path = reg.cap(1).trimmed();
|
||||
|
||||
if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
|
||||
path = path.mid(1, path.length() - 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
QString path = parseContentDisposition(reply->rawHeader("Content-Disposition"));
|
||||
|
||||
if (path.isEmpty()) {
|
||||
path = reply->url().path();
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* ============================================================
|
||||
* QupZilla - WebKit based browser
|
||||
* Copyright (C) 2010-2012 David Rosca <nowrep@gmail.com>
|
||||
* Copyright (C) 2010-2013 David Rosca <nowrep@gmail.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@ -45,6 +45,8 @@ public:
|
||||
|
||||
void handleUnsupportedContent(QNetworkReply* reply, const DownloadManager::DownloadInfo &info);
|
||||
|
||||
static QString parseContentDisposition(const QByteArray &header);
|
||||
|
||||
signals:
|
||||
void itemCreated(QListWidgetItem* item, DownloadItem* downItem);
|
||||
|
||||
|
@ -48,10 +48,12 @@ INCLUDEPATH += $$PWD/../../src/lib/3rdparty\
|
||||
HEADERS += \
|
||||
qztoolstest.h \
|
||||
formcompletertest.h \
|
||||
cookiestest.h
|
||||
cookiestest.h \
|
||||
downloadstest.h
|
||||
|
||||
SOURCES += \
|
||||
qztoolstest.cpp \
|
||||
main.cpp \
|
||||
formcompletertest.cpp \
|
||||
cookiestest.cpp
|
||||
cookiestest.cpp \
|
||||
downloadstest.cpp
|
||||
|
70
tests/autotests/downloadstest.cpp
Normal file
70
tests/autotests/downloadstest.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
/* ============================================================
|
||||
* QupZilla - WebKit based browser
|
||||
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
* ============================================================ */
|
||||
#include "downloadstest.h"
|
||||
#include "downloadfilehelper.h"
|
||||
|
||||
#include <QtTest/QtTest>
|
||||
#include <QNetworkReply>
|
||||
|
||||
void DownloadsTest::parseContentDispositionTest_data()
|
||||
{
|
||||
QTest::addColumn<QByteArray>("header");
|
||||
QTest::addColumn<QString>("result");
|
||||
|
||||
QTest::newRow("filename") << QByteArray("attachment; filename=\"foo.html\"") << "foo.html";
|
||||
QTest::newRow("filename25") << QByteArray("attachment; filename=\"0000000000111111111122222\"") << "0000000000111111111122222";
|
||||
QTest::newRow("filename35") << QByteArray("attachment; filename=\"00000000001111111111222222222233333\"") << "00000000001111111111222222222233333";
|
||||
QTest::newRow("semicolon") << QByteArray("attachment; filename=\"Here's a semicolon;.html\"") << "Here's a semicolon;.html";
|
||||
QTest::newRow("semicolon2") << QByteArray("attachment; filename=\"Here's a semi\\\"colon;.html\"") << "Here's a semi\\\"colon;.html";
|
||||
QTest::newRow("semicolon3") << QByteArray("attachment; filename=\"Here's a\\\" semi\\\"colon;.html\"") << "Here's a\\\" semi\\\"colon;.html";
|
||||
QTest::newRow("invalidParameter") << QByteArray("attachment; foo=\"bar\"; filename=\"foo.html\"") << "foo.html";
|
||||
QTest::newRow("filenameUpper") << QByteArray("attachment; FILENAME=\"foo.html\"") << "foo.html";
|
||||
QTest::newRow("noQuotes") << QByteArray("attachment; filename=foo.html") << "foo.html";
|
||||
QTest::newRow("singleQuotesFileame") << QByteArray("attachment; filename='foo.bar'") << "'foo.bar'";
|
||||
QTest::newRow("filenamePlain") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
|
||||
QTest::newRow("percent") << QByteArray("attachment; filename=\"foo-%41.html\"") << "foo-%41.html";
|
||||
QTest::newRow("percent2") << QByteArray("attachment; filename=\"foo-%c3%a4-%e2%82%ac.html\"") << "foo-%c3%a4-%e2%82%ac.html";
|
||||
QTest::newRow("withSpace") << QByteArray("attachment; filename =\"foo.html\"") << "foo.html";
|
||||
QTest::newRow("filenameInside") << QByteArray("attachment; example=\"filename=example.txt\"") << "";
|
||||
QTest::newRow("xfilename") << QByteArray("attachment; xfilename=\"example.txt\"") << "";
|
||||
QTest::newRow("withSpaceBefore") << QByteArray("attachment; filename *=UTF-8''foo-%c3%a4.html") << "";
|
||||
QTest::newRow("withSpaceAfter") << QByteArray("attachment; filename*= UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
|
||||
QTest::newRow("withSpaceInside") << QByteArray("attachment; filename* =UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
|
||||
QTest::newRow("withDoubleQuotes") << QByteArray("attachment; filename*=\"UTF-8''foo-%c3%a4.html\"") << "";
|
||||
QTest::newRow("multiTypes") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4.html; filename=\"foo-ae.html\"") << QString::fromUtf8("foo-ä.html");
|
||||
|
||||
// Ignored, but passing in browser
|
||||
// QTest::newRow("filenameUtf8") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
|
||||
// QTest::newRow("*utf8") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html") << QString::fromUtf8("foo-ä-€.html");
|
||||
// QTest::newRow("rfc2231") << QByteArray("attachment; filename*=UTF-8''foo-a%cc%88.html") << QString::fromUtf8("foo-ä.html");
|
||||
|
||||
// ISO-8859-1 decoding not supported
|
||||
// QTest::newRow("*iso") << QByteArray("attachment; filename*=iso-8859-1''foo-%E4.html") << QString::fromUtf8("foo-ä.html");
|
||||
// QTest::newRow("multiTypes2") << QByteArray("attachment; filename*=ISO-8859-1''currency-sign%3d%a4; filename=\"foo-ae.html\"") << QString::fromUtf8("currency-sign=¤");
|
||||
|
||||
// Not yet supported
|
||||
// QTest::newRow("multiType2") << QByteArray("attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4") << QString::fromUtf8("euro-sign=€");
|
||||
}
|
||||
|
||||
void DownloadsTest::parseContentDispositionTest()
|
||||
{
|
||||
QFETCH(QByteArray, header);
|
||||
QFETCH(QString, result);
|
||||
|
||||
QCOMPARE(DownloadFileHelper::parseContentDisposition(header), result);
|
||||
}
|
33
tests/autotests/downloadstest.h
Normal file
33
tests/autotests/downloadstest.h
Normal file
@ -0,0 +1,33 @@
|
||||
/* ============================================================
|
||||
* QupZilla - WebKit based browser
|
||||
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
* ============================================================ */
|
||||
#ifndef DOWNLOADSTEST_H
|
||||
#define DOWNLOADSTEST_H
|
||||
|
||||
#include <QObject>
|
||||
|
||||
class DownloadsTest : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
private slots:
|
||||
void parseContentDispositionTest_data();
|
||||
void parseContentDispositionTest();
|
||||
|
||||
};
|
||||
|
||||
#endif // DOWNLOADSTEST_H
|
@ -18,6 +18,7 @@
|
||||
#include "qztoolstest.h"
|
||||
#include "formcompletertest.h"
|
||||
#include "cookiestest.h"
|
||||
#include "downloadstest.h"
|
||||
|
||||
#include <QtTest/QtTest>
|
||||
|
||||
@ -35,5 +36,8 @@ int main(int argc, char *argv[])
|
||||
CookiesTest cookiesTest;
|
||||
QTest::qExec(&cookiesTest, argc, argv);
|
||||
|
||||
DownloadsTest downloadsTest;
|
||||
QTest::qExec(&downloadsTest, argc, argv);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user