mirror of
https://invent.kde.org/network/falkon.git
synced 2024-12-19 18:26:34 +01:00
Improved parsing of Content-Disposition header.
It now tries to detect whether UTF-8 encoding is used. Closes #745
This commit is contained in:
parent
37a81cdf30
commit
668022ed64
@ -15,6 +15,7 @@ Version 1.4.0
|
|||||||
* use .qupzilla/tmp instead of /tmp for temporary data
|
* use .qupzilla/tmp instead of /tmp for temporary data
|
||||||
* saving passwords should now work for many more sites
|
* saving passwords should now work for many more sites
|
||||||
* don't steal Ctrl+B/U/I shortcuts from page
|
* don't steal Ctrl+B/U/I shortcuts from page
|
||||||
|
* fixed parsing UTF-8 filenames in Content-Disposition header
|
||||||
* fixed crash with context menu in websearchbar and locationbar
|
* fixed crash with context menu in websearchbar and locationbar
|
||||||
* fixed loading NYTimes skimmer page
|
* fixed loading NYTimes skimmer page
|
||||||
* fixed cookie domain handling according to RFC 6265
|
* fixed cookie domain handling according to RFC 6265
|
||||||
|
@ -111,6 +111,140 @@ void DownloadFileHelper::handleUnsupportedContent(QNetworkReply* reply, const Do
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// http://stackoverflow.com/questions/1031645/how-to-detect-utf-8-in-plain-c
|
||||||
|
// Returns true when byte lies in the inclusive range [low, high].
static bool isUtf8ByteInRange(unsigned char byte, unsigned char low, unsigned char high)
{
    return low <= byte && byte <= high;
}

// Returns true when byte has the form of a UTF-8 continuation byte (0x80-0xBF).
static bool isUtf8ContinuationByte(unsigned char byte)
{
    return isUtf8ByteInRange(byte, 0x80, 0xBF);
}

// Checks whether a NUL-terminated byte string consists only of well-formed
// UTF-8 sequences.
//
// Accepted single bytes are TAB, LF, CR and 0x20-0x7F; any other control byte
// makes the function return false. Multi-byte sequences must not be overlong,
// must not encode surrogates (0xED 0xA0-0xBF ..) and must not exceed plane 16.
//
// string: NUL-terminated input; a null pointer yields false, "" yields true.
// Scanning stops at the first NUL byte.
static bool isUtf8(const char* string)
{
    if (!string) {
        return false;
    }

    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(string);

    // Every multi-byte test below is chained with &&, so a trailing byte is
    // only inspected after the previous one passed its range test. A NUL
    // fails every range test, hence nothing past the terminator is read.
    while (*bytes) {
        const unsigned char lead = bytes[0];

        // ASCII
        if (lead == 0x09 || lead == 0x0A || lead == 0x0D || isUtf8ByteInRange(lead, 0x20, 0x7F)) {
            bytes += 1;
            continue;
        }

        // non-overlong 2-byte
        if (isUtf8ByteInRange(lead, 0xC2, 0xDF) && isUtf8ContinuationByte(bytes[1])) {
            bytes += 2;
            continue;
        }

        // 3-byte sequences
        if (isUtf8ByteInRange(lead, 0xE0, 0xEF)) {
            unsigned char secondLow = 0x80;
            unsigned char secondHigh = 0xBF;

            if (lead == 0xE0) {
                // excluding overlongs
                secondLow = 0xA0;
            }
            else if (lead == 0xED) {
                // excluding surrogates
                secondHigh = 0x9F;
            }

            if (isUtf8ByteInRange(bytes[1], secondLow, secondHigh) &&
                    isUtf8ContinuationByte(bytes[2])) {
                bytes += 3;
                continue;
            }

            return false;
        }

        // 4-byte sequences
        if (isUtf8ByteInRange(lead, 0xF0, 0xF4)) {
            unsigned char secondLow = 0x80;
            unsigned char secondHigh = 0xBF;

            if (lead == 0xF0) {
                // planes 1-3 (anything lower would be overlong)
                secondLow = 0x90;
            }
            else if (lead == 0xF4) {
                // plane 16 (anything higher is beyond U+10FFFF)
                secondHigh = 0x8F;
            }

            if (isUtf8ByteInRange(bytes[1], secondLow, secondHigh) &&
                    isUtf8ContinuationByte(bytes[2]) &&
                    isUtf8ContinuationByte(bytes[3])) {
                bytes += 4;
                continue;
            }
        }

        return false;
    }

    return true;
}
|
||||||
|
|
||||||
|
// Extracts the suggested file name from a raw Content-Disposition header value.
//
// The raw bytes are decoded as UTF-8 when isUtf8() accepts them and as Latin-1
// otherwise. An extended parameter of the form  filename*=UTF-8''<percent-encoded>
// takes precedence over a plain  filename=  parameter. Both parameters are only
// recognised when preceded by a space or a semicolon, so names such as
// "xfilename" or a "filename=" inside another quoted value are ignored.
//
// header: raw header value as received.
// Returns the file name, or an empty string when the header is empty or no
// supported filename parameter is found.
QString DownloadFileHelper::parseContentDisposition(const QByteArray &header)
{
    QString path;

    if (header.isEmpty()) {
        return path;
    }

    QString value;

    if (isUtf8(header.constData())) {
        value = QString::fromUtf8(header);
    }
    else {
        value = QString::fromLatin1(header);
    }

    // We try to use UTF-8 encoded filename first if present.
    // A single anchored expression is used for both detection and capture, so
    // the captured text always belongs to the parameter that was detected.
    // No whitespace is allowed between "filename" and "*".
    QRegExp extendedReg("[ ;]{1,}filename\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
    if (extendedReg.indexIn(value) != -1) {
        return QUrl::fromPercentEncoding(extendedReg.cap(1).toUtf8()).trimmed();
    }

    // Plain parameter: capture everything after "=", the end is located below
    QRegExp plainReg("[ ;]{1,}filename\\s*=(.*)", Qt::CaseInsensitive);
    if (plainReg.indexIn(value) == -1) {
        return path;
    }

    path = plainReg.cap(1).trimmed();

    // Parse filename in quotes (to support semicolon inside filename)
    if (path.startsWith(QLatin1Char('"')) && path.count(QLatin1Char('"')) > 1) {
        int pos = path.indexOf(QLatin1Char('"'), 1);
        while (pos != -1) {
            // A quote preceded by a backslash does not terminate the value
            if (path.at(pos - 1) != QLatin1Char('\\')) {
                // We also need to strip starting quote
                path = path.left(pos).mid(1);
                break;
            }
            pos = path.indexOf(QLatin1Char('"'), pos + 1);
        }
    }
    else {
        // Unquoted value ends at the next semicolon
        QRegExp reg("([^;]*)", Qt::CaseInsensitive);
        reg.indexIn(path);
        path = reg.cap(1).trimmed();
    }

    // Strip a remaining pair of enclosing quotes
    if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
        path = path.mid(1, path.length() - 2);
    }

    return path;
}
|
||||||
|
|
||||||
void DownloadFileHelper::optionsDialogAccepted(int finish)
|
void DownloadFileHelper::optionsDialogAccepted(int finish)
|
||||||
{
|
{
|
||||||
bool forceChoosingPath = false;
|
bool forceChoosingPath = false;
|
||||||
@ -248,26 +382,7 @@ void DownloadFileHelper::fileNameChoosed(const QString &name, bool fileNameAutoG
|
|||||||
|
|
||||||
QString DownloadFileHelper::getFileName(QNetworkReply* reply)
|
QString DownloadFileHelper::getFileName(QNetworkReply* reply)
|
||||||
{
|
{
|
||||||
QString path;
|
QString path = parseContentDisposition(reply->rawHeader("Content-Disposition"));
|
||||||
if (reply->hasRawHeader("Content-Disposition")) {
|
|
||||||
QString value = QString::fromLatin1(reply->rawHeader("Content-Disposition"));
|
|
||||||
|
|
||||||
// We try to use UTF-8 encoded filename first if present
|
|
||||||
if (value.contains(QRegExp("filename\\s*\\*\\s*=\\s*UTF-8", Qt::CaseInsensitive))) {
|
|
||||||
QRegExp reg("filename\\s*\\*\\s*=\\s*UTF-8''([^;]*)", Qt::CaseInsensitive);
|
|
||||||
reg.indexIn(value);
|
|
||||||
path = QUrl::fromPercentEncoding(reg.cap(1).toUtf8()).trimmed();
|
|
||||||
}
|
|
||||||
else if (value.contains(QRegExp("filename\\s*=", Qt::CaseInsensitive))) {
|
|
||||||
QRegExp reg("filename\\s*=([^;]*)", Qt::CaseInsensitive);
|
|
||||||
reg.indexIn(value);
|
|
||||||
path = reg.cap(1).trimmed();
|
|
||||||
|
|
||||||
if (path.startsWith(QLatin1Char('"')) && path.endsWith(QLatin1Char('"'))) {
|
|
||||||
path = path.mid(1, path.length() - 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (path.isEmpty()) {
|
if (path.isEmpty()) {
|
||||||
path = reply->url().path();
|
path = reply->url().path();
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/* ============================================================
|
/* ============================================================
|
||||||
* QupZilla - WebKit based browser
|
* QupZilla - WebKit based browser
|
||||||
* Copyright (C) 2010-2012 David Rosca <nowrep@gmail.com>
|
* Copyright (C) 2010-2013 David Rosca <nowrep@gmail.com>
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -45,6 +45,8 @@ public:
|
|||||||
|
|
||||||
void handleUnsupportedContent(QNetworkReply* reply, const DownloadManager::DownloadInfo &info);
|
void handleUnsupportedContent(QNetworkReply* reply, const DownloadManager::DownloadInfo &info);
|
||||||
|
|
||||||
|
static QString parseContentDisposition(const QByteArray &header);
|
||||||
|
|
||||||
signals:
|
signals:
|
||||||
void itemCreated(QListWidgetItem* item, DownloadItem* downItem);
|
void itemCreated(QListWidgetItem* item, DownloadItem* downItem);
|
||||||
|
|
||||||
|
@ -48,10 +48,12 @@ INCLUDEPATH += $$PWD/../../src/lib/3rdparty\
|
|||||||
HEADERS += \
|
HEADERS += \
|
||||||
qztoolstest.h \
|
qztoolstest.h \
|
||||||
formcompletertest.h \
|
formcompletertest.h \
|
||||||
cookiestest.h
|
cookiestest.h \
|
||||||
|
downloadstest.h
|
||||||
|
|
||||||
SOURCES += \
|
SOURCES += \
|
||||||
qztoolstest.cpp \
|
qztoolstest.cpp \
|
||||||
main.cpp \
|
main.cpp \
|
||||||
formcompletertest.cpp \
|
formcompletertest.cpp \
|
||||||
cookiestest.cpp
|
cookiestest.cpp \
|
||||||
|
downloadstest.cpp
|
||||||
|
70
tests/autotests/downloadstest.cpp
Normal file
70
tests/autotests/downloadstest.cpp
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/* ============================================================
|
||||||
|
* QupZilla - WebKit based browser
|
||||||
|
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
* ============================================================ */
|
||||||
|
#include "downloadstest.h"
|
||||||
|
#include "downloadfilehelper.h"
|
||||||
|
|
||||||
|
#include <QtTest/QtTest>
|
||||||
|
#include <QNetworkReply>
|
||||||
|
|
||||||
|
void DownloadsTest::parseContentDispositionTest_data()
|
||||||
|
{
|
||||||
|
QTest::addColumn<QByteArray>("header");
|
||||||
|
QTest::addColumn<QString>("result");
|
||||||
|
|
||||||
|
QTest::newRow("filename") << QByteArray("attachment; filename=\"foo.html\"") << "foo.html";
|
||||||
|
QTest::newRow("filename25") << QByteArray("attachment; filename=\"0000000000111111111122222\"") << "0000000000111111111122222";
|
||||||
|
QTest::newRow("filename35") << QByteArray("attachment; filename=\"00000000001111111111222222222233333\"") << "00000000001111111111222222222233333";
|
||||||
|
QTest::newRow("semicolon") << QByteArray("attachment; filename=\"Here's a semicolon;.html\"") << "Here's a semicolon;.html";
|
||||||
|
QTest::newRow("semicolon2") << QByteArray("attachment; filename=\"Here's a semi\\\"colon;.html\"") << "Here's a semi\\\"colon;.html";
|
||||||
|
QTest::newRow("semicolon3") << QByteArray("attachment; filename=\"Here's a\\\" semi\\\"colon;.html\"") << "Here's a\\\" semi\\\"colon;.html";
|
||||||
|
QTest::newRow("invalidParameter") << QByteArray("attachment; foo=\"bar\"; filename=\"foo.html\"") << "foo.html";
|
||||||
|
QTest::newRow("filenameUpper") << QByteArray("attachment; FILENAME=\"foo.html\"") << "foo.html";
|
||||||
|
QTest::newRow("noQuotes") << QByteArray("attachment; filename=foo.html") << "foo.html";
|
||||||
|
QTest::newRow("singleQuotesFileame") << QByteArray("attachment; filename='foo.bar'") << "'foo.bar'";
|
||||||
|
QTest::newRow("filenamePlain") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
|
||||||
|
QTest::newRow("percent") << QByteArray("attachment; filename=\"foo-%41.html\"") << "foo-%41.html";
|
||||||
|
QTest::newRow("percent2") << QByteArray("attachment; filename=\"foo-%c3%a4-%e2%82%ac.html\"") << "foo-%c3%a4-%e2%82%ac.html";
|
||||||
|
QTest::newRow("withSpace") << QByteArray("attachment; filename =\"foo.html\"") << "foo.html";
|
||||||
|
QTest::newRow("filenameInside") << QByteArray("attachment; example=\"filename=example.txt\"") << "";
|
||||||
|
QTest::newRow("xfilename") << QByteArray("attachment; xfilename=\"example.txt\"") << "";
|
||||||
|
QTest::newRow("withSpaceBefore") << QByteArray("attachment; filename *=UTF-8''foo-%c3%a4.html") << "";
|
||||||
|
QTest::newRow("withSpaceAfter") << QByteArray("attachment; filename*= UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
|
||||||
|
QTest::newRow("withSpaceInside") << QByteArray("attachment; filename* =UTF-8''foo-%c3%a4.html") << QString::fromUtf8("foo-ä.html");
|
||||||
|
QTest::newRow("withDoubleQuotes") << QByteArray("attachment; filename*=\"UTF-8''foo-%c3%a4.html\"") << "";
|
||||||
|
QTest::newRow("multiTypes") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4.html; filename=\"foo-ae.html\"") << QString::fromUtf8("foo-ä.html");
|
||||||
|
|
||||||
|
// Ignored, but passing in browser
|
||||||
|
// QTest::newRow("filenameUtf8") << QByteArray("attachment; filename=\"foo-ä.html\"") << QString::fromUtf8("foo-ä.html");
|
||||||
|
// QTest::newRow("*utf8") << QByteArray("attachment; filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html") << QString::fromUtf8("foo-ä-€.html");
|
||||||
|
// QTest::newRow("rfc2231") << QByteArray("attachment; filename*=UTF-8''foo-a%cc%88.html") << QString::fromUtf8("foo-ä.html");
|
||||||
|
|
||||||
|
// ISO-8859-1 decoding not supported
|
||||||
|
// QTest::newRow("*iso") << QByteArray("attachment; filename*=iso-8859-1''foo-%E4.html") << QString::fromUtf8("foo-ä.html");
|
||||||
|
// QTest::newRow("multiTypes2") << QByteArray("attachment; filename*=ISO-8859-1''currency-sign%3d%a4; filename=\"foo-ae.html\"") << QString::fromUtf8("currency-sign=¤");
|
||||||
|
|
||||||
|
// Not yet supported
|
||||||
|
// QTest::newRow("multiType2") << QByteArray("attachment; filename*0*=ISO-8859-15''euro-sign%3d%a4; filename*=ISO-8859-1''currency-sign%3d%a4") << QString::fromUtf8("euro-sign=€");
|
||||||
|
}
|
||||||
|
|
||||||
|
void DownloadsTest::parseContentDispositionTest()
|
||||||
|
{
|
||||||
|
QFETCH(QByteArray, header);
|
||||||
|
QFETCH(QString, result);
|
||||||
|
|
||||||
|
QCOMPARE(DownloadFileHelper::parseContentDisposition(header), result);
|
||||||
|
}
|
33
tests/autotests/downloadstest.h
Normal file
33
tests/autotests/downloadstest.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/* ============================================================
|
||||||
|
* QupZilla - WebKit based browser
|
||||||
|
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
* ============================================================ */
|
||||||
|
#ifndef DOWNLOADSTEST_H
|
||||||
|
#define DOWNLOADSTEST_H
|
||||||
|
|
||||||
|
#include <QObject>
|
||||||
|
|
||||||
|
class DownloadsTest : public QObject
|
||||||
|
{
|
||||||
|
Q_OBJECT
|
||||||
|
|
||||||
|
private slots:
|
||||||
|
void parseContentDispositionTest_data();
|
||||||
|
void parseContentDispositionTest();
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // DOWNLOADSTEST_H
|
@ -18,6 +18,7 @@
|
|||||||
#include "qztoolstest.h"
|
#include "qztoolstest.h"
|
||||||
#include "formcompletertest.h"
|
#include "formcompletertest.h"
|
||||||
#include "cookiestest.h"
|
#include "cookiestest.h"
|
||||||
|
#include "downloadstest.h"
|
||||||
|
|
||||||
#include <QtTest/QtTest>
|
#include <QtTest/QtTest>
|
||||||
|
|
||||||
@ -35,5 +36,8 @@ int main(int argc, char *argv[])
|
|||||||
CookiesTest cookiesTest;
|
CookiesTest cookiesTest;
|
||||||
QTest::qExec(&cookiesTest, argc, argv);
|
QTest::qExec(&cookiesTest, argc, argv);
|
||||||
|
|
||||||
|
DownloadsTest downloadsTest;
|
||||||
|
QTest::qExec(&downloadsTest, argc, argv);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user