diff --git a/CHANGELOG b/CHANGELOG index 11e53d877..fb8964549 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,6 +24,7 @@ Version 1.4.0 * moved config directory into ~/.config/qupzilla * certificates bundle is now only used on windows * reduced memory usage of AdBlock (saves up to 30MB with just EasyList) + * greatly improved performance when matching regexp rules in AdBlock * GreaseMonkey: reload script if source file changed on disk * GreaseMonkey: fixed don't loading invalid scripts * fixed opening browser with url with ampersand (%26) as command line argument diff --git a/src/lib/adblock/adblockrule.cpp b/src/lib/adblock/adblockrule.cpp index 8fe00b439..169595489 100644 --- a/src/lib/adblock/adblockrule.cpp +++ b/src/lib/adblock/adblockrule.cpp @@ -46,6 +46,7 @@ #include "adblockrule.h" #include "adblocksubscription.h" +#include "qztools.h" #include #include "qzregexp.h" @@ -206,12 +207,16 @@ bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &do bool matched = false; if (m_useDomainMatch) { - matched = _matchDomain(domain, m_matchString); + matched = isMatchingDomain(domain, m_matchString); } else if (m_useEndsMatch) { matched = encodedUrl.endsWith(m_matchString, m_caseSensitivity); } else if (m_regExp) { + if (!isMatchingRegExpStrings(encodedUrl)) { + return false; + } + matched = (m_regExp->indexIn(encodedUrl) != -1); } else { @@ -277,14 +282,14 @@ bool AdBlockRule::matchDomain(const QString &domain) const if (m_blockedDomains.isEmpty()) { foreach(const QString & d, m_allowedDomains) { - if (_matchDomain(domain, d)) { + if (isMatchingDomain(domain, d)) { return true; } } } else if (m_allowedDomains.isEmpty()) { foreach(const QString & d, m_blockedDomains) { - if (_matchDomain(domain, d)) { + if (isMatchingDomain(domain, d)) { return false; } } @@ -292,13 +297,13 @@ bool AdBlockRule::matchDomain(const QString &domain) const } else { foreach(const QString & d, m_blockedDomains) { - if (_matchDomain(domain, d)) { + if (isMatchingDomain(domain, d)) { return false; } } foreach(const QString & d, m_allowedDomains) { - if (_matchDomain(domain, d)) { + if (isMatchingDomain(domain, d)) { return true; } } @@ -388,6 +393,7 @@ void AdBlockRule::parseFilter() m_cssSelector = parsedLine.mid(pos + 2); m_cssSelector.remove('\\'); + // CSS rule cannot have more options -> stop parsing return; } @@ -504,7 +510,9 @@ void AdBlockRule::parseFilter() // we must modify parsedLine to comply with QzRegExp if (parsedLine.contains(QLatin1Char('*')) || parsedLine.contains(QLatin1Char('^')) || parsedLine.contains(QLatin1Char('|'))) { - parsedLine.replace(QzRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards + QString parsedRegExp = parsedLine; + + parsedRegExp.replace(QzRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards .replace(QzRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder .replace(QzRegExp(QLatin1String("^(\\*)")), QString()) // remove leading wildcards .replace(QzRegExp(QLatin1String("(\\*)$")), QString()) @@ -517,7 +525,8 @@ void AdBlockRule::parseFilter() .replace(QzRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process anchor at expression end .replace(QzRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")); // replace wildcards by .* - m_regExp = new QzRegExp(parsedLine, m_caseSensitivity); + m_regExpStrings = parseRegExpFilter(parsedLine); + m_regExp = new QzRegExp(parsedRegExp, m_caseSensitivity); return; } @@ -544,19 +553,41 @@ void AdBlockRule::parseDomains(const QString &domains, const QChar &separator) m_domainRestricted = (!m_blockedDomains.isEmpty() || !m_allowedDomains.isEmpty()); } -bool AdBlockRule::_matchDomain(const QString &domain, const QString &filter) const +bool AdBlockRule::isMatchingDomain(const QString &domain, const QString &filter) const { - if (!domain.endsWith(filter)) { - return false; + return QzTools::matchDomain(filter, domain); +} + +bool AdBlockRule::isMatchingRegExpStrings(const QString &url) const +{ + foreach(const QString & string, m_regExpStrings) { + if (!url.contains(string)) { + return false; + } } - int index = domain.indexOf(filter); + return true; +} - if (index == 0 || filter[0] == QLatin1Char('.')) { - return true; +// Split regexp filter into strings that can be used with QString::contains +// Don't use parts that contains only 1 char and duplicated parts +QStringList AdBlockRule::parseRegExpFilter(const QString &parsedFilter) const +{ + // Meta characters in AdBlock rules are | * ^ + QStringList list = parsedFilter.split(QzRegExp("[|\\*\\^]"), QString::SkipEmptyParts); + + list.removeDuplicates(); + + for (int i = 0; i < list.length(); ++i) { + const QString &part = list.at(i); + + if (part.length() < 2) { + list.removeAt(i); + i--; + } } - return domain[index - 1] == QLatin1Char('.'); + return list; } AdBlockRule::~AdBlockRule() diff --git a/src/lib/adblock/adblockrule.h b/src/lib/adblock/adblockrule.h index f64f7d4b0..ed9afc480 100644 --- a/src/lib/adblock/adblockrule.h +++ b/src/lib/adblock/adblockrule.h @@ -95,12 +95,15 @@ public: bool matchXmlHttpRequest(const QNetworkRequest &request) const; bool matchImage(const QString &encodedUrl) const; +protected: + bool isMatchingDomain(const QString &domain, const QString &filter) const; + bool isMatchingRegExpStrings(const QString &url) const; + QStringList parseRegExpFilter(const QString &parsedFilter) const; + private: void parseFilter(); void parseDomains(const QString &domains, const QChar &separator); - bool _matchDomain(const QString &domain, const QString &filter) const; - AdBlockSubscription* m_subscription; QString m_filter; @@ -112,6 +115,7 @@ private: bool m_domainRestricted; QzRegExp* m_regExp; + QStringList m_regExpStrings; bool m_useDomainMatch; bool m_useEndsMatch; diff --git a/src/lib/cookies/cookiejar.cpp b/src/lib/cookies/cookiejar.cpp index 4c20c9134..122d8ae0a 100644 --- a/src/lib/cookies/cookiejar.cpp +++ b/src/lib/cookies/cookiejar.cpp @@ -19,6 +19,7 @@ #include "qupzilla.h" #include "mainapplication.h" #include "settings.h" +#include "qztools.h" #include #include @@ -234,17 +235,7 @@ bool CookieJar::matchDomain(QString cookieDomain, QString siteDomain) siteDomain = siteDomain.mid(1); } - if (cookieDomain == siteDomain) { - return true; - } - - if (!siteDomain.endsWith(cookieDomain)) { - return false; - } - - int index = siteDomain.indexOf(cookieDomain); - - return index > 0 && siteDomain[index - 1] == QLatin1Char('.'); + return QzTools::matchDomain(cookieDomain, siteDomain); } bool CookieJar::listMatchesDomain(const QStringList &list, const QString &cookieDomain) diff --git a/src/lib/tools/qztools.cpp b/src/lib/tools/qztools.cpp index 0fd775cec..996303fbc 100644 --- a/src/lib/tools/qztools.cpp +++ b/src/lib/tools/qztools.cpp @@ -472,6 +472,24 @@ bool QzTools::isUtf8(const char* string) return true; } +// Matches domain (assumes both pattern and domain not starting with dot) +// pattern = domain to be matched +// domain = site domain +bool QzTools::matchDomain(const QString &pattern, const QString &domain) +{ + if (pattern == domain) { + return true; + } + + if (!domain.endsWith(pattern)) { + return false; + } + + int index = domain.indexOf(pattern); + + return index > 0 && domain[index - 1] == QLatin1Char('.'); +} + static inline bool isQuote(const QChar &c) { return (c == QLatin1Char('"') || c == QLatin1Char('\'')); diff --git a/src/lib/tools/qztools.h b/src/lib/tools/qztools.h index 0d1008bd3..7465831f8 100644 --- a/src/lib/tools/qztools.h +++ b/src/lib/tools/qztools.h @@ -63,6 +63,8 @@ bool QT_QUPZILLA_EXPORT startExternalProcess(const QString &executable, const QS QIcon QT_QUPZILLA_EXPORT iconFromFileName(const QString &fileName); bool QT_QUPZILLA_EXPORT isUtf8(const char* string); +bool QT_QUPZILLA_EXPORT matchDomain(const QString &pattern, const QString &domain); + QString QT_QUPZILLA_EXPORT operatingSystem(); // Qt5 migration help functions diff --git a/tests/autotests/adblocktest.cpp b/tests/autotests/adblocktest.cpp new file mode 100644 index 000000000..9bcac640b --- /dev/null +++ b/tests/autotests/adblocktest.cpp @@ -0,0 +1,109 @@ +/* ============================================================ +* QupZilla - WebKit based browser +* Copyright (C) 2013 David Rosca +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* ============================================================ */ +#include "adblocktest.h" +#include "adblockrule.h" + +#include + +class AdBlockRule_Test : public AdBlockRule +{ +public: + QStringList parseRegExpFilter(const QString &parsedFilter) + { + return AdBlockRule::parseRegExpFilter(parsedFilter); + } + + bool isMatchingDomain(const QString &domain, const QString &filter) const + { + return AdBlockRule::isMatchingDomain(domain, filter); + } +}; + +void AdBlockTest::isMatchingCookieTest_data() +{ + // Test copied from CookiesTest + QTest::addColumn("filterDomain"); + QTest::addColumn("siteDomain"); + QTest::addColumn("result"); + + QTest::newRow("test1") << "example.com" << "www.example.com" << true; + QTest::newRow("test2") << "example.com" << "example.com" << true; + QTest::newRow("test3") << "example.com" << "anotherexample.com" << false; + QTest::newRow("test4") << "test.example.com" << "example.com" << false; + QTest::newRow("test5") << "www.example.com" << "example.com" << false; + QTest::newRow("test_empty") << "www.example.com" << "" << false; + QTest::newRow("test_empty2") << "" << "example.com" << false; +} + +void AdBlockTest::isMatchingCookieTest() +{ + AdBlockRule_Test rule_test; + + QFETCH(QString, filterDomain); + QFETCH(QString, siteDomain); + QFETCH(bool, result); + + QCOMPARE(rule_test.isMatchingDomain(siteDomain, filterDomain), result); +} + +void AdBlockTest::parseRegExpFilterTest_data() +{ + QTest::addColumn("parsedFilter"); + QTest::addColumn("result"); + + QTest::newRow("test1") << "||doubleclick.net/pfadx/tmg.telegraph." + << (QStringList() << "doubleclick.net/pfadx/tmg.telegraph."); + QTest::newRow("test2") << "||doubleclick.net/pfadx/*.mtvi" + << (QStringList() << "doubleclick.net/pfadx/" << ".mtvi"); + QTest::newRow("test3") << "&prvtof=*&poru=" + << (QStringList() << "&prvtof=" << "&poru="); + QTest::newRow("test4") << "/addyn|*;adtech;" + << (QStringList() << "/addyn" << ";adtech;"); + QTest::newRow("test5") << "/eas_fif.html^" + << (QStringList() << "/eas_fif.html"); + QTest::newRow("test6") << "://findnsave.^.*/api/groupon.json?" + << (QStringList() << "://findnsave." << "/api/groupon.json?"); + QTest::newRow("test7") << "^fp=*&prvtof=" + << (QStringList() << "fp=" << "&prvtof="); + QTest::newRow("test8") << "|http://ax-d.*/jstag^" + << (QStringList() << "http://ax-d." << "/jstag"); + QTest::newRow("test9") << "||reuters.com^*/rcom-wt-mlt.js" + << (QStringList() << "reuters.com" <<"/rcom-wt-mlt.js"); + QTest::newRow("test10") << "||chip.de^*/tracking.js" + << (QStringList() << "chip.de" << "/tracking.js"); + QTest::newRow("ignore1char") << "/search.php?uid=*.*&src=" + << (QStringList() << "/search.php?uid=" << "&src="); + QTest::newRow("ignoreDuplicates") << "/search.*.dup.*.dup.*&src=" + << (QStringList() << "/search." << ".dup." << "&src="); + QTest::newRow("empty") << QString() + << (QStringList()); + QTest::newRow("justspaces") << QString(" ") + << (QStringList() << " "); + QTest::newRow("spacesWithMetachars") << QString(" * ?") + << (QStringList() << " " << " ?"); +} + +void AdBlockTest::parseRegExpFilterTest() +{ + AdBlockRule_Test rule_test; + + QFETCH(QString, parsedFilter); + QFETCH(QStringList, result); + + QCOMPARE(rule_test.parseRegExpFilter(parsedFilter), result); +} diff --git a/tests/autotests/adblocktest.h b/tests/autotests/adblocktest.h new file mode 100644 index 000000000..447720f43 --- /dev/null +++ b/tests/autotests/adblocktest.h @@ -0,0 +1,36 @@ +/* ============================================================ +* QupZilla - WebKit based browser +* Copyright (C) 2013 David Rosca +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +* ============================================================ */ +#ifndef ADBLOCKTEST_H +#define ADBLOCKTEST_H + +#include + +class AdBlockTest : public QObject +{ + Q_OBJECT + +private slots: + void isMatchingCookieTest_data(); + void isMatchingCookieTest(); + + void parseRegExpFilterTest_data(); + void parseRegExpFilterTest(); + +}; + +#endif // ADBLOCKTEST_H diff --git a/tests/autotests/autotests.pro b/tests/autotests/autotests.pro index 4ea0bdd00..faf2b388f 100644 --- a/tests/autotests/autotests.pro +++ b/tests/autotests/autotests.pro @@ -49,11 +49,13 @@ HEADERS += \ qztoolstest.h \ formcompletertest.h \ cookiestest.h \ - downloadstest.h + downloadstest.h \ + adblocktest.h SOURCES += \ qztoolstest.cpp \ main.cpp \ formcompletertest.cpp \ cookiestest.cpp \ - downloadstest.cpp + downloadstest.cpp \ + adblocktest.cpp diff --git a/tests/autotests/main.cpp b/tests/autotests/main.cpp index 42829f56e..03f592db6 100644 --- a/tests/autotests/main.cpp +++ b/tests/autotests/main.cpp @@ -19,6 +19,7 @@ #include "formcompletertest.h" #include "cookiestest.h" #include "downloadstest.h" +#include "adblocktest.h" #include @@ -39,5 +40,8 @@ int main(int argc, char *argv[]) DownloadsTest downloadsTest; QTest::qExec(&downloadsTest, argc, argv); + AdBlockTest adblockTest; + QTest::qExec(&adblockTest, argc, argv); + return 0; }