From c6ccd6bd1fbc1d608547677e52d368ff182b8f82 Mon Sep 17 00:00:00 2001 From: nowrep Date: Thu, 28 Jun 2012 01:41:01 +0200 Subject: [PATCH] AdBlock: Added full support for $third-party option - checking Referer of network request to determine if it is third party request or not * matching is performed on second-level domains (there is minor issue with it in Qt < 4.8) --- src/lib/adblock/adblockmanager.cpp | 10 +-- src/lib/adblock/adblockrule.cpp | 94 +++++++++++++++++++++---- src/lib/adblock/adblockrule.h | 8 ++- src/lib/adblock/adblocksubscription.cpp | 13 ++-- src/lib/adblock/adblocksubscription.h | 4 +- 5 files changed, 93 insertions(+), 36 deletions(-) diff --git a/src/lib/adblock/adblockmanager.cpp b/src/lib/adblock/adblockmanager.cpp index a4da1ea34..ef81a9767 100644 --- a/src/lib/adblock/adblockmanager.cpp +++ b/src/lib/adblock/adblockmanager.cpp @@ -78,16 +78,8 @@ QNetworkReply* AdBlockManager::block(const QNetworkRequest &request) return 0; } - const AdBlockRule* blockedRule = 0; - foreach(AdBlockSubscription * subscription, m_subscriptions) { - if (subscription->allow(urlDomain, urlString)) { - return 0; - } - - if (const AdBlockRule* rule = subscription->block(urlDomain, urlString)) { - blockedRule = rule; - } + const AdBlockRule* blockedRule = subscription->match(request, urlDomain, urlString); if (blockedRule) { QVariant v = request.attribute((QNetworkRequest::Attribute)(QNetworkRequest::User + 100)); diff --git a/src/lib/adblock/adblockrule.cpp b/src/lib/adblock/adblockrule.cpp index 6175b2220..544b8039b 100644 --- a/src/lib/adblock/adblockrule.cpp +++ b/src/lib/adblock/adblockrule.cpp @@ -52,8 +52,45 @@ #include #include #include +#include -// #define ADBLOCKRULE_DEBUG +// Version for Qt < 4.8 has one issue, it will wrongly +// count .co.uk (and others) as second-level domain +QString toSecondLevelDomain(const QUrl &url) +{ +#if QT_VERSION >= 0x040800 + const QString &topLevelDomain = url.topLevelDomain(); + const QString &urlHost = url.host(); + + if (topLevelDomain.isEmpty() || urlHost.isEmpty()) { + return QString(); + } + + QString domain = urlHost.left(urlHost.size() - topLevelDomain.size()); + + if (domain.count('.') == 0) { + return urlHost; + } + + while (domain.count('.') != 0) { + domain = domain.mid(domain.indexOf('.') + 1); + } + + return domain + topLevelDomain; +#else + QString domain = url.host(); + + if (domain.count('.') == 0) { + return QString(); + } + + while (domain.count('.') != 1) { + domain = domain.mid(domain.indexOf('.') + 1); + } + + return domain; +#endif +} AdBlockRule::AdBlockRule(const QString &filter) : m_enabled(true) @@ -62,6 +99,8 @@ AdBlockRule::AdBlockRule(const QString &filter) , m_internalDisabled(false) , m_domainRestricted(false) , m_useRegExp(false) + , m_thirdParty(false) + , m_thirdPartyException(false) , m_caseSensitivity(Qt::CaseInsensitive) { setFilter(filter); @@ -119,23 +158,34 @@ bool AdBlockRule::isInternalDisabled() const return m_internalDisabled; } -bool AdBlockRule::networkMatch(const QString &domain, const QString &encodedUrl) const +bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const { if (m_cssRule || !m_enabled || m_internalDisabled) { return false; } - // Match domain first - if (m_domainRestricted && !matchDomain(domain)) { - return false; - } + bool matched = false; - // Use regExp match if necessary if (m_useRegExp) { - return (m_regExp.indexIn(encodedUrl) != -1); + matched = (m_regExp.indexIn(encodedUrl) != -1); + } + else { + matched = encodedUrl.contains(m_matchString, m_caseSensitivity); } - return encodedUrl.contains(m_matchString, m_caseSensitivity); + if (matched) { + // Check domain restrictions + if (m_domainRestricted && !matchDomain(domain)) { + return false; + } + + // Check third-party restriction + if (m_thirdParty && !matchThirdParty(request)) { + return false; + } + } + + return matched; } bool AdBlockRule::matchDomain(const QString &domain) const @@ -176,6 +226,20 @@ bool AdBlockRule::matchDomain(const QString &domain) const return false; } +bool AdBlockRule::matchThirdParty(const QNetworkRequest &request) const +{ + const QString &referer = request.rawHeader("Referer"); + if (referer.isEmpty()) { + return false; + } + + // Third-party matching should be performed on second-level domains + const QString &refererHost = toSecondLevelDomain(QUrl(referer)); + const QString &host = toSecondLevelDomain(request.url()); + + return m_thirdPartyException ? refererHost == host : refererHost != host; +} + void AdBlockRule::parseFilter() { QString parsedLine = m_filter; @@ -186,8 +250,7 @@ void AdBlockRule::parseFilter() return; } - // Disabled rule - modify parsedLine to not contain starting ! so we can - // continue parsing rule + // Disabled rule - modify parsedLine to not contain starting ! so we can continue parsing rule if (m_filter.startsWith('!')) { m_enabled = false; parsedLine = m_filter.mid(1); @@ -230,8 +293,9 @@ void AdBlockRule::parseFilter() m_caseSensitivity = Qt::CaseSensitive; ++handledOptions; } - else if (option.startsWith("third-party")) { - // I think we can ignore it + else if (option.contains("third-party")) { + m_thirdParty = true; + m_thirdPartyException = option.startsWith('~'); ++handledOptions; } } @@ -264,9 +328,9 @@ void AdBlockRule::parseFilter() parsedLine = parsedLine.left(parsedLine.size() - 1); } - // If we still find a wildcard (*) or separator (^) or start with domain (||) + // If we still find a wildcard (*) or separator (^) or (|) // we must modify parsedLine to comply with QRegExp - if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.startsWith("||")) { + if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.contains('|')) { parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards .replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder .replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards diff --git a/src/lib/adblock/adblockrule.h b/src/lib/adblock/adblockrule.h index 1f4331234..78ea00199 100644 --- a/src/lib/adblock/adblockrule.h +++ b/src/lib/adblock/adblockrule.h @@ -52,11 +52,11 @@ #include "qz_namespace.h" +class QNetworkRequest; class QUrl; class AdBlockRule { - public: AdBlockRule(const QString &filter = QString()); @@ -74,8 +74,10 @@ public: bool isInternalDisabled() const; - bool networkMatch(const QString &domain, const QString &encodedUrl) const; + bool networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const; + bool matchDomain(const QString &domain) const; + bool matchThirdParty(const QNetworkRequest &request) const; private: void parseFilter(); @@ -100,6 +102,8 @@ private: QStringList m_allowedDomains; QStringList m_blockedDomains; + bool m_thirdParty; + bool m_thirdPartyException; Qt::CaseSensitivity m_caseSensitivity; }; diff --git a/src/lib/adblock/adblocksubscription.cpp b/src/lib/adblock/adblocksubscription.cpp index b5e449ee7..2ac0349aa 100644 --- a/src/lib/adblock/adblocksubscription.cpp +++ b/src/lib/adblock/adblocksubscription.cpp @@ -195,23 +195,20 @@ void AdBlockSubscription::saveDownloadedData(QByteArray &data) file.close(); } -const AdBlockRule* AdBlockSubscription::allow(const QString &urlDomain, const QString &urlString) const +const AdBlockRule* AdBlockSubscription::match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const { foreach(const AdBlockRule * rule, m_networkExceptionRules) { - if (rule->networkMatch(urlDomain, urlString)) { - return rule; + if (rule->networkMatch(request, urlDomain, urlString)) { + return 0; } } - return 0; -} -const AdBlockRule* AdBlockSubscription::block(const QString &urlDomain, const QString &urlString) const -{ foreach(const AdBlockRule * rule, m_networkBlockRules) { - if (rule->networkMatch(urlDomain, urlString)) { + if (rule->networkMatch(request, urlDomain, urlString)) { return rule; } } + return 0; } diff --git a/src/lib/adblock/adblocksubscription.h b/src/lib/adblock/adblocksubscription.h index 9afa72b90..bfac6e7db 100644 --- a/src/lib/adblock/adblocksubscription.h +++ b/src/lib/adblock/adblocksubscription.h @@ -52,6 +52,7 @@ #include "qz_namespace.h" #include "adblockrule.h" +class QNetworkRequest; class QNetworkReply; class QUrl; @@ -72,8 +73,7 @@ public: virtual void loadSubscription(); virtual void saveSubscription(); - const AdBlockRule* allow(const QString &urlDomain, const QString &urlString) const; - const AdBlockRule* block(const QString &urlDomain, const QString &urlString) const; + const AdBlockRule* match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const; QString elementHidingRules() const; QString elementHidingRulesForDomain(const QString &domain) const;