1
mirror of https://invent.kde.org/network/falkon.git synced 2024-12-20 18:56:34 +01:00

AdBlock: Added full support for $third-party option

- checks the Referer header of the network request to determine
  whether it is a third-party request or not
   * matching is performed on second-level domains
     (there is a minor issue with this in Qt < 4.8)
This commit is contained in:
nowrep 2012-06-28 01:41:01 +02:00
parent fa72a38050
commit c6ccd6bd1f
5 changed files with 93 additions and 36 deletions

View File

@ -78,16 +78,8 @@ QNetworkReply* AdBlockManager::block(const QNetworkRequest &request)
return 0; return 0;
} }
const AdBlockRule* blockedRule = 0;
foreach(AdBlockSubscription * subscription, m_subscriptions) { foreach(AdBlockSubscription * subscription, m_subscriptions) {
if (subscription->allow(urlDomain, urlString)) { const AdBlockRule* blockedRule = subscription->match(request, urlDomain, urlString);
return 0;
}
if (const AdBlockRule* rule = subscription->block(urlDomain, urlString)) {
blockedRule = rule;
}
if (blockedRule) { if (blockedRule) {
QVariant v = request.attribute((QNetworkRequest::Attribute)(QNetworkRequest::User + 100)); QVariant v = request.attribute((QNetworkRequest::Attribute)(QNetworkRequest::User + 100));

View File

@ -52,8 +52,45 @@
#include <QUrl> #include <QUrl>
#include <QString> #include <QString>
#include <QStringList> #include <QStringList>
#include <QNetworkRequest>
// #define ADBLOCKRULE_DEBUG // Version for Qt < 4.8 has one issue, it will wrongly
// count .co.uk (and others) as second-level domain
// Reduces the host of `url` to its registrable ("second-level") domain,
// i.e. the last label in front of the top-level domain plus the top-level
// domain itself. Returns an empty QString when no such domain can be derived.
QString toSecondLevelDomain(const QUrl &url)
{
#if QT_VERSION >= 0x040800
    const QString host = url.host();
    const QString tld = url.topLevelDomain();

    if (host.isEmpty() || tld.isEmpty()) {
        return QString();
    }

    // Everything in front of the top-level domain, e.g. "www.example"
    const QString prefix = host.left(host.size() - tld.size());
    const int lastDot = prefix.lastIndexOf('.');

    // No subdomain present - the host already is the second-level domain
    if (lastDot == -1) {
        return host;
    }

    // Keep only the label directly in front of the top-level domain
    return prefix.mid(lastDot + 1) + tld;
#else
    QString remainder = url.host();
    int dotsLeft = remainder.count('.');

    if (dotsLeft == 0) {
        return QString();
    }

    // Drop leading labels one at a time until exactly one dot is left
    for (; dotsLeft > 1; --dotsLeft) {
        remainder.remove(0, remainder.indexOf('.') + 1);
    }

    return remainder;
#endif
}
AdBlockRule::AdBlockRule(const QString &filter) AdBlockRule::AdBlockRule(const QString &filter)
: m_enabled(true) : m_enabled(true)
@ -62,6 +99,8 @@ AdBlockRule::AdBlockRule(const QString &filter)
, m_internalDisabled(false) , m_internalDisabled(false)
, m_domainRestricted(false) , m_domainRestricted(false)
, m_useRegExp(false) , m_useRegExp(false)
, m_thirdParty(false)
, m_thirdPartyException(false)
, m_caseSensitivity(Qt::CaseInsensitive) , m_caseSensitivity(Qt::CaseInsensitive)
{ {
setFilter(filter); setFilter(filter);
@ -119,23 +158,34 @@ bool AdBlockRule::isInternalDisabled() const
return m_internalDisabled; return m_internalDisabled;
} }
bool AdBlockRule::networkMatch(const QString &domain, const QString &encodedUrl) const bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const
{ {
if (m_cssRule || !m_enabled || m_internalDisabled) { if (m_cssRule || !m_enabled || m_internalDisabled) {
return false; return false;
} }
// Match domain first bool matched = false;
if (m_domainRestricted && !matchDomain(domain)) {
return false;
}
// Use regExp match if necessary
if (m_useRegExp) { if (m_useRegExp) {
return (m_regExp.indexIn(encodedUrl) != -1); matched = (m_regExp.indexIn(encodedUrl) != -1);
}
else {
matched = encodedUrl.contains(m_matchString, m_caseSensitivity);
} }
return encodedUrl.contains(m_matchString, m_caseSensitivity); if (matched) {
// Check domain restrictions
if (m_domainRestricted && !matchDomain(domain)) {
return false;
}
// Check third-party restriction
if (m_thirdParty && !matchThirdParty(request)) {
return false;
}
}
return matched;
} }
bool AdBlockRule::matchDomain(const QString &domain) const bool AdBlockRule::matchDomain(const QString &domain) const
@ -176,6 +226,20 @@ bool AdBlockRule::matchDomain(const QString &domain) const
return false; return false;
} }
bool AdBlockRule::matchThirdParty(const QNetworkRequest &request) const
{
const QString &referer = request.rawHeader("Referer");
if (referer.isEmpty()) {
return false;
}
// Third-party matching should be performed on second-level domains
const QString &refererHost = toSecondLevelDomain(QUrl(referer));
const QString &host = toSecondLevelDomain(request.url());
return m_thirdPartyException ? refererHost == host : refererHost != host;
}
void AdBlockRule::parseFilter() void AdBlockRule::parseFilter()
{ {
QString parsedLine = m_filter; QString parsedLine = m_filter;
@ -186,8 +250,7 @@ void AdBlockRule::parseFilter()
return; return;
} }
// Disabled rule - modify parsedLine to not contain starting ! so we can // Disabled rule - modify parsedLine to not contain starting ! so we can continue parsing rule
// continue parsing rule
if (m_filter.startsWith('!')) { if (m_filter.startsWith('!')) {
m_enabled = false; m_enabled = false;
parsedLine = m_filter.mid(1); parsedLine = m_filter.mid(1);
@ -230,8 +293,9 @@ void AdBlockRule::parseFilter()
m_caseSensitivity = Qt::CaseSensitive; m_caseSensitivity = Qt::CaseSensitive;
++handledOptions; ++handledOptions;
} }
else if (option.startsWith("third-party")) { else if (option.contains("third-party")) {
// I think we can ignore it m_thirdParty = true;
m_thirdPartyException = option.startsWith('~');
++handledOptions; ++handledOptions;
} }
} }
@ -264,9 +328,9 @@ void AdBlockRule::parseFilter()
parsedLine = parsedLine.left(parsedLine.size() - 1); parsedLine = parsedLine.left(parsedLine.size() - 1);
} }
// If we still find a wildcard (*) or separator (^) or start with domain (||) // If we still find a wildcard (*) or separator (^) or (|)
// we must modify parsedLine to comply with QRegExp // we must modify parsedLine to comply with QRegExp
if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.startsWith("||")) { if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.contains('|')) {
parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
.replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder .replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
.replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards .replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards

View File

@ -52,11 +52,11 @@
#include "qz_namespace.h" #include "qz_namespace.h"
class QNetworkRequest;
class QUrl; class QUrl;
class AdBlockRule class AdBlockRule
{ {
public: public:
AdBlockRule(const QString &filter = QString()); AdBlockRule(const QString &filter = QString());
@ -74,8 +74,10 @@ public:
bool isInternalDisabled() const; bool isInternalDisabled() const;
bool networkMatch(const QString &domain, const QString &encodedUrl) const; bool networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const;
bool matchDomain(const QString &domain) const; bool matchDomain(const QString &domain) const;
bool matchThirdParty(const QNetworkRequest &request) const;
private: private:
void parseFilter(); void parseFilter();
@ -100,6 +102,8 @@ private:
QStringList m_allowedDomains; QStringList m_allowedDomains;
QStringList m_blockedDomains; QStringList m_blockedDomains;
bool m_thirdParty;
bool m_thirdPartyException;
Qt::CaseSensitivity m_caseSensitivity; Qt::CaseSensitivity m_caseSensitivity;
}; };

View File

@ -195,23 +195,20 @@ void AdBlockSubscription::saveDownloadedData(QByteArray &data)
file.close(); file.close();
} }
const AdBlockRule* AdBlockSubscription::allow(const QString &urlDomain, const QString &urlString) const const AdBlockRule* AdBlockSubscription::match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const
{ {
foreach(const AdBlockRule * rule, m_networkExceptionRules) { foreach(const AdBlockRule * rule, m_networkExceptionRules) {
if (rule->networkMatch(urlDomain, urlString)) { if (rule->networkMatch(request, urlDomain, urlString)) {
return rule; return 0;
} }
} }
return 0;
}
const AdBlockRule* AdBlockSubscription::block(const QString &urlDomain, const QString &urlString) const
{
foreach(const AdBlockRule * rule, m_networkBlockRules) { foreach(const AdBlockRule * rule, m_networkBlockRules) {
if (rule->networkMatch(urlDomain, urlString)) { if (rule->networkMatch(request, urlDomain, urlString)) {
return rule; return rule;
} }
} }
return 0; return 0;
} }

View File

@ -52,6 +52,7 @@
#include "qz_namespace.h" #include "qz_namespace.h"
#include "adblockrule.h" #include "adblockrule.h"
class QNetworkRequest;
class QNetworkReply; class QNetworkReply;
class QUrl; class QUrl;
@ -72,8 +73,7 @@ public:
virtual void loadSubscription(); virtual void loadSubscription();
virtual void saveSubscription(); virtual void saveSubscription();
const AdBlockRule* allow(const QString &urlDomain, const QString &urlString) const; const AdBlockRule* match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const;
const AdBlockRule* block(const QString &urlDomain, const QString &urlString) const;
QString elementHidingRules() const; QString elementHidingRules() const;
QString elementHidingRulesForDomain(const QString &domain) const; QString elementHidingRulesForDomain(const QString &domain) const;