mirror of
https://invent.kde.org/network/falkon.git
synced 2024-12-20 18:56:34 +01:00
AdBlock: Added full support for $third-party option
- checks the Referer header of the network request to determine whether it is a third-party request * matching is performed on second-level domains (there is a minor issue with this in Qt < 4.8)
This commit is contained in:
parent
fa72a38050
commit
c6ccd6bd1f
@ -78,16 +78,8 @@ QNetworkReply* AdBlockManager::block(const QNetworkRequest &request)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const AdBlockRule* blockedRule = 0;
|
|
||||||
|
|
||||||
foreach(AdBlockSubscription * subscription, m_subscriptions) {
|
foreach(AdBlockSubscription * subscription, m_subscriptions) {
|
||||||
if (subscription->allow(urlDomain, urlString)) {
|
const AdBlockRule* blockedRule = subscription->match(request, urlDomain, urlString);
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (const AdBlockRule* rule = subscription->block(urlDomain, urlString)) {
|
|
||||||
blockedRule = rule;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (blockedRule) {
|
if (blockedRule) {
|
||||||
QVariant v = request.attribute((QNetworkRequest::Attribute)(QNetworkRequest::User + 100));
|
QVariant v = request.attribute((QNetworkRequest::Attribute)(QNetworkRequest::User + 100));
|
||||||
|
@ -52,8 +52,45 @@
|
|||||||
#include <QUrl>
|
#include <QUrl>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
#include <QNetworkRequest>
|
||||||
|
|
||||||
// #define ADBLOCKRULE_DEBUG
|
// Version for Qt < 4.8 has one issue, it will wrongly
|
||||||
|
// count .co.uk (and others) as second-level domain
|
||||||
|
// Reduces the host of `url` to its second-level domain, i.e. the last label
// in front of the top-level domain followed by the top-level domain itself.
//
// Qt >= 4.8: uses QUrl::topLevelDomain(); returns an empty string when either
//            the host or the top-level domain is empty.
// Qt <  4.8: keeps the last two dot-separated labels of the host; returns an
//            empty string when the host contains no dot at all.
QString toSecondLevelDomain(const QUrl &url)
|
||||||
|
{
|
||||||
|
#if QT_VERSION >= 0x040800
|
||||||
|
// NOTE(review): the concatenation at the end of this branch assumes
// topLevelDomain() includes its leading dot (e.g. ".co.uk") - confirm.
const QString &topLevelDomain = url.topLevelDomain();
|
||||||
|
const QString &urlHost = url.host();
|
||||||
|

||||||
|
if (topLevelDomain.isEmpty() || urlHost.isEmpty()) {
|
||||||
|
return QString();
|
||||||
|
}
|
||||||
|

||||||
|
// Host with the top-level domain suffix cut off
QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
|
||||||
|

||||||
|
// No dot left: the host has no subdomain part, return it unchanged
if (domain.count('.') == 0) {
|
||||||
|
return urlHost;
|
||||||
|
}
|
||||||
|

||||||
|
// Drop leading labels one by one until only the last label remains
while (domain.count('.') != 0) {
|
||||||
|
domain = domain.mid(domain.indexOf('.') + 1);
|
||||||
|
}
|
||||||
|

||||||
|
return domain + topLevelDomain;
|
||||||
|
#else
|
||||||
|
QString domain = url.host();
|
||||||
|

||||||
|
// A host without any dot is not reduced; an empty string is returned
if (domain.count('.') == 0) {
|
||||||
|
return QString();
|
||||||
|
}
|
||||||
|

||||||
|
// Drop leading labels until exactly one dot (two labels) is left
while (domain.count('.') != 1) {
|
||||||
|
domain = domain.mid(domain.indexOf('.') + 1);
|
||||||
|
}
|
||||||
|

||||||
|
return domain;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
AdBlockRule::AdBlockRule(const QString &filter)
|
AdBlockRule::AdBlockRule(const QString &filter)
|
||||||
: m_enabled(true)
|
: m_enabled(true)
|
||||||
@ -62,6 +99,8 @@ AdBlockRule::AdBlockRule(const QString &filter)
|
|||||||
, m_internalDisabled(false)
|
, m_internalDisabled(false)
|
||||||
, m_domainRestricted(false)
|
, m_domainRestricted(false)
|
||||||
, m_useRegExp(false)
|
, m_useRegExp(false)
|
||||||
|
, m_thirdParty(false)
|
||||||
|
, m_thirdPartyException(false)
|
||||||
, m_caseSensitivity(Qt::CaseInsensitive)
|
, m_caseSensitivity(Qt::CaseInsensitive)
|
||||||
{
|
{
|
||||||
setFilter(filter);
|
setFilter(filter);
|
||||||
@ -119,23 +158,34 @@ bool AdBlockRule::isInternalDisabled() const
|
|||||||
return m_internalDisabled;
|
return m_internalDisabled;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AdBlockRule::networkMatch(const QString &domain, const QString &encodedUrl) const
|
bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const
|
||||||
{
|
{
|
||||||
if (m_cssRule || !m_enabled || m_internalDisabled) {
|
if (m_cssRule || !m_enabled || m_internalDisabled) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match domain first
|
bool matched = false;
|
||||||
if (m_domainRestricted && !matchDomain(domain)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use regExp match if necessary
|
|
||||||
if (m_useRegExp) {
|
if (m_useRegExp) {
|
||||||
return (m_regExp.indexIn(encodedUrl) != -1);
|
matched = (m_regExp.indexIn(encodedUrl) != -1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
matched = encodedUrl.contains(m_matchString, m_caseSensitivity);
|
||||||
}
|
}
|
||||||
|
|
||||||
return encodedUrl.contains(m_matchString, m_caseSensitivity);
|
if (matched) {
|
||||||
|
// Check domain restrictions
|
||||||
|
if (m_domainRestricted && !matchDomain(domain)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check third-party restriction
|
||||||
|
if (m_thirdParty && !matchThirdParty(request)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return matched;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool AdBlockRule::matchDomain(const QString &domain) const
|
bool AdBlockRule::matchDomain(const QString &domain) const
|
||||||
@ -176,6 +226,20 @@ bool AdBlockRule::matchDomain(const QString &domain) const
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the rule's third-party option against `request` by comparing the
// second-level domain of the "Referer" header with that of the request URL.
// Returns false when the request carries no Referer header.
bool AdBlockRule::matchThirdParty(const QNetworkRequest &request) const
|
||||||
|
{
|
||||||
|
const QString &referer = request.rawHeader("Referer");
|
||||||
|
// Without a Referer there is nothing to compare against
if (referer.isEmpty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|

||||||
|
// Third-party matching should be performed on second-level domains
|
||||||
|
const QString &refererHost = toSecondLevelDomain(QUrl(referer));
|
||||||
|
const QString &host = toSecondLevelDomain(request.url());
|
||||||
|

||||||
|
// m_thirdPartyException inverts the test: match when both domains are equal,
// otherwise match when they differ
return m_thirdPartyException ? refererHost == host : refererHost != host;
|
||||||
|
}
|
||||||
|
|
||||||
void AdBlockRule::parseFilter()
|
void AdBlockRule::parseFilter()
|
||||||
{
|
{
|
||||||
QString parsedLine = m_filter;
|
QString parsedLine = m_filter;
|
||||||
@ -186,8 +250,7 @@ void AdBlockRule::parseFilter()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disabled rule - modify parsedLine to not contain starting ! so we can
|
// Disabled rule - modify parsedLine to not contain starting ! so we can continue parsing rule
|
||||||
// continue parsing rule
|
|
||||||
if (m_filter.startsWith('!')) {
|
if (m_filter.startsWith('!')) {
|
||||||
m_enabled = false;
|
m_enabled = false;
|
||||||
parsedLine = m_filter.mid(1);
|
parsedLine = m_filter.mid(1);
|
||||||
@ -230,8 +293,9 @@ void AdBlockRule::parseFilter()
|
|||||||
m_caseSensitivity = Qt::CaseSensitive;
|
m_caseSensitivity = Qt::CaseSensitive;
|
||||||
++handledOptions;
|
++handledOptions;
|
||||||
}
|
}
|
||||||
else if (option.startsWith("third-party")) {
|
else if (option.contains("third-party")) {
|
||||||
// I think we can ignore it
|
m_thirdParty = true;
|
||||||
|
m_thirdPartyException = option.startsWith('~');
|
||||||
++handledOptions;
|
++handledOptions;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -264,9 +328,9 @@ void AdBlockRule::parseFilter()
|
|||||||
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we still find a wildcard (*) or separator (^) or start with domain (||)
|
// If we still find a wildcard (*) or separator (^) or (|)
|
||||||
// we must modify parsedLine to comply with QRegExp
|
// we must modify parsedLine to comply with QRegExp
|
||||||
if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.startsWith("||")) {
|
if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.contains('|')) {
|
||||||
parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
|
parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
|
||||||
.replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
|
.replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
|
||||||
.replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards
|
.replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards
|
||||||
|
@ -52,11 +52,11 @@
|
|||||||
|
|
||||||
#include "qz_namespace.h"
|
#include "qz_namespace.h"
|
||||||
|
|
||||||
|
class QNetworkRequest;
|
||||||
class QUrl;
|
class QUrl;
|
||||||
|
|
||||||
class AdBlockRule
|
class AdBlockRule
|
||||||
{
|
{
|
||||||
|
|
||||||
public:
|
public:
|
||||||
AdBlockRule(const QString &filter = QString());
|
AdBlockRule(const QString &filter = QString());
|
||||||
|
|
||||||
@ -74,8 +74,10 @@ public:
|
|||||||
|
|
||||||
bool isInternalDisabled() const;
|
bool isInternalDisabled() const;
|
||||||
|
|
||||||
bool networkMatch(const QString &domain, const QString &encodedUrl) const;
|
bool networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const;
|
||||||
|
|
||||||
bool matchDomain(const QString &domain) const;
|
bool matchDomain(const QString &domain) const;
|
||||||
|
bool matchThirdParty(const QNetworkRequest &request) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void parseFilter();
|
void parseFilter();
|
||||||
@ -100,6 +102,8 @@ private:
|
|||||||
QStringList m_allowedDomains;
|
QStringList m_allowedDomains;
|
||||||
QStringList m_blockedDomains;
|
QStringList m_blockedDomains;
|
||||||
|
|
||||||
|
bool m_thirdParty;
|
||||||
|
bool m_thirdPartyException;
|
||||||
Qt::CaseSensitivity m_caseSensitivity;
|
Qt::CaseSensitivity m_caseSensitivity;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -195,23 +195,20 @@ void AdBlockSubscription::saveDownloadedData(QByteArray &data)
|
|||||||
file.close();
|
file.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
const AdBlockRule* AdBlockSubscription::allow(const QString &urlDomain, const QString &urlString) const
|
const AdBlockRule* AdBlockSubscription::match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const
|
||||||
{
|
{
|
||||||
foreach(const AdBlockRule * rule, m_networkExceptionRules) {
|
foreach(const AdBlockRule * rule, m_networkExceptionRules) {
|
||||||
if (rule->networkMatch(urlDomain, urlString)) {
|
if (rule->networkMatch(request, urlDomain, urlString)) {
|
||||||
return rule;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
const AdBlockRule* AdBlockSubscription::block(const QString &urlDomain, const QString &urlString) const
|
|
||||||
{
|
|
||||||
foreach(const AdBlockRule * rule, m_networkBlockRules) {
|
foreach(const AdBlockRule * rule, m_networkBlockRules) {
|
||||||
if (rule->networkMatch(urlDomain, urlString)) {
|
if (rule->networkMatch(request, urlDomain, urlString)) {
|
||||||
return rule;
|
return rule;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -52,6 +52,7 @@
|
|||||||
#include "qz_namespace.h"
|
#include "qz_namespace.h"
|
||||||
#include "adblockrule.h"
|
#include "adblockrule.h"
|
||||||
|
|
||||||
|
class QNetworkRequest;
|
||||||
class QNetworkReply;
|
class QNetworkReply;
|
||||||
class QUrl;
|
class QUrl;
|
||||||
|
|
||||||
@ -72,8 +73,7 @@ public:
|
|||||||
virtual void loadSubscription();
|
virtual void loadSubscription();
|
||||||
virtual void saveSubscription();
|
virtual void saveSubscription();
|
||||||
|
|
||||||
const AdBlockRule* allow(const QString &urlDomain, const QString &urlString) const;
|
const AdBlockRule* match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const;
|
||||||
const AdBlockRule* block(const QString &urlDomain, const QString &urlString) const;
|
|
||||||
|
|
||||||
QString elementHidingRules() const;
|
QString elementHidingRules() const;
|
||||||
QString elementHidingRulesForDomain(const QString &domain) const;
|
QString elementHidingRulesForDomain(const QString &domain) const;
|
||||||
|
Loading…
Reference in New Issue
Block a user