1
mirror of https://invent.kde.org/network/falkon.git synced 2024-09-21 17:52:10 +02:00

AdBlock: Support for element hiding rules & improved performance

- improved performance with not using regexps when not necessary
- added support for element hiding even with domain restrictions
- almost all types of rules are supported now
   * the only exceptions are some special cases where a rule has
     unsupported options (the part of the rule after the $ character)
   * those rules are ignored
This commit is contained in:
nowrep 2012-06-25 16:07:25 +02:00
parent 84e4e70b15
commit 847469e0be
10 changed files with 365 additions and 141 deletions

View File

@ -63,7 +63,10 @@ void AdBlockAddSubscriptionDialog::indexChanged(int index)
const Subscription &subscription = m_knownSubscriptions.at(index);
ui->title->setText(subscription.title);
ui->title->setCursorPosition(0);
ui->url->setText(subscription.url);
ui->url->setCursorPosition(0);
}
AdBlockAddSubscriptionDialog::~AdBlockAddSubscriptionDialog()

View File

@ -40,6 +40,7 @@ AdBlockManager::AdBlockManager(QObject* parent)
, m_loaded(false)
, m_enabled(true)
{
load();
}
AdBlockManager* AdBlockManager::instance()
@ -69,20 +70,22 @@ QList<AdBlockSubscription*> AdBlockManager::subscriptions() const
QNetworkReply* AdBlockManager::block(const QNetworkRequest &request)
{
const QString &urlString = request.url().toEncoded();
const QString &urlDomain = request.url().host();
const QString &urlScheme = request.url().scheme();
if (!isEnabled() || urlScheme == "data" || urlScheme == "qrc" || urlScheme == "file" || urlScheme == "qupzilla") {
if (!isEnabled() || urlScheme == "data" || urlScheme == "qrc" ||
urlScheme == "file" || urlScheme == "qupzilla" || urlScheme == "abp") {
return 0;
}
const AdBlockRule* blockedRule = 0;
foreach(AdBlockSubscription * subscription, m_subscriptions) {
if (subscription->allow(urlString)) {
if (subscription->allow(urlDomain, urlString)) {
return 0;
}
if (const AdBlockRule* rule = subscription->block(urlString)) {
if (const AdBlockRule* rule = subscription->block(urlDomain, urlString)) {
blockedRule = rule;
}
@ -148,10 +151,9 @@ bool AdBlockManager::removeSubscription(AdBlockSubscription* subscription)
void AdBlockManager::load()
{
if (m_loaded) {
if (!m_enabled || m_loaded) {
return;
}
m_loaded = true;
Settings settings;
settings.beginGroup("AdBlock");
@ -159,6 +161,10 @@ void AdBlockManager::load()
QDateTime lastUpdate = settings.value("lastUpdate", QDateTime()).toDateTime();
settings.endGroup();
if (!m_enabled) {
return;
}
QDir adblockDir(mApp->currentProfilePath() + "adblock");
// Create if necessary
if (!adblockDir.exists()) {
@ -207,6 +213,8 @@ void AdBlockManager::load()
if (lastUpdate.addDays(5) < QDateTime::currentDateTime()) {
QTimer::singleShot(1000 * 60, this, SLOT(updateAllSubscriptions()));
}
m_loaded = true;
}
void AdBlockManager::updateAllSubscriptions()
@ -237,6 +245,43 @@ void AdBlockManager::save()
settings.endGroup();
}
// Returns the current value of the m_enabled flag.
bool AdBlockManager::isEnabled()
{
return m_enabled;
}
// Concatenates the element hiding rules reported by every subscription
// and returns them as one string with the final character stripped.
QString AdBlockManager::elementHidingRules() const
{
    QString joined;

    foreach(AdBlockSubscription * sub, m_subscriptions) {
        joined += sub->elementHidingRules();
    }

    if (joined.isEmpty()) {
        return joined;
    }

    // Drop the last character (the trailing "," separator)
    return joined.left(joined.size() - 1);
}
// Concatenates, over all subscriptions, the element hiding rules each one
// reports for the given domain and returns them with the final character
// stripped.
QString AdBlockManager::elementHidingRulesForDomain(const QString &domain) const
{
    QString joined;

    foreach(AdBlockSubscription * sub, m_subscriptions) {
        joined += sub->elementHidingRulesForDomain(domain);
    }

    if (joined.isEmpty()) {
        return joined;
    }

    // Drop the last character (the trailing "," separator)
    return joined.left(joined.size() - 1);
}
AdBlockDialog* AdBlockManager::showDialog()
{
if (!m_adBlockDialog) {

View File

@ -41,9 +41,13 @@ public:
void load();
void save();
bool isEnabled() { if (!m_loaded) load(); return m_enabled; }
bool isEnabled();
QString elementHidingRules() const;
QString elementHidingRulesForDomain(const QString &domain) const;
QList<AdBlockSubscription*> subscriptions() const;
QNetworkReply* block(const QNetworkRequest &request);
AdBlockSubscription* addSubscription(const QString &title, const QString &url);

View File

@ -56,6 +56,13 @@
// #define ADBLOCKRULE_DEBUG
// Constructs a rule from a single filter line. Every flag is first set to
// its default (enabled, case-insensitive, everything else off) and the
// filter text is then handed to setFilter().
AdBlockRule::AdBlockRule(const QString &filter)
: m_enabled(true)
, m_cssRule(false)
, m_exception(false)
, m_internalDisabled(false)
, m_domainRestricted(false)
, m_useRegExp(false)
, m_caseSensitivity(Qt::CaseInsensitive)
{
setFilter(filter);
}
@ -68,102 +75,22 @@ QString AdBlockRule::filter() const
void AdBlockRule::setFilter(const QString &filter)
{
m_filter = filter;
m_cssRule = false;
m_enabled = true;
m_exception = false;
bool regExpRule = false;
if (filter.startsWith(QLatin1String("!"))
|| filter.trimmed().isEmpty()) {
m_enabled = false;
}
if (filter.contains(QLatin1String("##"))) {
m_cssRule = true;
}
QString parsedLine = filter;
if (parsedLine.startsWith(QLatin1String("@@"))) {
m_exception = true;
parsedLine = parsedLine.mid(2);
}
if (parsedLine.startsWith(QLatin1Char('/'))) {
if (parsedLine.endsWith(QLatin1Char('/'))) {
parsedLine = parsedLine.mid(1);
parsedLine = parsedLine.left(parsedLine.size() - 1);
regExpRule = true;
}
}
int options = parsedLine.indexOf(QLatin1String("$"), 0);
if (options >= 0) {
m_options = parsedLine.mid(options + 1).split(QLatin1Char(','));
parsedLine = parsedLine.left(options);
}
setPattern(parsedLine, regExpRule);
if (m_options.contains(QLatin1String("match-case"))) {
m_regExp.setCaseSensitivity(Qt::CaseSensitive);
m_options.removeOne(QLatin1String("match-case"));
}
parseFilter();
}
bool AdBlockRule::networkMatch(const QString &encodedUrl) const
bool AdBlockRule::isCssRule() const
{
if (m_cssRule) {
#if defined(ADBLOCKRULE_DEBUG)
qDebug() << "AdBlockRule::" << __FUNCTION__ << "m_cssRule" << m_cssRule;
#endif
return false;
}
return m_cssRule;
}
if (!m_enabled) {
#if defined(ADBLOCKRULE_DEBUG)
qDebug() << "AdBlockRule::" << __FUNCTION__ << "is not enabled";
#endif
return false;
}
// Returns the stored CSS selector (m_cssSelector).
QString AdBlockRule::cssSelector() const
{
return m_cssSelector;
}
bool matched = m_regExp.indexIn(encodedUrl) != -1;
if (matched
&& !m_options.isEmpty()) {
// we only support domain right now
if (m_options.count() == 1) {
foreach(const QString & option, m_options) {
if (option.startsWith("domain=")) {
QUrl url = QUrl::fromEncoded(encodedUrl.toUtf8());
QString host = url.host();
QStringList domainOptions = option.mid(7).split('|');
foreach(QString domainOption, domainOptions) {
bool negate = domainOption.at(0) == '~';
if (negate) {
domainOption = domainOption.mid(1);
}
bool hostMatched = domainOption == host;
if (hostMatched && !negate) {
return true;
}
if (!hostMatched && negate) {
return true;
}
}
}
}
}
#if defined(ADBLOCKRULE_DEBUG)
qDebug() << "AdBlockRule::" << __FUNCTION__ << "options are currently not supported" << m_options;
#endif
return false;
}
#if defined(ADBLOCKRULE_DEBUG)
//qDebug() << "AdBlockRule::" << __FUNCTION__ << encodedUrl << "MATCHED" << matched << filter();
#endif
return matched;
// Returns the m_domainRestricted flag.
bool AdBlockRule::isDomainRestricted() const
{
return m_domainRestricted;
}
bool AdBlockRule::isException() const
@ -171,11 +98,6 @@ bool AdBlockRule::isException() const
return m_exception;
}
void AdBlockRule::setException(bool exception)
{
m_exception = exception;
}
bool AdBlockRule::isEnabled() const
{
return m_enabled;
@ -192,32 +114,197 @@ void AdBlockRule::setEnabled(bool enabled)
}
}
QString AdBlockRule::regExpPattern() const
bool AdBlockRule::isInternalDisabled() const
{
return m_regExp.pattern();
return m_internalDisabled;
}
static QString convertPatternToRegExp(const QString &wildcardPattern)
bool AdBlockRule::networkMatch(const QString &domain, const QString &encodedUrl) const
{
QString pattern = wildcardPattern;
return pattern.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
.replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
.replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards
.replace(QRegExp(QLatin1String("(\\*)$")), QLatin1String(""))
.replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1")) // escape special symbols
.replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")),
QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")) // process extended anchor at expression start
.replace(QRegExp(QLatin1String("\\\\\\^")),
QLatin1String("(?:[^\\w\\d\\-.%]|$)")) // process separator placeholders
.replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^")) // process anchor at expression start
.replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process anchor at expression end
.replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")) // replace wildcards by .*
;
if (m_cssRule || !m_enabled || m_internalDisabled) {
return false;
}
// Match domain first
if (m_domainRestricted && !matchDomain(domain)) {
return false;
}
// Use regExp match if necessary
if (m_useRegExp) {
return (m_regExp.indexIn(encodedUrl) != -1);
}
return encodedUrl.contains(m_matchString, m_caseSensitivity);
}
void AdBlockRule::setPattern(const QString &pattern, bool isRegExp)
bool AdBlockRule::matchDomain(const QString &domain) const
{
Q_UNUSED(isRegExp)
m_regExp = QRegExp(convertPatternToRegExp(pattern), Qt::CaseInsensitive, QRegExp::RegExp);
if (!m_domainRestricted) {
return true;
}
if (m_blockedDomains.isEmpty()) {
foreach(const QString & d, m_allowedDomains) {
if (domain.contains(d)) {
return true;
}
}
}
else if (m_allowedDomains.isEmpty()) {
foreach(const QString & d, m_blockedDomains) {
if (domain.contains(d)) {
return false;
}
}
return true;
}
else {
foreach(const QString & d, m_blockedDomains) {
if (domain.contains(d)) {
return false;
}
}
foreach(const QString & d, m_allowedDomains) {
if (domain.contains(d)) {
return true;
}
}
}
return false;
}
void AdBlockRule::parseFilter()
{
QString parsedLine = m_filter;
// Empty rule
if (m_filter.trimmed().isEmpty()) {
m_enabled = false;
return;
}
// Disabled rule - modify parsedLine to not contain starting ! so we can
// continue parsing rule
if (m_filter.startsWith('!')) {
m_enabled = false;
parsedLine = m_filter.mid(1);
}
// CSS Element hiding rule
if (parsedLine.contains("##")) {
m_cssRule = true;
int pos = parsedLine.indexOf("##");
// Domain restricted rule
if (!parsedLine.startsWith("##")) {
QString domains = parsedLine.mid(0, pos);
parseDomains(domains, ',');
}
m_cssSelector = parsedLine.mid(pos + 2);
// CSS rule cannot have more options -> stop parsing
return;
}
// Exception always starts with @@
if (parsedLine.startsWith("@@")) {
m_exception = true;
parsedLine = parsedLine.mid(2);
}
// Parse all options following $ char
int optionsIndex = parsedLine.indexOf('$');
if (optionsIndex >= 0) {
QStringList options = parsedLine.mid(optionsIndex + 1).split(',');
int handledOptions = 0;
foreach(const QString & option, options) {
if (option.startsWith("domain=")) {
parseDomains(option.mid(7), '|');
++handledOptions;
}
else if (option.startsWith("match-case")) {
m_caseSensitivity = Qt::CaseSensitive;
++handledOptions;
}
else if (option.startsWith("third-party")) {
// I think we can ignore it
++handledOptions;
}
}
// If we don't handle all known options, it's safer to just disable this rule
if (handledOptions != options.count()) {
m_internalDisabled = true;
return;
}
parsedLine = parsedLine.left(optionsIndex);
}
// Rule is classic regexp
if (parsedLine.startsWith('/') && parsedLine.endsWith('/')) {
parsedLine = parsedLine.mid(1);
parsedLine = parsedLine.left(parsedLine.size() - 1);
m_useRegExp = true;
m_regExp = QRegExp(parsedLine, m_caseSensitivity, QRegExp::RegExp);
return;
}
// Remove starting and ending wildcards (*)
if (parsedLine.startsWith("*")) {
parsedLine = parsedLine.mid(1);
}
if (parsedLine.endsWith("*")) {
parsedLine = parsedLine.left(parsedLine.size() - 1);
}
// If we still find a wildcard (*) or separator (^) or start with domain (||)
// we must modify parsedLine to comply with QRegExp
if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.startsWith("||")) {
parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
.replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
.replace(QRegExp(QLatin1String("^(\\*)")), QLatin1String("")) // remove leading wildcards
.replace(QRegExp(QLatin1String("(\\*)$")), QLatin1String(""))
.replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1")) // escape special symbols
.replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")),
QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")) // process extended anchor at expression start
.replace(QRegExp(QLatin1String("\\\\\\^")),
QLatin1String("(?:[^\\w\\d\\-.%]|$)")) // process separator placeholders
.replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^")) // process anchor at expression start
.replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process anchor at expression end
.replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")); // replace wildcards by .*
m_useRegExp = true;
m_regExp = QRegExp(parsedLine, m_caseSensitivity, QRegExp::RegExp);
return;
}
// We haven't found anything that needs use of regexp, yay!
m_useRegExp = false;
m_matchString = parsedLine;
}
void AdBlockRule::parseDomains(const QString &domains, const QChar &separator)
{
QStringList domainsList = domains.split(separator);
foreach(const QString domain, domainsList) {
if (domain.isEmpty()) {
continue;
}
if (domain.startsWith('~')) {
m_blockedDomains.append(domain.mid(1));
}
else {
m_allowedDomains.append(domain);
}
}
m_domainRestricted = (!m_blockedDomains.isEmpty() || !m_allowedDomains.isEmpty());
}

View File

@ -63,26 +63,44 @@ public:
QString filter() const;
void setFilter(const QString &filter);
bool isCSSRule() const { return m_cssRule; }
bool networkMatch(const QString &encodedUrl) const;
bool isCssRule() const;
QString cssSelector() const;
bool isDomainRestricted() const;
bool isException() const;
void setException(bool exception);
bool isEnabled() const;
void setEnabled(bool enabled);
QString regExpPattern() const;
void setPattern(const QString &pattern, bool isRegExp);
bool isInternalDisabled() const;
bool networkMatch(const QString &domain, const QString &encodedUrl) const;
bool matchDomain(const QString &domain) const;
private:
void parseFilter();
void parseDomains(const QString &domains, const QChar &separator);
QString m_filter;
bool m_enabled;
bool m_cssRule;
bool m_exception;
bool m_enabled;
bool m_internalDisabled;
bool m_domainRestricted;
bool m_useRegExp;
QRegExp m_regExp;
QStringList m_options;
QString m_cssSelector;
QString m_matchString;
// Rule $options
QStringList m_allowedDomains;
QStringList m_blockedDomains;
Qt::CaseSensitivity m_caseSensitivity;
};
#endif // ADBLOCKRULE_H

View File

@ -195,26 +195,44 @@ void AdBlockSubscription::saveDownloadedData(QByteArray &data)
file.close();
}
const AdBlockRule* AdBlockSubscription::allow(const QString &urlString) const
const AdBlockRule* AdBlockSubscription::allow(const QString &urlDomain, const QString &urlString) const
{
foreach(const AdBlockRule * rule, m_networkExceptionRules) {
if (rule->networkMatch(urlString)) {
if (rule->networkMatch(urlDomain, urlString)) {
return rule;
}
}
return 0;
}
const AdBlockRule* AdBlockSubscription::block(const QString &urlString) const
const AdBlockRule* AdBlockSubscription::block(const QString &urlDomain, const QString &urlString) const
{
foreach(const AdBlockRule * rule, m_networkBlockRules) {
if (rule->networkMatch(urlString)) {
if (rule->networkMatch(urlDomain, urlString)) {
return rule;
}
}
return 0;
}
// Returns the cached m_elementHidingRules string.
QString AdBlockSubscription::elementHidingRules() const
{
return m_elementHidingRules;
}
// Collects the CSS selectors of all domain restricted element hiding rules
// whose matchDomain() accepts `domain`. Every selector is followed by a ",",
// so a non-empty result ends with a trailing comma.
QString AdBlockSubscription::elementHidingRulesForDomain(const QString &domain) const
{
    QString selectors;

    foreach(const AdBlockRule * cssRule, m_domainRestrictedCssRules) {
        if (!cssRule->matchDomain(domain)) {
            continue;
        }

        selectors.append(cssRule->cssSelector());
        selectors.append(",");
    }

    return selectors;
}
QList<AdBlockRule> AdBlockSubscription::allRules() const
{
return m_rules;
@ -279,6 +297,7 @@ void AdBlockSubscription::populateCache()
{
m_networkExceptionRules.clear();
m_networkBlockRules.clear();
m_domainRestrictedCssRules.clear();
m_elementHidingRules.clear();
for (int i = 0; i < m_rules.count(); ++i) {
@ -287,8 +306,13 @@ void AdBlockSubscription::populateCache()
continue;
}
if (rule->isCSSRule()) {
m_elementHidingRules.append(rule->filter() + ",");
if (rule->isCssRule()) {
if (rule->isDomainRestricted()) {
m_domainRestrictedCssRules.append(rule);
}
else {
m_elementHidingRules.append(rule->cssSelector() + ",");
}
continue;
}
@ -324,8 +348,9 @@ void AdBlockEasyList::saveDownloadedData(QByteArray &data)
return;
}
// We do not support more than standard blocking, so remove element hiding rules, etc...
data = data.left(data.indexOf("General element hiding rules"));
// Third-party advertisers rules are with start domain (||) placeholder which needs regexps
// So we are ignoring it for keeping good performance
data = data.left(data.indexOf("!---------------------------Third-party advertisers"));
file.write(data);
file.close();

View File

@ -72,10 +72,11 @@ public:
virtual void loadSubscription();
virtual void saveSubscription();
const AdBlockRule* allow(const QString &urlString) const;
const AdBlockRule* block(const QString &urlString) const;
const AdBlockRule* allow(const QString &urlDomain, const QString &urlString) const;
const AdBlockRule* block(const QString &urlDomain, const QString &urlString) const;
QString elementHidingRules();
QString elementHidingRules() const;
QString elementHidingRulesForDomain(const QString &domain) const;
QList<AdBlockRule> allRules() const;
void enableRule(int offset);
@ -110,6 +111,7 @@ protected:
// sorted list
QList<const AdBlockRule*> m_networkExceptionRules;
QList<const AdBlockRule*> m_networkBlockRules;
QList<const AdBlockRule*> m_domainRestrictedCssRules;
private:
QString m_title;

View File

@ -330,7 +330,7 @@ void MainApplication::loadSettings()
#endif
setWheelScrollLines(settings.value("wheelScrollLines", wheelScrollLines()).toInt());
m_websettings->setUserStyleSheetUrl(QUrl::fromLocalFile(settings.value("userStyleSheet", "").toString()));
m_websettings->setUserStyleSheetUrl(userStyleSheet(settings.value("userStyleSheet", "").toString()));
WebPage::setUserAgent(settings.value("UserAgent", "").toString());
settings.endGroup();
@ -844,6 +844,25 @@ bool MainApplication::restoreStateSlot(QupZilla* window)
return true;
}
// Builds the user style sheet as a base64 "data:" URL.
//
// The sheet consists of the contents of `filePath` (when the path is
// non-empty and the file can be opened; newlines are stripped) followed by
// one rule that hides everything matched by AdBlock's global element hiding
// selectors. The result is always a valid URL, even if both parts are empty.
QUrl MainApplication::userStyleSheet(const QString &filePath) const
{
    QString userStyle;

    QFile file(filePath);
    if (!filePath.isEmpty() && file.open(QFile::ReadOnly)) {
        // The payload below is declared as UTF-8, so decode it as such
        userStyle = QString::fromUtf8(file.readAll());
        userStyle.remove("\n");
        file.close();
    }

    // Only emit the hiding rule when there is at least one selector;
    // a block without a selector is not valid CSS.
    const QString hidingSelectors = AdBlockManager::instance()->elementHidingRules();
    if (!hidingSelectors.isEmpty()) {
        userStyle.append(hidingSelectors + "{ display:none !important;}");
    }

    // Encode as UTF-8 to agree with the charset announced in the URL;
    // toAscii() would replace characters outside the 8-bit range with '?'.
    QString encodedStyle = userStyle.toUtf8().toBase64();
    QString dataString = QString("data:text/css;charset=utf-8;base64,%1").arg(encodedStyle);

    return QUrl(dataString);
}
bool MainApplication::checkSettingsDir()
{
/*

View File

@ -125,6 +125,8 @@ private:
void translateApp();
void restoreOtherWindows();
QUrl userStyleSheet(const QString &filePath) const;
CookieManager* m_cookiemanager;
BrowsingLibrary* m_browsingLibrary;
History* m_historymodel;

View File

@ -31,6 +31,7 @@
#include "popupwebview.h"
#include "networkmanagerproxy.h"
#include "adblockicon.h"
#include "adblockmanager.h"
#include "iconprovider.h"
#include "websettings.h"
@ -474,6 +475,17 @@ void WebPage::addAdBlockRule(const QString &filter, const QUrl &url)
void WebPage::cleanBlockedObjects()
{
if (!AdBlockManager::instance()->isEnabled()) {
return;
}
// Don't run on local schemes
const QString &urlScheme = url().scheme();
if (urlScheme == "data" || urlScheme == "qrc" || urlScheme == "file" ||
urlScheme == "qupzilla" || urlScheme == "abp") {
return;
}
QStringList findingStrings;
foreach(const AdBlockedEntry & entry, m_adBlockedEntries) {
@ -504,6 +516,13 @@ void WebPage::cleanBlockedObjects()
foreach(QWebElement element, elements) {
element.setStyleProperty("visibility", "hidden");
}
// Apply domain-specific element hiding rules
QString elementHiding = AdBlockManager::instance()->elementHidingRulesForDomain(url().host());
elementHiding.append("{display: none !important;}\n</style>");
QWebElement headElement = docElement.findFirst("body");
headElement.appendInside("<style type=\"text/css\">\n/* AdBlock for QupZilla */\n" + elementHiding);
}
QString WebPage::userAgentForUrl(const QUrl &url) const