1
mirror of https://invent.kde.org/network/falkon.git synced 2024-12-24 04:36:34 +01:00

AdBlock: Use fast search tree for basic rules

For all rules that can be matched with a simple string-contains match,
use a fast search tree.
EasyList alone contains 6000 of these rules.
This improves the overall performance of AdBlock matching by 5 - 10 times,
at the cost of memory.
The tree needs about 1.5MB of memory for 1000 rules, which is a great
tradeoff for such a big performance improvement.
This commit is contained in:
nowrep 2013-11-02 17:43:18 +01:00
parent 22b8103fb4
commit 3159407c18
5 changed files with 223 additions and 4 deletions

View File

@ -0,0 +1,134 @@
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
#include "adblocksearchtree.h"
#include "adblockrule.h"
#include <QDebug>
// Creates an empty tree: only the root node exists, and it carries no rule.
AdBlockSearchTree::AdBlockSearchTree()
    : m_root(new Node())
{
}
bool AdBlockSearchTree::add(const AdBlockRule* rule)
{
if (rule->m_type != AdBlockRule::StringContainsMatchRule) {
return false;
}
const QString &filter = rule->m_matchString;
int len = filter.size();
if (len <= 0) {
qDebug() << "AdBlockSearchTree: Inserting rule with filter len <= 0!";
return false;
}
Node* node = m_root;
for (int i = 0; i < len; ++i) {
const QChar &c = filter.at(i);
if (!node->childs.contains(c)) {
Node* n = new Node;
n->c = c;
node->childs[c] = n;
}
node = node->childs[c];
}
node->rule = rule;
return true;
}
/**
 * Searches @p string for any stored rule.
 *
 * Every suffix of @p string is handed to prefixSearch() in turn, starting
 * with the whole string; the first rule it reports is returned.
 *
 * @return the first rule found, or 0 if no suffix yields one (this includes
 *         an empty @p string).
 */
const AdBlockRule* AdBlockSearchTree::find(const QNetworkRequest &request, const QString &domain, const QString &string)
{
    const int length = string.size();

    for (int offset = 0; offset < length; ++offset) {
        if (const AdBlockRule* hit = prefixSearch(request, domain, string, string.mid(offset))) {
            return hit;
        }
    }

    return 0;
}
/**
 * Walks the tree along the characters of @p string, starting at the root.
 *
 * After each step down, a rule stored in the reached node is tested with
 * networkMatch(request, domain, urlString); the first rule that accepts is
 * returned. The walk stops as soon as the next character has no child node.
 *
 * @param urlString the full string passed on to networkMatch()
 * @param string    the text whose prefix is followed through the tree
 * @return the first accepting rule on the path, or 0 if there is none
 */
const AdBlockRule* AdBlockSearchTree::prefixSearch(const QNetworkRequest &request, const QString &domain, const QString &urlString, const QString &string)
{
    const int length = string.size();
    Node* current = m_root;

    for (int pos = 0; pos < length; ++pos) {
        // Nodes stored in the tree are never null, so a null result means
        // "no child for this character".
        Node* child = current->childs.value(string.at(pos));
        if (!child) {
            return 0;
        }

        current = child;

        const AdBlockRule* candidate = current->rule;
        if (candidate && candidate->networkMatch(request, domain, urlString)) {
            return candidate;
        }
    }

    return 0;
}
/**
 * Recursively deletes @p node and every node below it.
 *
 * Only Node objects are freed; the rules they point to are left untouched.
 * A null @p node is ignored.
 */
void AdBlockSearchTree::deleteNode(AdBlockSearchTree::Node* node)
{
    if (node) {
        typedef QHash<QChar, Node*>::const_iterator ChildIterator;

        const ChildIterator end = node->childs.constEnd();
        for (ChildIterator it = node->childs.constBegin(); it != end; ++it) {
            deleteNode(it.value());
        }

        delete node;
    }
}
// Frees every node of the tree, starting at the root. The rules referenced by
// the nodes are not deleted here.
AdBlockSearchTree::~AdBlockSearchTree()
{
deleteNode(m_root);
}

View File

@ -0,0 +1,56 @@
/* ============================================================
* QupZilla - WebKit based browser
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* ============================================================ */
#ifndef ADBLOCKSEARCHTREE_H
#define ADBLOCKSEARCHTREE_H
#include <QChar>
#include <QHash>
#include "qz_namespace.h"
class QNetworkRequest;
class AdBlockRule;
class QT_QUPZILLA_EXPORT AdBlockSearchTree
{
public:
explicit AdBlockSearchTree();
~AdBlockSearchTree();
bool add(const AdBlockRule* rule);
const AdBlockRule* find(const QNetworkRequest &request, const QString &domain, const QString &string);
private:
struct Node {
QChar c;
const AdBlockRule* rule;
QHash<QChar, Node*> childs;
Node() : c(0) , rule(0) { }
};
const AdBlockRule* prefixSearch(const QNetworkRequest &request, const QString &domain,
const QString &urlString, const QString &string);
void deleteNode(Node* node);
Node* m_root;
};
#endif // ADBLOCKSEARCHTREE_H

View File

@ -44,6 +44,7 @@
*/
#include "adblocksubscription.h"
#include "adblockmanager.h"
#include "adblocksearchtree.h"
#include "mainapplication.h"
#include "networkmanager.h"
#include "qztools.h"
@ -57,6 +58,8 @@
AdBlockSubscription::AdBlockSubscription(const QString &title, QObject* parent)
: QObject(parent)
, m_reply(0)
, m_networkBlockTree(0)
, m_networkExceptionTree(0)
, m_title(title)
, m_updated(false)
{
@ -200,6 +203,14 @@ void AdBlockSubscription::saveDownloadedData(const QByteArray &data)
const AdBlockRule* AdBlockSubscription::match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const
{
if (m_networkExceptionTree->find(request, urlDomain, urlString)) {
return 0;
}
if (const AdBlockRule* rule = m_networkBlockTree->find(request, urlDomain, urlString)) {
return rule;
}
int count = m_networkExceptionRules.count();
for (int i = 0; i < count; ++i) {
const AdBlockRule* rule = m_networkExceptionRules.at(i);
@ -373,6 +384,12 @@ void AdBlockSubscription::populateCache()
m_documentRules.clear();
m_elemhideRules.clear();
delete m_networkBlockTree;
delete m_networkExceptionTree;
m_networkBlockTree = new AdBlockSearchTree;
m_networkExceptionTree = new AdBlockSearchTree;
// Apparently, excessive amount of selectors for one CSS rule is not what WebKit likes.
// (In my testings, 4931 is the number that makes it crash)
// So let's split it by 1000 selectors...
@ -406,10 +423,14 @@ void AdBlockSubscription::populateCache()
m_elemhideRules.append(rule);
}
else if (rule->isException()) {
m_networkExceptionRules.append(rule);
if (!m_networkExceptionTree->add(rule)) {
m_networkExceptionRules.append(rule);
}
}
else {
m_networkBlockRules.append(rule);
if (!m_networkBlockTree->add(rule)) {
m_networkBlockRules.append(rule);
}
}
}
@ -422,6 +443,8 @@ void AdBlockSubscription::populateCache()
// Deletes every rule held in m_rules and then both network search trees.
AdBlockSubscription::~AdBlockSubscription()
{
qDeleteAll(m_rules);
delete m_networkBlockTree;
delete m_networkExceptionTree;
}
// AdBlockCustomList

View File

@ -56,6 +56,7 @@ class QNetworkRequest;
class QNetworkReply;
class QUrl;
class AdBlockSearchTree;
class FollowRedirectReply;
class QT_QUPZILLA_EXPORT AdBlockSubscription : public QObject
@ -123,6 +124,9 @@ protected:
QVector<const AdBlockRule*> m_documentRules;
QVector<const AdBlockRule*> m_elemhideRules;
AdBlockSearchTree* m_networkBlockTree;
AdBlockSearchTree* m_networkExceptionTree;
private:
QString m_title;
QString m_filePath;

View File

@ -247,7 +247,8 @@ SOURCES += \
autofill/passwordbackends/passwordbackend.cpp \
tools/aesinterface.cpp \
autofill/passwordbackends/databaseencryptedpasswordbackend.cpp \
network/sslerrordialog.cpp
network/sslerrordialog.cpp \
adblock/adblocksearchtree.cpp
HEADERS += \
@ -431,7 +432,8 @@ HEADERS += \
autofill/passwordbackends/databasepasswordbackend.h \
tools/aesinterface.h \
autofill/passwordbackends/databaseencryptedpasswordbackend.h \
network/sslerrordialog.h
network/sslerrordialog.h \
adblock/adblocksearchtree.h
FORMS += \
preferences/autofillmanager.ui \