mirror of
https://invent.kde.org/network/falkon.git
synced 2024-12-24 04:36:34 +01:00
AdBlock: Use fast search tree for basic rules
For all rules that can be matched with a simple string-contains match, use fast tree searching. EasyList alone contains 6000 such rules. This improves the overall performance of AdBlock matching by 5 to 10 times, at the cost of memory. The tree needs about 1.5 MB of memory for 1000 rules, which is a great trade-off for such a big performance improvement.
This commit is contained in:
parent
22b8103fb4
commit
3159407c18
134
src/lib/adblock/adblocksearchtree.cpp
Normal file
134
src/lib/adblock/adblocksearchtree.cpp
Normal file
@ -0,0 +1,134 @@
|
||||
/* ============================================================
|
||||
* QupZilla - WebKit based browser
|
||||
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
* ============================================================ */
|
||||
#include "adblocksearchtree.h"
|
||||
#include "adblockrule.h"
|
||||
|
||||
#include <QDebug>
|
||||
|
||||
// Creates an empty tree: a single root node that has no children and
// carries no rule.
AdBlockSearchTree::AdBlockSearchTree()
    : m_root(new Node())
{
}
|
||||
|
||||
bool AdBlockSearchTree::add(const AdBlockRule* rule)
|
||||
{
|
||||
if (rule->m_type != AdBlockRule::StringContainsMatchRule) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const QString &filter = rule->m_matchString;
|
||||
int len = filter.size();
|
||||
|
||||
if (len <= 0) {
|
||||
qDebug() << "AdBlockSearchTree: Inserting rule with filter len <= 0!";
|
||||
return false;
|
||||
}
|
||||
|
||||
Node* node = m_root;
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
const QChar &c = filter.at(i);
|
||||
if (!node->childs.contains(c)) {
|
||||
Node* n = new Node;
|
||||
n->c = c;
|
||||
|
||||
node->childs[c] = n;
|
||||
}
|
||||
|
||||
node = node->childs[c];
|
||||
}
|
||||
|
||||
node->rule = rule;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Searches for a stored rule whose match string occurs somewhere in 'string'
// and whose networkMatch() accepts the request.
//
// request, domain - passed through unchanged to AdBlockRule::networkMatch()
// string          - text to scan; also passed to networkMatch() as the URL string
//
// Returns the first matching rule found, or 0 when there is none (including
// when 'string' is empty).
const AdBlockRule* AdBlockSearchTree::find(const QNetworkRequest &request, const QString &domain, const QString &string)
{
    const int length = string.size();

    // A "contains" match is a prefix match against one of the suffixes of
    // the string, so try each start offset in turn.
    // NOTE(review): mid() builds a new QString for every offset.
    for (int offset = 0; offset < length; ++offset) {
        if (const AdBlockRule* hit = prefixSearch(request, domain, string, string.mid(offset))) {
            return hit;
        }
    }

    return 0;
}
|
||||
|
||||
// Walks the tree along the characters of 'string' and returns the first rule
// on that path whose networkMatch() accepts the request.
//
// request, domain, urlString - passed through unchanged to
//                              AdBlockRule::networkMatch()
// string                     - text whose prefixes are compared against the
//                              stored match strings
//
// Returns the matching rule, or 0 when 'string' is empty, when the path leaves
// the tree, or when no rule on the path matches.
const AdBlockRule* AdBlockSearchTree::prefixSearch(const QNetworkRequest &request, const QString &domain, const QString &urlString, const QString &string)
{
    const int len = string.size();

    if (len <= 0) {
        return 0;
    }

    // QHash::value() yields a default-constructed (null) pointer for a
    // missing key, so a single const lookup replaces contains() + operator[].
    const Node* node = m_root->childs.value(string.at(0));

    if (!node) {
        return 0;
    }

    for (int i = 1; i < len; ++i) {
        // A rule stored here has its whole match string as a prefix of
        // 'string'; check it before descending any further.
        if (node->rule && node->rule->networkMatch(request, domain, urlString)) {
            return node->rule;
        }

        node = node->childs.value(string.at(i));

        if (!node) {
            return 0;
        }
    }

    // The node reached by the last character has not been checked yet.
    if (node->rule && node->rule->networkMatch(request, domain, urlString)) {
        return node->rule;
    }

    return 0;
}
|
||||
|
||||
// Deletes 'node' together with every node below it. A null pointer is
// accepted and ignored. The rules referenced by the nodes are left alone.
void AdBlockSearchTree::deleteNode(AdBlockSearchTree::Node* node)
{
    if (node) {
        typedef QHash<QChar, Node*>::const_iterator ChildIterator;

        // Release the whole subtree first, then the node itself.
        const ChildIterator last = node->childs.constEnd();
        for (ChildIterator child = node->childs.constBegin(); child != last; ++child) {
            deleteNode(child.value());
        }

        delete node;
    }
}
|
||||
|
||||
// Frees the root node and, through deleteNode(), every node reachable from it.
AdBlockSearchTree::~AdBlockSearchTree()
{
    deleteNode(m_root);
}
|
56
src/lib/adblock/adblocksearchtree.h
Normal file
56
src/lib/adblock/adblocksearchtree.h
Normal file
@ -0,0 +1,56 @@
|
||||
/* ============================================================
|
||||
* QupZilla - WebKit based browser
|
||||
* Copyright (C) 2013 David Rosca <nowrep@gmail.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
* ============================================================ */
|
||||
#ifndef ADBLOCKSEARCHTREE_H
|
||||
#define ADBLOCKSEARCHTREE_H
|
||||
|
||||
#include <QChar>
|
||||
#include <QHash>
|
||||
|
||||
#include "qz_namespace.h"
|
||||
|
||||
class QNetworkRequest;
|
||||
|
||||
class AdBlockRule;
|
||||
|
||||
class QT_QUPZILLA_EXPORT AdBlockSearchTree
|
||||
{
|
||||
public:
|
||||
explicit AdBlockSearchTree();
|
||||
~AdBlockSearchTree();
|
||||
|
||||
bool add(const AdBlockRule* rule);
|
||||
const AdBlockRule* find(const QNetworkRequest &request, const QString &domain, const QString &string);
|
||||
|
||||
private:
|
||||
struct Node {
|
||||
QChar c;
|
||||
const AdBlockRule* rule;
|
||||
QHash<QChar, Node*> childs;
|
||||
|
||||
Node() : c(0) , rule(0) { }
|
||||
};
|
||||
|
||||
const AdBlockRule* prefixSearch(const QNetworkRequest &request, const QString &domain,
|
||||
const QString &urlString, const QString &string);
|
||||
|
||||
void deleteNode(Node* node);
|
||||
|
||||
Node* m_root;
|
||||
};
|
||||
|
||||
#endif // ADBLOCKSEARCHTREE_H
|
@ -44,6 +44,7 @@
|
||||
*/
|
||||
#include "adblocksubscription.h"
|
||||
#include "adblockmanager.h"
|
||||
#include "adblocksearchtree.h"
|
||||
#include "mainapplication.h"
|
||||
#include "networkmanager.h"
|
||||
#include "qztools.h"
|
||||
@ -57,6 +58,8 @@
|
||||
AdBlockSubscription::AdBlockSubscription(const QString &title, QObject* parent)
|
||||
: QObject(parent)
|
||||
, m_reply(0)
|
||||
, m_networkBlockTree(0)
|
||||
, m_networkExceptionTree(0)
|
||||
, m_title(title)
|
||||
, m_updated(false)
|
||||
{
|
||||
@ -200,6 +203,14 @@ void AdBlockSubscription::saveDownloadedData(const QByteArray &data)
|
||||
|
||||
const AdBlockRule* AdBlockSubscription::match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const
|
||||
{
|
||||
if (m_networkExceptionTree->find(request, urlDomain, urlString)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (const AdBlockRule* rule = m_networkBlockTree->find(request, urlDomain, urlString)) {
|
||||
return rule;
|
||||
}
|
||||
|
||||
int count = m_networkExceptionRules.count();
|
||||
for (int i = 0; i < count; ++i) {
|
||||
const AdBlockRule* rule = m_networkExceptionRules.at(i);
|
||||
@ -373,6 +384,12 @@ void AdBlockSubscription::populateCache()
|
||||
m_documentRules.clear();
|
||||
m_elemhideRules.clear();
|
||||
|
||||
delete m_networkBlockTree;
|
||||
delete m_networkExceptionTree;
|
||||
|
||||
m_networkBlockTree = new AdBlockSearchTree;
|
||||
m_networkExceptionTree = new AdBlockSearchTree;
|
||||
|
||||
// Apparently, excessive amount of selectors for one CSS rule is not what WebKit likes.
|
||||
// (In my testings, 4931 is the number that makes it crash)
|
||||
// So let's split it by 1000 selectors...
|
||||
@ -406,10 +423,14 @@ void AdBlockSubscription::populateCache()
|
||||
m_elemhideRules.append(rule);
|
||||
}
|
||||
else if (rule->isException()) {
|
||||
m_networkExceptionRules.append(rule);
|
||||
if (!m_networkExceptionTree->add(rule)) {
|
||||
m_networkExceptionRules.append(rule);
|
||||
}
|
||||
}
|
||||
else {
|
||||
m_networkBlockRules.append(rule);
|
||||
if (!m_networkBlockTree->add(rule)) {
|
||||
m_networkBlockRules.append(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -422,6 +443,8 @@ void AdBlockSubscription::populateCache()
|
||||
// Deletes every rule held in m_rules and both network search trees.
AdBlockSubscription::~AdBlockSubscription()
{
    qDeleteAll(m_rules);

    // Deleting a null pointer is a no-op, so this is safe even if the trees
    // were never created.
    delete m_networkBlockTree;
    delete m_networkExceptionTree;
}
|
||||
|
||||
// AdBlockCustomList
|
||||
|
@ -56,6 +56,7 @@ class QNetworkRequest;
|
||||
class QNetworkReply;
|
||||
class QUrl;
|
||||
|
||||
class AdBlockSearchTree;
|
||||
class FollowRedirectReply;
|
||||
|
||||
class QT_QUPZILLA_EXPORT AdBlockSubscription : public QObject
|
||||
@ -123,6 +124,9 @@ protected:
|
||||
QVector<const AdBlockRule*> m_documentRules;
|
||||
QVector<const AdBlockRule*> m_elemhideRules;
|
||||
|
||||
AdBlockSearchTree* m_networkBlockTree;
|
||||
AdBlockSearchTree* m_networkExceptionTree;
|
||||
|
||||
private:
|
||||
QString m_title;
|
||||
QString m_filePath;
|
||||
|
@ -247,7 +247,8 @@ SOURCES += \
|
||||
autofill/passwordbackends/passwordbackend.cpp \
|
||||
tools/aesinterface.cpp \
|
||||
autofill/passwordbackends/databaseencryptedpasswordbackend.cpp \
|
||||
network/sslerrordialog.cpp
|
||||
network/sslerrordialog.cpp \
|
||||
adblock/adblocksearchtree.cpp
|
||||
|
||||
|
||||
HEADERS += \
|
||||
@ -431,7 +432,8 @@ HEADERS += \
|
||||
autofill/passwordbackends/databasepasswordbackend.h \
|
||||
tools/aesinterface.h \
|
||||
autofill/passwordbackends/databaseencryptedpasswordbackend.h \
|
||||
network/sslerrordialog.h
|
||||
network/sslerrordialog.h \
|
||||
adblock/adblocksearchtree.h
|
||||
|
||||
FORMS += \
|
||||
preferences/autofillmanager.ui \
|
||||
|
Loading…
Reference in New Issue
Block a user