From 3159407c18bef15822106d66862d0f3065d77550 Mon Sep 17 00:00:00 2001 From: nowrep Date: Sat, 2 Nov 2013 17:43:18 +0100 Subject: [PATCH] AdBlock: Use fast search tree for basic rules For all rules that can be matched with a simple string-contains match, use fast tree searching. In EasyList alone, there are 6000 of those rules. It improves overall performance of AdBlock matching by 5 - 10 times, at the cost of memory. The tree needs about 1.5MB of memory for 1000 rules, which is a great tradeoff for such a big performance improvement. --- src/lib/adblock/adblocksearchtree.cpp | 134 ++++++++++++++++++++++++ src/lib/adblock/adblocksearchtree.h | 56 ++++++++++ src/lib/adblock/adblocksubscription.cpp | 27 ++++- src/lib/adblock/adblocksubscription.h | 4 + src/lib/lib.pro | 6 +- 5 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 src/lib/adblock/adblocksearchtree.cpp create mode 100644 src/lib/adblock/adblocksearchtree.h diff --git a/src/lib/adblock/adblocksearchtree.cpp b/src/lib/adblock/adblocksearchtree.cpp new file mode 100644 index 000000000..bbaa2d4f3 --- /dev/null +++ b/src/lib/adblock/adblocksearchtree.cpp @@ -0,0 +1,134 @@ +/* ============================================================ +* QupZilla - WebKit based browser +* Copyright (C) 2013 David Rosca +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+* ============================================================ */ +#include "adblocksearchtree.h" +#include "adblockrule.h" + +#include <QDebug> + +AdBlockSearchTree::AdBlockSearchTree() + : m_root(new Node) +{ +} + +bool AdBlockSearchTree::add(const AdBlockRule* rule) +{ + if (rule->m_type != AdBlockRule::StringContainsMatchRule) { + return false; + } + + const QString &filter = rule->m_matchString; + int len = filter.size(); + + if (len <= 0) { + qDebug() << "AdBlockSearchTree: Inserting rule with filter len <= 0!"; + return false; + } + + Node* node = m_root; + + for (int i = 0; i < len; ++i) { + const QChar &c = filter.at(i); + if (!node->childs.contains(c)) { + Node* n = new Node; + n->c = c; + + node->childs[c] = n; + } + + node = node->childs[c]; + } + + node->rule = rule; + + return true; +} + +const AdBlockRule* AdBlockSearchTree::find(const QNetworkRequest &request, const QString &domain, const QString &string) +{ + int len = string.size(); + + if (len <= 0) { + return 0; + } + + for (int i = 0; i < len; ++i) { + const AdBlockRule* rule = prefixSearch(request, domain, string, string.mid(i)); + if (rule) { + return rule; + } + } + + return 0; +} + +const AdBlockRule* AdBlockSearchTree::prefixSearch(const QNetworkRequest &request, const QString &domain, const QString &urlString, const QString &string) +{ + int len = string.size(); + + if (len <= 0) { + return 0; + } + + QChar c = string.at(0); + + if (!m_root->childs.contains(c)) { + return 0; + } + + Node* node = m_root->childs[c]; + + for (int i = 1; i < len; ++i) { + const QChar &c = string.at(i); + + if (node->rule && node->rule->networkMatch(request, domain, urlString)) { + return node->rule; + } + + if (!node->childs.contains(c)) { + return 0; + } + + node = node->childs[c]; + } + + if (node->rule && node->rule->networkMatch(request, domain, urlString)) { + return node->rule; + } + + return 0; +} + +void AdBlockSearchTree::deleteNode(AdBlockSearchTree::Node* node) +{ + if (!node) { + return; + } + + 
+ QHashIterator<QChar, Node*> i(node->childs); + while (i.hasNext()) { + i.next(); + deleteNode(i.value()); + } + + delete node; +} + +AdBlockSearchTree::~AdBlockSearchTree() +{ + deleteNode(m_root); +} diff --git a/src/lib/adblock/adblocksearchtree.h b/src/lib/adblock/adblocksearchtree.h new file mode 100644 index 000000000..c4a7bb8b3 --- /dev/null +++ b/src/lib/adblock/adblocksearchtree.h @@ -0,0 +1,56 @@ +/* ============================================================ +* QupZilla - WebKit based browser +* Copyright (C) 2013 David Rosca +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+* ============================================================ */ +#ifndef ADBLOCKSEARCHTREE_H +#define ADBLOCKSEARCHTREE_H + +#include <QChar> +#include <QHash> + +#include "qz_namespace.h" + +class QNetworkRequest; + +class AdBlockRule; + +class QT_QUPZILLA_EXPORT AdBlockSearchTree +{ +public: + explicit AdBlockSearchTree(); + ~AdBlockSearchTree(); + + bool add(const AdBlockRule* rule); + const AdBlockRule* find(const QNetworkRequest &request, const QString &domain, const QString &string); + +private: + struct Node { + QChar c; + const AdBlockRule* rule; + QHash<QChar, Node*> childs; + + Node() : c(0) , rule(0) { } + }; + + const AdBlockRule* prefixSearch(const QNetworkRequest &request, const QString &domain, + const QString &urlString, const QString &string); + + void deleteNode(Node* node); + + Node* m_root; +}; + +#endif // ADBLOCKSEARCHTREE_H diff --git a/src/lib/adblock/adblocksubscription.cpp b/src/lib/adblock/adblocksubscription.cpp index 9384fd63a..68aa4d854 100644 --- a/src/lib/adblock/adblocksubscription.cpp +++ b/src/lib/adblock/adblocksubscription.cpp @@ -44,6 +44,7 @@ */ #include "adblocksubscription.h" #include "adblockmanager.h" +#include "adblocksearchtree.h" #include "mainapplication.h" #include "networkmanager.h" #include "qztools.h" @@ -57,6 +58,8 @@ AdBlockSubscription::AdBlockSubscription(const QString &title, QObject* parent) : QObject(parent) , m_reply(0) + , m_networkBlockTree(0) + , m_networkExceptionTree(0) , m_title(title) , m_updated(false) { @@ -200,6 +203,14 @@ void AdBlockSubscription::saveDownloadedData(const QByteArray &data) const AdBlockRule* AdBlockSubscription::match(const QNetworkRequest &request, const QString &urlDomain, const QString &urlString) const { + if (m_networkExceptionTree->find(request, urlDomain, urlString)) { + return 0; + } + + if (const AdBlockRule* rule = m_networkBlockTree->find(request, urlDomain, urlString)) { + return rule; + } + int count = m_networkExceptionRules.count(); for (int i = 0; i < count; ++i) { const AdBlockRule* rule = 
m_networkExceptionRules.at(i); @@ -373,6 +384,12 @@ void AdBlockSubscription::populateCache() m_documentRules.clear(); m_elemhideRules.clear(); + delete m_networkBlockTree; + delete m_networkExceptionTree; + + m_networkBlockTree = new AdBlockSearchTree; + m_networkExceptionTree = new AdBlockSearchTree; + // Apparently, excessive amount of selectors for one CSS rule is not what WebKit likes. // (In my testings, 4931 is the number that makes it crash) // So let's split it by 1000 selectors... @@ -406,10 +423,14 @@ void AdBlockSubscription::populateCache() m_elemhideRules.append(rule); } else if (rule->isException()) { - m_networkExceptionRules.append(rule); + if (!m_networkExceptionTree->add(rule)) { + m_networkExceptionRules.append(rule); + } } else { - m_networkBlockRules.append(rule); + if (!m_networkBlockTree->add(rule)) { + m_networkBlockRules.append(rule); + } } } @@ -422,6 +443,8 @@ void AdBlockSubscription::populateCache() AdBlockSubscription::~AdBlockSubscription() { qDeleteAll(m_rules); + delete m_networkBlockTree; + delete m_networkExceptionTree; } // AdBlockCustomList diff --git a/src/lib/adblock/adblocksubscription.h b/src/lib/adblock/adblocksubscription.h index a83ffd151..a931544d1 100644 --- a/src/lib/adblock/adblocksubscription.h +++ b/src/lib/adblock/adblocksubscription.h @@ -56,6 +56,7 @@ class QNetworkRequest; class QNetworkReply; class QUrl; +class AdBlockSearchTree; class FollowRedirectReply; class QT_QUPZILLA_EXPORT AdBlockSubscription : public QObject @@ -123,6 +124,9 @@ protected: QVector m_documentRules; QVector m_elemhideRules; + AdBlockSearchTree* m_networkBlockTree; + AdBlockSearchTree* m_networkExceptionTree; + private: QString m_title; QString m_filePath; diff --git a/src/lib/lib.pro b/src/lib/lib.pro index 42c6b3ae0..9c1b32809 100644 --- a/src/lib/lib.pro +++ b/src/lib/lib.pro @@ -247,7 +247,8 @@ SOURCES += \ autofill/passwordbackends/passwordbackend.cpp \ tools/aesinterface.cpp \ 
autofill/passwordbackends/databaseencryptedpasswordbackend.cpp \ - network/sslerrordialog.cpp + network/sslerrordialog.cpp \ + adblock/adblocksearchtree.cpp HEADERS += \ @@ -431,7 +432,8 @@ HEADERS += \ autofill/passwordbackends/databasepasswordbackend.h \ tools/aesinterface.h \ autofill/passwordbackends/databaseencryptedpasswordbackend.h \ - network/sslerrordialog.h + network/sslerrordialog.h \ + adblock/adblocksearchtree.h FORMS += \ preferences/autofillmanager.ui \