1
mirror of https://invent.kde.org/network/falkon.git synced 2024-11-11 01:22:10 +01:00

AdBlock: Improved performance in 2 types of rules

- ||domain.com^ and anything| rules now use plain string matching
- changed the EasyList update so that it downloads more rules
   * only 50 rules in EasyList are still slow (they use regexp)
This commit is contained in:
nowrep 2012-07-01 14:44:01 +02:00
parent 6228082225
commit 1b000812b9
3 changed files with 39 additions and 2 deletions

View File

@ -101,6 +101,8 @@ AdBlockRule::AdBlockRule(const QString &filter)
, m_internalDisabled(false)
, m_domainRestricted(false)
, m_useRegExp(false)
, m_useDomainMatch(false)
, m_useEndsMatch(false)
, m_thirdParty(false)
, m_thirdPartyException(false)
, m_object(false)
@ -153,14 +155,20 @@ bool AdBlockRule::isEnabled() const
/**
 * Enables or disables this rule and keeps the stored filter text in sync.
 *
 * A disabled rule carries a single leading "!" in m_filter: disabling
 * prepends that marker, enabling removes the first character again.
 * No check against the previous state is made, so callers are expected to
 * toggle the state rather than set the same state twice.
 *
 * @param enabled  New enabled state; stored in m_enabled.
 */
void AdBlockRule::setEnabled(bool enabled)
{
    m_enabled = enabled;

    if (!enabled) {
        // Prepend the marker exactly once so that the mid(1) in the other
        // branch restores the original filter text.
        m_filter = QLatin1String("!") + m_filter;
    }
    else {
        // Drop the first character of the filter (the "!" added on disable).
        m_filter = m_filter.mid(1);
    }
}
/**
 * Reports whether this rule takes the slow matching path.
 *
 * @return The value of m_useRegExp, i.e. true when the rule is matched
 *         with a regular expression instead of plain string matching.
 */
bool AdBlockRule::isSlow() const
{
    return m_useRegExp;
}
bool AdBlockRule::isInternalDisabled() const
{
return m_internalDisabled;
@ -177,6 +185,12 @@ bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &do
if (m_useRegExp) {
matched = (m_regExp.indexIn(encodedUrl) != -1);
}
else if (m_useDomainMatch) {
matched = (domain == m_matchString);
}
else if (m_useEndsMatch) {
matched = encodedUrl.endsWith(m_matchString, m_caseSensitivity);
}
else {
matched = encodedUrl.contains(m_matchString, m_caseSensitivity);
}
@ -399,6 +413,25 @@ void AdBlockRule::parseFilter()
parsedLine = parsedLine.left(parsedLine.size() - 1);
}
// We can use fast string matching for domain here
if (parsedLine.startsWith("||") && parsedLine.endsWith("^") && !parsedLine.contains(QRegExp("[/:?=&\\*]"))) {
parsedLine = parsedLine.mid(2);
parsedLine = parsedLine.left(parsedLine.size() - 1);
m_useDomainMatch = true;
m_matchString = parsedLine;
return;
}
// If rule contains only | at end, we can also use string matching
if (parsedLine.endsWith("|") && !parsedLine.contains(QRegExp("[\\^\\*]")) && parsedLine.count('|') == 1) {
parsedLine = parsedLine.left(parsedLine.size() - 1);
m_useEndsMatch = true;
m_matchString = parsedLine;
return;
}
// If we still find a wildcard (*) or separator (^) or (|)
// we must modify parsedLine to comply with QRegExp
if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.contains('|')) {

View File

@ -72,6 +72,7 @@ public:
bool isEnabled() const;
void setEnabled(bool enabled);
bool isSlow() const;
bool isInternalDisabled() const;
bool networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const;
@ -98,6 +99,9 @@ private:
bool m_useRegExp;
QRegExp m_regExp;
bool m_useDomainMatch;
bool m_useEndsMatch;
QString m_cssSelector;
QString m_matchString;

View File

@ -341,7 +341,7 @@ void AdBlockEasyList::saveDownloadedData(QByteArray &data)
// Third-party advertiser rules use the domain-start (||) placeholder, which needs regexps,
// so we cut them off here to keep matching fast
data = data.left(data.indexOf("!---------------------------Third-party advertisers"));
data = data.left(data.indexOf("!-----------------------------Third-party adverts-----------------------------!"));
file.write(data);
file.close();