mirror of
https://invent.kde.org/network/falkon.git
synced 2024-12-20 10:46:35 +01:00
AdBlock: Improved performance for two types of rules
- Rules of the form ||domain.com^ and anything| now use plain string matching instead of a regexp - EasyList updating was edited so that more rules are downloaded * only 50 EasyList rules are still slow now (they still use a regexp)
This commit is contained in:
parent
6228082225
commit
1b000812b9
@ -101,6 +101,8 @@ AdBlockRule::AdBlockRule(const QString &filter)
|
||||
, m_internalDisabled(false)
|
||||
, m_domainRestricted(false)
|
||||
, m_useRegExp(false)
|
||||
, m_useDomainMatch(false)
|
||||
, m_useEndsMatch(false)
|
||||
, m_thirdParty(false)
|
||||
, m_thirdPartyException(false)
|
||||
, m_object(false)
|
||||
@ -153,14 +155,20 @@ bool AdBlockRule::isEnabled() const
|
||||
// Records the new enabled state in m_enabled and mirrors it in the filter
// text: disabling prepends "!" to m_filter, enabling removes the first
// character of m_filter.
void AdBlockRule::setEnabled(bool enabled)
|
||||
{
|
||||
m_enabled = enabled;
|
||||
|
||||
if (!enabled) {
|
||||
// NOTE(review): this is a diff view. The two assignments below look like the
// removed and the added version of the same line (only one of them exists in
// the real source at a time) -- confirm against the repository.
m_filter = QLatin1String("!") + m_filter;
|
||||
m_filter = "!" + m_filter;
|
||||
}
|
||||
else {
|
||||
// Drops the first character unconditionally; presumably that is the "!"
// added when the rule was disabled, but nothing here checks it -- verify
// that callers only enable rules that are currently disabled.
m_filter = m_filter.mid(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Reports whether this rule is flagged to use regular-expression matching;
// simply returns the m_useRegExp member.
bool AdBlockRule::isSlow() const
|
||||
{
|
||||
return m_useRegExp;
|
||||
}
|
||||
|
||||
bool AdBlockRule::isInternalDisabled() const
|
||||
{
|
||||
return m_internalDisabled;
|
||||
@ -177,6 +185,12 @@ bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &do
|
||||
if (m_useRegExp) {
|
||||
matched = (m_regExp.indexIn(encodedUrl) != -1);
|
||||
}
|
||||
else if (m_useDomainMatch) {
|
||||
matched = (domain == m_matchString);
|
||||
}
|
||||
else if (m_useEndsMatch) {
|
||||
matched = encodedUrl.endsWith(m_matchString, m_caseSensitivity);
|
||||
}
|
||||
else {
|
||||
matched = encodedUrl.contains(m_matchString, m_caseSensitivity);
|
||||
}
|
||||
@ -399,6 +413,25 @@ void AdBlockRule::parseFilter()
|
||||
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
||||
}
|
||||
|
||||
// We can use fast string matching for domain here
|
||||
if (parsedLine.startsWith("||") && parsedLine.endsWith("^") && !parsedLine.contains(QRegExp("[/:?=&\\*]"))) {
|
||||
parsedLine = parsedLine.mid(2);
|
||||
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
||||
|
||||
m_useDomainMatch = true;
|
||||
m_matchString = parsedLine;
|
||||
return;
|
||||
}
|
||||
|
||||
// If rule contains only | at end, we can also use string matching
|
||||
if (parsedLine.endsWith("|") && !parsedLine.contains(QRegExp("[\\^\\*]")) && parsedLine.count('|') == 1) {
|
||||
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
||||
|
||||
m_useEndsMatch = true;
|
||||
m_matchString = parsedLine;
|
||||
return;
|
||||
}
|
||||
|
||||
// If we still find a wildcard (*) or separator (^) or (|)
|
||||
// we must modify parsedLine to comply with QRegExp
|
||||
if (parsedLine.contains('*') || parsedLine.contains('^') || parsedLine.contains('|')) {
|
||||
|
@ -72,6 +72,7 @@ public:
|
||||
bool isEnabled() const;
|
||||
void setEnabled(bool enabled);
|
||||
|
||||
bool isSlow() const;
|
||||
bool isInternalDisabled() const;
|
||||
|
||||
bool networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const;
|
||||
@ -98,6 +99,9 @@ private:
|
||||
bool m_useRegExp;
|
||||
QRegExp m_regExp;
|
||||
|
||||
bool m_useDomainMatch;
|
||||
bool m_useEndsMatch;
|
||||
|
||||
QString m_cssSelector;
|
||||
QString m_matchString;
|
||||
|
||||
|
@ -341,7 +341,7 @@ void AdBlockEasyList::saveDownloadedData(QByteArray &data)
|
||||
|
||||
// Third-party advertisers rules are with start domain (||) placeholder which needs regexps
|
||||
// So we are ignoring it for keeping good performance
|
||||
data = data.left(data.indexOf("!---------------------------Third-party advertisers"));
|
||||
data = data.left(data.indexOf("!-----------------------------Third-party adverts-----------------------------!"));
|
||||
|
||||
file.write(data);
|
||||
file.close();
|
||||
|
Loading…
Reference in New Issue
Block a user