2011-03-27 21:59:40 +02:00
|
|
|
/* ============================================================
|
|
|
|
* QupZilla - WebKit based browser
|
2012-01-01 15:29:55 +01:00
|
|
|
* Copyright (C) 2010-2012 David Rosca <nowrep@gmail.com>
|
2011-03-27 21:59:40 +02:00
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
* ============================================================ */
|
|
|
|
/**
|
|
|
|
* Copyright (c) 2009, Zsombor Gegesy <gzsombor@gmail.com>
|
|
|
|
* Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net>
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the name of the Benjamin Meyer nor the names of its contributors
|
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "adblockrule.h"
|
|
|
|
#include "adblocksubscription.h"
|
|
|
|
|
2012-02-29 18:33:50 +01:00
|
|
|
#include <QDebug>
|
|
|
|
#include <QRegExp>
|
|
|
|
#include <QUrl>
|
|
|
|
#include <QString>
|
|
|
|
#include <QStringList>
|
2012-06-28 01:41:01 +02:00
|
|
|
#include <QNetworkRequest>
|
2012-07-01 12:07:00 +02:00
|
|
|
#include <QWebFrame>
|
|
|
|
#include <QWebPage>
|
2011-03-27 21:59:40 +02:00
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
// Version for Qt < 4.8 has one issue, it will wrongly
|
|
|
|
// count .co.uk (and others) as second-level domain
|
|
|
|
QString toSecondLevelDomain(const QUrl &url)
|
|
|
|
{
|
|
|
|
#if QT_VERSION >= 0x040800
|
|
|
|
const QString &topLevelDomain = url.topLevelDomain();
|
|
|
|
const QString &urlHost = url.host();
|
|
|
|
|
|
|
|
if (topLevelDomain.isEmpty() || urlHost.isEmpty()) {
|
|
|
|
return QString();
|
|
|
|
}
|
|
|
|
|
|
|
|
QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
if (domain.count(QLatin1Char('.')) == 0) {
|
2012-06-28 01:41:01 +02:00
|
|
|
return urlHost;
|
|
|
|
}
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
while (domain.count(QLatin1Char('.')) != 0) {
|
|
|
|
domain = domain.mid(domain.indexOf(QLatin1Char('.')) + 1);
|
2012-06-28 01:41:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return domain + topLevelDomain;
|
|
|
|
#else
|
|
|
|
QString domain = url.host();
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
if (domain.count(QLatin1Char('.')) == 0) {
|
2012-06-28 01:41:01 +02:00
|
|
|
return QString();
|
|
|
|
}
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
while (domain.count(QLatin1Char('.')) != 1) {
|
|
|
|
domain = domain.mid(domain.indexOf(QLatin1Char('.')) + 1);
|
2012-06-28 01:41:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return domain;
|
|
|
|
#endif
|
|
|
|
}
|
2011-03-27 21:59:40 +02:00
|
|
|
|
2012-07-01 18:11:43 +02:00
|
|
|
// Creates a rule belonging to the given subscription.
// The rule starts enabled and case insensitive with every other flag
// cleared; the filter text is then stored and parsed through setFilter().
AdBlockRule::AdBlockRule(const QString &filter, AdBlockSubscription* subscription)
    : m_subscription(subscription)
    , m_enabled(true)
    , m_cssRule(false)
    , m_exception(false)
    , m_internalDisabled(false)
    , m_domainRestricted(false)
    , m_useRegExp(false)
    , m_useDomainMatch(false)
    , m_useEndsMatch(false)
    , m_thirdParty(false)
    , m_thirdPartyException(false)
    , m_object(false)
    , m_objectException(false)
    , m_subdocument(false)
    , m_subdocumentException(false)
    , m_xmlhttprequest(false)
    , m_xmlhttprequestException(false)
    , m_document(false)
    , m_elemhide(false)
    , m_caseSensitivity(Qt::CaseInsensitive)
{
    setFilter(filter);
}
|
|
|
|
|
2012-07-01 18:11:43 +02:00
|
|
|
// Returns the subscription this rule is currently assigned to.
AdBlockSubscription* AdBlockRule::subscription() const
{
    return m_subscription;
}
|
|
|
|
|
|
|
|
// Assigns this rule to the given subscription (the pointer is only stored).
void AdBlockRule::setSubscription(AdBlockSubscription* subscription)
{
    m_subscription = subscription;
}
|
|
|
|
|
2011-03-27 21:59:40 +02:00
|
|
|
// Returns the filter text exactly as it was passed to setFilter().
QString AdBlockRule::filter() const
{
    return m_filter;
}
|
|
|
|
|
|
|
|
void AdBlockRule::setFilter(const QString &filter)
|
|
|
|
{
|
|
|
|
m_filter = filter;
|
2012-06-25 16:07:25 +02:00
|
|
|
parseFilter();
|
2011-03-27 21:59:40 +02:00
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
bool AdBlockRule::isCssRule() const
|
2011-03-27 21:59:40 +02:00
|
|
|
{
|
2012-06-25 16:07:25 +02:00
|
|
|
return m_cssRule;
|
|
|
|
}
|
2011-03-27 21:59:40 +02:00
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
// Returns the selector of a CSS rule, i.e. the text following "##".
QString AdBlockRule::cssSelector() const
{
    return m_cssSelector;
}
|
|
|
|
|
2012-07-04 16:00:53 +02:00
|
|
|
bool AdBlockRule::isDocument() const
|
|
|
|
{
|
|
|
|
return m_document;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AdBlockRule::isElemhide() const
|
|
|
|
{
|
|
|
|
return m_elemhide;
|
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
bool AdBlockRule::isDomainRestricted() const
|
2011-03-27 21:59:40 +02:00
|
|
|
{
|
2012-06-25 16:07:25 +02:00
|
|
|
return m_domainRestricted;
|
2011-03-27 21:59:40 +02:00
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
bool AdBlockRule::isException() const
|
2011-03-27 21:59:40 +02:00
|
|
|
{
|
2012-06-25 16:07:25 +02:00
|
|
|
return m_exception;
|
2011-03-27 21:59:40 +02:00
|
|
|
}
|
|
|
|
|
2012-07-01 20:11:37 +02:00
|
|
|
bool AdBlockRule::isComment() const
|
|
|
|
{
|
2012-09-04 12:42:45 +02:00
|
|
|
return m_filter.startsWith(QLatin1Char('!'));
|
2012-07-01 20:11:37 +02:00
|
|
|
}
|
|
|
|
|
2011-03-27 21:59:40 +02:00
|
|
|
bool AdBlockRule::isEnabled() const
|
|
|
|
{
|
|
|
|
return m_enabled;
|
|
|
|
}
|
|
|
|
|
|
|
|
void AdBlockRule::setEnabled(bool enabled)
|
|
|
|
{
|
|
|
|
m_enabled = enabled;
|
|
|
|
}
|
|
|
|
|
2012-07-01 14:44:01 +02:00
|
|
|
bool AdBlockRule::isSlow() const
|
|
|
|
{
|
|
|
|
return m_useRegExp;
|
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
bool AdBlockRule::isInternalDisabled() const
|
2011-03-27 21:59:40 +02:00
|
|
|
{
|
2012-06-25 16:07:25 +02:00
|
|
|
return m_internalDisabled;
|
|
|
|
}
|
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
bool AdBlockRule::networkMatch(const QNetworkRequest &request, const QString &domain, const QString &encodedUrl) const
|
2012-06-25 16:07:25 +02:00
|
|
|
{
|
|
|
|
if (m_cssRule || !m_enabled || m_internalDisabled) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
bool matched = false;
|
2012-06-25 16:07:25 +02:00
|
|
|
|
2012-08-09 19:02:25 +02:00
|
|
|
if (m_useDomainMatch) {
|
2012-09-01 11:41:12 +02:00
|
|
|
matched = _matchDomain(domain, m_matchString);
|
2012-07-01 14:44:01 +02:00
|
|
|
}
|
|
|
|
else if (m_useEndsMatch) {
|
|
|
|
matched = encodedUrl.endsWith(m_matchString, m_caseSensitivity);
|
|
|
|
}
|
2012-08-09 19:02:25 +02:00
|
|
|
else if (m_useRegExp) {
|
|
|
|
matched = (m_regExp.indexIn(encodedUrl) != -1);
|
|
|
|
}
|
2012-06-28 01:41:01 +02:00
|
|
|
else {
|
|
|
|
matched = encodedUrl.contains(m_matchString, m_caseSensitivity);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (matched) {
|
|
|
|
// Check domain restrictions
|
|
|
|
if (m_domainRestricted && !matchDomain(domain)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check third-party restriction
|
|
|
|
if (m_thirdParty && !matchThirdParty(request)) {
|
|
|
|
return false;
|
|
|
|
}
|
2012-07-01 12:07:00 +02:00
|
|
|
|
|
|
|
// Check object restrictions
|
|
|
|
if (m_object && !matchObject(request)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check subdocument restriction
|
|
|
|
if (m_subdocument && !matchSubdocument(request)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check xmlhttprequest restriction
|
|
|
|
if (m_xmlhttprequest && !matchXmlHttpRequest(request)) {
|
|
|
|
return false;
|
|
|
|
}
|
2012-06-25 16:07:25 +02:00
|
|
|
}
|
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
return matched;
|
2011-03-27 21:59:40 +02:00
|
|
|
}
|
|
|
|
|
2012-07-04 16:00:53 +02:00
|
|
|
bool AdBlockRule::urlMatch(const QUrl &url) const
|
|
|
|
{
|
|
|
|
if (!m_document && !m_elemhide) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
const QString &encodedUrl = url.toEncoded();
|
|
|
|
const QString &domain = url.host();
|
|
|
|
|
|
|
|
return networkMatch(QNetworkRequest(url), domain, encodedUrl);
|
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
bool AdBlockRule::matchDomain(const QString &domain) const
|
2011-11-06 17:01:23 +01:00
|
|
|
{
|
2012-07-04 17:53:49 +02:00
|
|
|
if (!m_enabled) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
if (!m_domainRestricted) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (m_blockedDomains.isEmpty()) {
|
|
|
|
foreach(const QString & d, m_allowedDomains) {
|
2012-09-01 11:41:12 +02:00
|
|
|
if (_matchDomain(domain, d)) {
|
2012-06-25 16:07:25 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (m_allowedDomains.isEmpty()) {
|
|
|
|
foreach(const QString & d, m_blockedDomains) {
|
2012-09-01 11:41:12 +02:00
|
|
|
if (_matchDomain(domain, d)) {
|
2012-06-25 16:07:25 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
foreach(const QString & d, m_blockedDomains) {
|
2012-09-01 11:41:12 +02:00
|
|
|
if (_matchDomain(domain, d)) {
|
2012-06-25 16:07:25 +02:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
foreach(const QString & d, m_allowedDomains) {
|
2012-09-01 11:41:12 +02:00
|
|
|
if (_matchDomain(domain, d)) {
|
2012-06-25 16:07:25 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
2011-03-27 21:59:40 +02:00
|
|
|
}
|
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
bool AdBlockRule::matchThirdParty(const QNetworkRequest &request) const
|
|
|
|
{
|
2012-07-13 11:04:14 +02:00
|
|
|
const QString &referer = request.attribute(QNetworkRequest::Attribute(QNetworkRequest::User + 151), QString()).toString();
|
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
if (referer.isEmpty()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Third-party matching should be performed on second-level domains
|
|
|
|
const QString &refererHost = toSecondLevelDomain(QUrl(referer));
|
|
|
|
const QString &host = toSecondLevelDomain(request.url());
|
|
|
|
|
2012-07-01 12:07:00 +02:00
|
|
|
bool match = refererHost != host;
|
|
|
|
|
|
|
|
return m_thirdPartyException ? !match : match;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AdBlockRule::matchObject(const QNetworkRequest &request) const
|
|
|
|
{
|
2012-09-04 11:24:41 +02:00
|
|
|
bool match = request.attribute(QNetworkRequest::Attribute(QNetworkRequest::User + 150)).toString() == QLatin1String("object");
|
2012-07-01 12:07:00 +02:00
|
|
|
|
|
|
|
return m_objectException ? !match : match;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AdBlockRule::matchSubdocument(const QNetworkRequest &request) const
|
|
|
|
{
|
|
|
|
QWebFrame* originatingFrame = static_cast<QWebFrame*>(request.originatingObject());
|
|
|
|
if (!originatingFrame) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
QWebPage* page = originatingFrame->page();
|
|
|
|
if (!page) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2012-07-13 14:12:48 +02:00
|
|
|
bool match = !(originatingFrame == page->mainFrame());
|
2012-07-01 12:07:00 +02:00
|
|
|
|
|
|
|
return m_subdocumentException ? !match : match;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool AdBlockRule::matchXmlHttpRequest(const QNetworkRequest &request) const
|
|
|
|
{
|
|
|
|
bool match = request.rawHeader("X-Requested-With") == QByteArray("XMLHttpRequest");
|
|
|
|
|
|
|
|
return m_xmlhttprequestException ? !match : match;
|
2012-06-28 01:41:01 +02:00
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
void AdBlockRule::parseFilter()
|
2011-03-27 21:59:40 +02:00
|
|
|
{
|
2012-06-25 16:07:25 +02:00
|
|
|
QString parsedLine = m_filter;
|
|
|
|
|
2012-07-01 20:11:37 +02:00
|
|
|
// Empty rule or just comment
|
2012-09-04 11:24:41 +02:00
|
|
|
if (m_filter.trimmed().isEmpty() || m_filter.startsWith(QLatin1Char('!'))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
m_enabled = false;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// CSS Element hiding rule
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.contains(QLatin1String("##"))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
m_cssRule = true;
|
2012-09-04 11:24:41 +02:00
|
|
|
int pos = parsedLine.indexOf(QLatin1String("##"));
|
2012-06-25 16:07:25 +02:00
|
|
|
|
|
|
|
// Domain restricted rule
|
2012-09-04 11:24:41 +02:00
|
|
|
if (!parsedLine.startsWith(QLatin1String("##"))) {
|
2012-07-01 18:11:43 +02:00
|
|
|
QString domains = parsedLine.left(pos);
|
2012-09-04 11:24:41 +02:00
|
|
|
parseDomains(domains, QLatin1Char(','));
|
2012-06-25 16:07:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
m_cssSelector = parsedLine.mid(pos + 2);
|
|
|
|
// CSS rule cannot have more options -> stop parsing
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Exception always starts with @@
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.startsWith(QLatin1String("@@"))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
m_exception = true;
|
|
|
|
parsedLine = parsedLine.mid(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse all options following $ char
|
2012-09-04 11:24:41 +02:00
|
|
|
int optionsIndex = parsedLine.indexOf(QLatin1Char('$'));
|
2012-06-25 16:07:25 +02:00
|
|
|
if (optionsIndex >= 0) {
|
2012-09-04 11:24:41 +02:00
|
|
|
QStringList options = parsedLine.mid(optionsIndex + 1).split(QLatin1Char(','));
|
2012-06-25 16:07:25 +02:00
|
|
|
|
|
|
|
int handledOptions = 0;
|
|
|
|
foreach(const QString & option, options) {
|
2012-09-04 11:24:41 +02:00
|
|
|
if (option.startsWith(QLatin1String("domain="))) {
|
|
|
|
parseDomains(option.mid(7), QLatin1Char('|'));
|
2012-06-25 16:07:25 +02:00
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option == QLatin1String("match-case")) {
|
2012-06-25 16:07:25 +02:00
|
|
|
m_caseSensitivity = Qt::CaseSensitive;
|
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option.endsWith(QLatin1String("third-party"))) {
|
2012-06-28 01:41:01 +02:00
|
|
|
m_thirdParty = true;
|
2012-09-04 11:24:41 +02:00
|
|
|
m_thirdPartyException = option.startsWith(QLatin1Char('~'));
|
2012-06-25 16:07:25 +02:00
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option.endsWith(QLatin1String("object"))) {
|
2012-07-01 12:07:00 +02:00
|
|
|
m_object = true;
|
2012-09-04 11:24:41 +02:00
|
|
|
m_objectException = option.startsWith(QLatin1Char('~'));
|
2012-07-01 12:07:00 +02:00
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option.endsWith(QLatin1String("subdocument"))) {
|
2012-07-01 12:07:00 +02:00
|
|
|
m_subdocument = true;
|
2012-09-04 12:42:45 +02:00
|
|
|
m_subdocumentException = option.startsWith(QLatin1Char('~'));
|
2012-07-01 12:07:00 +02:00
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option.endsWith(QLatin1String("xmlhttprequest"))) {
|
2012-07-01 12:07:00 +02:00
|
|
|
m_xmlhttprequest = true;
|
2012-09-04 11:24:41 +02:00
|
|
|
m_xmlhttprequestException = option.startsWith(QLatin1Char('~'));
|
2012-07-01 12:07:00 +02:00
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option == QLatin1String("document") && m_exception) {
|
2012-07-04 16:00:53 +02:00
|
|
|
m_document = true;
|
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option == QLatin1String("elemhide") && m_exception) {
|
2012-07-04 16:00:53 +02:00
|
|
|
m_elemhide = true;
|
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-09-04 11:24:41 +02:00
|
|
|
else if (option == QLatin1String("collapse")) {
|
2012-07-04 10:08:55 +02:00
|
|
|
// Hiding placeholders of blocked elements
|
|
|
|
++handledOptions;
|
|
|
|
}
|
2012-06-25 16:07:25 +02:00
|
|
|
}
|
|
|
|
|
2012-07-01 12:07:00 +02:00
|
|
|
// If we don't handle all options, it's safer to just disable this rule
|
2012-06-25 16:07:25 +02:00
|
|
|
if (handledOptions != options.count()) {
|
|
|
|
m_internalDisabled = true;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
parsedLine = parsedLine.left(optionsIndex);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Rule is classic regexp
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.startsWith(QLatin1Char('/')) && parsedLine.endsWith(QLatin1Char('/'))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
parsedLine = parsedLine.mid(1);
|
|
|
|
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
|
|
|
|
|
|
|
m_useRegExp = true;
|
|
|
|
m_regExp = QRegExp(parsedLine, m_caseSensitivity, QRegExp::RegExp);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove starting and ending wildcards (*)
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.startsWith(QLatin1Char('*'))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
parsedLine = parsedLine.mid(1);
|
|
|
|
}
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.endsWith(QLatin1Char('*'))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
|
|
|
}
|
|
|
|
|
2012-07-01 14:44:01 +02:00
|
|
|
// We can use fast string matching for domain here
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.startsWith(QLatin1String("||")) && parsedLine.endsWith(QLatin1Char('^'))
|
|
|
|
&& !parsedLine.contains(QRegExp("[/:?=&\\*]"))) {
|
2012-07-01 14:44:01 +02:00
|
|
|
parsedLine = parsedLine.mid(2);
|
|
|
|
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
|
|
|
|
|
|
|
m_useDomainMatch = true;
|
|
|
|
m_matchString = parsedLine;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// If rule contains only | at end, we can also use string matching
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.endsWith(QLatin1Char('|')) && !parsedLine.contains(QRegExp("[\\^\\*]"))
|
|
|
|
&& parsedLine.count(QLatin1Char('|')) == 1) {
|
2012-07-01 14:44:01 +02:00
|
|
|
parsedLine = parsedLine.left(parsedLine.size() - 1);
|
|
|
|
|
|
|
|
m_useEndsMatch = true;
|
|
|
|
m_matchString = parsedLine;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2012-06-28 01:41:01 +02:00
|
|
|
// If we still find a wildcard (*) or separator (^) or (|)
|
2012-06-25 16:07:25 +02:00
|
|
|
// we must modify parsedLine to comply with QRegExp
|
2012-09-04 11:24:41 +02:00
|
|
|
if (parsedLine.contains(QLatin1Char('*')) || parsedLine.contains(QLatin1Char('^'))
|
|
|
|
|| parsedLine.contains(QLatin1Char('|'))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
parsedLine.replace(QRegExp(QLatin1String("\\*+")), QLatin1String("*")) // remove multiple wildcards
|
|
|
|
.replace(QRegExp(QLatin1String("\\^\\|$")), QLatin1String("^")) // remove anchors following separator placeholder
|
2012-09-02 11:42:41 +02:00
|
|
|
.replace(QRegExp(QLatin1String("^(\\*)")), QString()) // remove leading wildcards
|
|
|
|
.replace(QRegExp(QLatin1String("(\\*)$")), QString())
|
2012-06-25 16:07:25 +02:00
|
|
|
.replace(QRegExp(QLatin1String("(\\W)")), QLatin1String("\\\\1")) // escape special symbols
|
|
|
|
.replace(QRegExp(QLatin1String("^\\\\\\|\\\\\\|")),
|
|
|
|
QLatin1String("^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?")) // process extended anchor at expression start
|
|
|
|
.replace(QRegExp(QLatin1String("\\\\\\^")),
|
|
|
|
QLatin1String("(?:[^\\w\\d\\-.%]|$)")) // process separator placeholders
|
|
|
|
.replace(QRegExp(QLatin1String("^\\\\\\|")), QLatin1String("^")) // process anchor at expression start
|
|
|
|
.replace(QRegExp(QLatin1String("\\\\\\|$")), QLatin1String("$")) // process anchor at expression end
|
|
|
|
.replace(QRegExp(QLatin1String("\\\\\\*")), QLatin1String(".*")); // replace wildcards by .*
|
|
|
|
|
|
|
|
m_useRegExp = true;
|
|
|
|
m_regExp = QRegExp(parsedLine, m_caseSensitivity, QRegExp::RegExp);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We haven't found anything that needs use of regexp, yay!
|
|
|
|
m_useRegExp = false;
|
|
|
|
m_matchString = parsedLine;
|
2011-03-27 21:59:40 +02:00
|
|
|
}
|
|
|
|
|
2012-06-25 16:07:25 +02:00
|
|
|
void AdBlockRule::parseDomains(const QString &domains, const QChar &separator)
|
|
|
|
{
|
|
|
|
QStringList domainsList = domains.split(separator);
|
|
|
|
|
|
|
|
foreach(const QString domain, domainsList) {
|
|
|
|
if (domain.isEmpty()) {
|
|
|
|
continue;
|
|
|
|
}
|
2012-09-04 12:42:45 +02:00
|
|
|
if (domain.startsWith(QLatin1Char('~'))) {
|
2012-06-25 16:07:25 +02:00
|
|
|
m_blockedDomains.append(domain.mid(1));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
m_allowedDomains.append(domain);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m_domainRestricted = (!m_blockedDomains.isEmpty() || !m_allowedDomains.isEmpty());
|
|
|
|
}
|
2012-09-01 11:41:12 +02:00
|
|
|
|
|
|
|
bool AdBlockRule::_matchDomain(const QString &domain, const QString &filter) const
|
|
|
|
{
|
|
|
|
if (!domain.endsWith(filter)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
int index = domain.indexOf(filter);
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
if (index == 0 || filter[0] == QLatin1Char('.')) {
|
2012-09-01 11:41:12 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-09-04 11:24:41 +02:00
|
|
|
return domain[index - 1] == QLatin1Char('.');
|
2012-09-01 11:41:12 +02:00
|
|
|
}
|