• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.8 API Reference
  • KDE Home
  • Contact Us
 

KHTML

  • khtml
khtml_filter.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE project
2 
3  Copyright (C) 2005 Ivor Hewitt <ivor@kde.org>
4  Copyright (C) 2008 Maksim Orlovich <maksim@kde.org>
5  Copyright (C) 2008 Vyacheslav Tokarev <tsjoker@gmail.com>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Library General Public
9  License as published by the Free Software Foundation; either
10  version 2 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Library General Public License for more details.
16 
17  You should have received a copy of the GNU Library General Public License
18  along with this library; see the file COPYING.LIB. If not, write to
19  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  Boston, MA 02110-1301, USA.
21 */
22 
23 #include "khtml_filter_p.h"
24 #include <QDebug>
25 
26 // rolling hash parameters
27 #define HASH_P (1997)
28 #define HASH_Q (17509)
29 // HASH_MOD = (HASH_P^7) % HASH_Q
30 #define HASH_MOD (523)
31 
32 namespace khtml {
33 
34 // We only want a subset of features of wildcards -- just the
35 // star, so we escape the rest before passing to QRegExp.
36 // The \ is escaped due to a QRegExp bug.
37 // ### we really should rather parse it ourselves, in order to
38 // handle adblock-special things like | and ^ properly.
39 static QRegExp fromAdBlockWildcard(const QString& wcStr) {
40  QRegExp rx;
41  rx.setPatternSyntax(QRegExp::Wildcard);
42 
43  QString out;
44  for (int p = 0; p < wcStr.length(); ++p) {
45  QChar c = wcStr[p];
46  if (c == QLatin1Char('?'))
47  out += QLatin1String("[?]");
48  else if (c == QLatin1Char('['))
49  out += QLatin1String("[[]");
50  else if (c == QLatin1Char('\\'))
51  out += QLatin1String("[\\]");
52  else
53  out += c;
54  }
55 
56  rx.setPattern(out);
57  return rx;
58 }
59 
60 void FilterSet::addFilter(const QString& filterStr)
61 {
62  QString filter = filterStr.trimmed();
63  if (filter.isEmpty()) {
64  return;
65  }
66 
68  const QChar firstChar = filter.at(0);
69  if (firstChar == QLatin1Char('[') || firstChar == QLatin1Char('!') || firstChar == QLatin1Char('#') || filter.contains(QLatin1Char('#')))
70  return;
71 
72  // Strip leading @@
73  if (filter.startsWith(QLatin1String("@@"))) {
74  filter.remove(0, 2);
75  }
76 
77  // Strip options, we ignore them for now.
78  bool hadOptions = false;
79  int dollar = filter.lastIndexOf(QLatin1Char('$'));
80  if (dollar != -1) {
81  // Is it adblock's options delimiter or the special '$' char in a regular expression?
82  if (!filter.startsWith(QLatin1Char('/')) || !filter.endsWith(QLatin1Char('/'))) {
83  filter = filter.mid(0, dollar);
84  hadOptions = true;
85  }
86  }
87 
88  // Is it a regexp filter?
89  if (filter.length() > 2 && filter.startsWith(QLatin1Char('/')) && filter.endsWith(QLatin1Char('/'))) {
90  // Ignore regexp that had options as we do not support them
91  if (hadOptions) {
92  return;
93  }
94  QString inside = filter.mid(1, filter.length()-2);
95  QRegExp rx(inside);
96  reFilters.append(rx);
97  //qDebug() << "R:" << inside;
98  return;
99  }
100  // Nope, a wildcard one.
101 
102  // Disregard the rule if only one char is left or it contains unsupported adblock features ('|', "||", '^')
103  if (filter.length() < 2 || filter.contains(QLatin1Char('|')) || filter.contains(QLatin1Char('^'))) {
104  return;
105  }
106 
107  // Strip wildcards at the ends
108  int first = 0;
109  int last = filter.length() - 1;
110 
111  while (first < filter.length() && filter[first] == QLatin1Char('*'))
112  ++first;
113 
114  while (last >= 0 && filter[last] == QLatin1Char('*'))
115  --last;
116 
117  if (first > last)
118  filter = QLatin1String("*"); // erm... Well, they asked for it.
119  else
120  filter = filter.mid(first, last - first + 1);
121 
122  // Now, do we still have any wildcard stuff left?
123  int aPos = filter.indexOf('*');
124  if (aPos != -1) {
125  // check if we can use RK first (and then check full RE for the rest) for better performance
126  if (aPos > 7) {
127  QRegExp rx = fromAdBlockWildcard(filter.mid(aPos) + QLatin1Char('*'));
128  // We pad the final r.e. with * so we can check for an exact match
129  stringFiltersMatcher.addWildedString(filter.mid(0, aPos), rx);
130  } else {
131  QRegExp rx = fromAdBlockWildcard(filter);
132  reFilters.append(rx);
133  }
134  } else {
135  // Fast path
136  stringFiltersMatcher.addString(filter);
137  }
138 
139 }
140 
141 bool FilterSet::isUrlMatched(const QString& url)
142 {
143  if (stringFiltersMatcher.isMatched(url))
144  return true;
145 
146  for (int c = 0; c < reFilters.size(); ++c)
147  {
148  if (url.contains(reFilters[c]))
149  return true;
150  }
151 
152  return false;
153 }
154 
155 QString FilterSet::urlMatchedBy(const QString& url)
156 {
157  QString by;
158 
159  if (stringFiltersMatcher.isMatched(url, &by))
160  return by;
161 
162  for (int c = 0; c < reFilters.size(); ++c)
163  {
164  if (url.contains(reFilters[c]))
165  {
166  by = reFilters[c].pattern();
167  break;
168  }
169  }
170 
171  return by;
172 }
173 
174 void FilterSet::clear()
175 {
176  reFilters.clear();
177  stringFiltersMatcher.clear();
178 }
179 
180 
181 void StringsMatcher::addString(const QString& pattern)
182 {
183  if (pattern.length() < 8) {
184  // handle short string differently
185  shortStringFilters.append(pattern);
186  } else {
187  // use modified Rabin-Karp's algorithm with 8-length string hash
188  // i.e. store hash of first 8 chars in the HashMap for fast look-up
189  stringFilters.append(pattern);
190  int ind = stringFilters.size() - 1;
191  int current = 0;
192 
193  // compute hash using rolling hash
194  // hash for string: x0,x1,x2...xn-1 will be:
195  // (p^(n-1)*x0 + p^(n-2)*x1 + ... + p * xn-2 + xn-1) % q
196  // where p and q some wisely-chosen integers
197  /*for (int k = 0; k < 8; ++k)*/
198  int len = pattern.length();
199  for (int k = len - 8; k < len; ++k)
200  current = (current * HASH_P + pattern[k].unicode()) % HASH_Q;
201 
202  // insert computed hash value into HashMap
203  WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
204  if (it == stringFiltersHash.end()) {
205  QVector<int> list;
206  list.append(ind);
207  stringFiltersHash.add(current + 1, list);
208  fastLookUp.setBit(current);
209  } else {
210  it->second.append(ind);
211  }
212  }
213 }
214 
215 void StringsMatcher::addWildedString(const QString& prefix, const QRegExp& rx)
216 {
217  rePrefixes.append(prefix);
218  reFilters.append(rx);
219  int index = -rePrefixes.size();
220 
221  int current = 0;
222  for (int k = 0; k < 8; ++k)
223  current = (current * HASH_P + prefix[k].unicode()) % HASH_Q;
224 
225  // insert computed hash value into HashMap
226  WTF::HashMap<int, QVector<int> >::iterator it = stringFiltersHash.find(current + 1);
227  if (it == stringFiltersHash.end()) {
228  QVector<int> list;
229  list.append(index);
230  stringFiltersHash.add(current + 1, list);
231  fastLookUp.setBit(current);
232  } else {
233  it->second.append(index);
234  }
235 }
236 
237 bool StringsMatcher::isMatched(const QString& str, QString *by) const
238 {
239  // check short strings first
240  for (int i = 0; i < shortStringFilters.size(); ++i) {
241  if (str.contains(shortStringFilters[i]))
242  {
243  if (by != 0) *by = shortStringFilters[i];
244  return true;
245  }
246  }
247 
248  int len = str.length();
249  int k;
250 
251  int current = 0;
252  int next = 0;
253  // compute hash for first 8 characters
254  for (k = 0; k < 8 && k < len; ++k)
255  current = (current * HASH_P + str[k].unicode()) % HASH_Q;
256 
257  WTF::HashMap<int, QVector<int> >::const_iterator hashEnd = stringFiltersHash.end();
258  // main Rabin-Karp's algorithm loop
259  for (k = 7; k < len; ++k, current = next) {
260  // roll the hash if not at the end
261  // (calculate hash for the next iteration)
262  if (k + 1 < len)
263  next = (HASH_P * ((current + HASH_Q - ((HASH_MOD * str[k - 7].unicode()) % HASH_Q)) % HASH_Q) + str[k + 1].unicode()) % HASH_Q;
264 
265  if (!fastLookUp.testBit(current))
266  continue;
267 
268  // look-up the hash in the HashMap and check all strings
269  WTF::HashMap<int, QVector<int> >::const_iterator it = stringFiltersHash.find(current + 1);
270 
271  // check possible strings
272  if (it != hashEnd) {
273  for (int j = 0; j < it->second.size(); ++j) {
274  int index = it->second[j];
275  // check if we got simple string or REs prefix
276  if (index >= 0) {
277  int flen = stringFilters[index].length();
278  if (k - flen + 1 >= 0 && stringFilters[index] == str.midRef(k - flen + 1 , flen))
279  {
280  if (by != 0) *by = stringFilters[index];
281  return true;
282  }
283  } else {
284  index = -index - 1;
285  int flen = rePrefixes[index].length();
286  if (k - 8 + flen < len && rePrefixes[index] == str.midRef(k - 7, flen))
287  {
288  int remStart = k - 7 + flen;
289  QString remainder = QString::fromRawData(str.unicode() + remStart,
290  str.length() - remStart);
291  if (reFilters[index].exactMatch(remainder)) {
292  if (by != 0) *by = rePrefixes[index]+reFilters[index].pattern();
293  return true;
294  }
295  }
296  }
297  }
298  }
299  }
300 
301  return false;
302 }
303 
304 void StringsMatcher::clear()
305 {
306  stringFilters.clear();
307  shortStringFilters.clear();
308  reFilters.clear();
309  rePrefixes.clear();
310  stringFiltersHash.clear();
311  fastLookUp.resize(HASH_Q);
312  fastLookUp.fill(0, 0, HASH_Q);
313 }
314 
315 }
316 
317 // kate: indent-width 4; replace-tabs on; tab-width 4; space-indent on;
khtml::FilterSet::isUrlMatched
bool isUrlMatched(const QString &url)
Definition: khtml_filter.cpp:141
khtml::FilterSet::addFilter
void addFilter(const QString &filter)
Definition: khtml_filter.cpp:60
khtml::StringsMatcher::addWildedString
void addWildedString(const QString &prefix, const QRegExp &rx)
Definition: khtml_filter.cpp:215
khtml::StringsMatcher::addString
void addString(const QString &pattern)
Definition: khtml_filter.cpp:181
QString
QVector
Definition: khtmlview.h:36
khtml::FilterSet::urlMatchedBy
QString urlMatchedBy(const QString &url)
Definition: khtml_filter.cpp:155
khtml_filter_p.h
khtml::StringsMatcher::clear
void clear()
Definition: khtml_filter.cpp:304
HASH_Q
#define HASH_Q
Definition: khtml_filter.cpp:28
next
KAction * next(const QObject *recvr, const char *slot, QObject *parent)
HASH_P
#define HASH_P
Definition: khtml_filter.cpp:27
khtml::StringsMatcher::isMatched
bool isMatched(const QString &str, QString *by=0) const
Definition: khtml_filter.cpp:237
khtml::FilterSet::clear
void clear()
Definition: khtml_filter.cpp:174
HASH_MOD
#define HASH_MOD
Definition: khtml_filter.cpp:30
list
QStringList list(const QString &fileClass)
This file is part of the KDE documentation.
Documentation copyright © 1996-2015 The KDE developers.
Generated on Wed Nov 25 2015 21:19:45 by doxygen 1.8.5 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KHTML

Skip menu "KHTML"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdelibs-4.14.8 API Reference

Skip menu "kdelibs-4.14.8 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal