• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • sycoca
ksycocadict.cpp
Go to the documentation of this file.
1/* This file is part of the KDE libraries
2 * Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License version 2 as published by the Free Software Foundation;
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Library General Public License for more details.
12 *
13 * You should have received a copy of the GNU Library General Public License
14 * along with this library; see the file COPYING.LIB. If not, write to
15 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16 * Boston, MA 02110-1301, USA.
17 **/
18
19#include "ksycocadict_p.h"
20#include <kservice.h>
21#include "ksycocaentry.h"
22#include "ksycoca.h"
23#include "kdebug.h"
24
25#include <QtCore/QBitArray>
26
27namespace
28{
29struct string_entry {
30 string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
31 : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
32 {}
33 uint hash;
34 const int length;
35 const QString keyStr;
36 const QChar * const key; // always points to keyStr.unicode(); just an optimization
37 const KSycocaEntry::Ptr payload;
38};
39}
40
41class KSycocaDictStringList : public QList<string_entry*>
42{
43public:
44 ~KSycocaDictStringList() {
45 qDeleteAll(*this);
46 }
47};
48
49class KSycocaDict::Private
50{
51public:
52 Private()
53 : stringlist( 0 ),
54 stream( 0 ),
55 offset( 0 )
56 {
57 }
58
59 ~Private()
60 {
61 delete stringlist;
62 }
63
64 // Helper for find_string and findMultiString
65 qint32 offsetForKey(const QString& key) const;
66
67 // Calculate hash - can be used during loading and during saving.
68 quint32 hashKey(const QString & key) const;
69
70 KSycocaDictStringList *stringlist;
71 QDataStream *stream;
72 qint64 offset;
73 quint32 hashTableSize;
74 QList<qint32> hashList;
75};
76
77KSycocaDict::KSycocaDict()
78 : d( new Private )
79{
80}
81
82KSycocaDict::KSycocaDict(QDataStream *str, int offset)
83 : d( new Private )
84{
85 d->stream = str;
86 d->offset = offset;
87
88 quint32 test1, test2;
89 str->device()->seek(offset);
90 (*str) >> test1 >> test2;
91 if ((test1 > 0x000fffff) || (test2 > 1024))
92 {
93 KSycoca::flagError();
94 d->hashTableSize = 0;
95 d->offset = 0;
96 return;
97 }
98
99 str->device()->seek(offset);
100 (*str) >> d->hashTableSize;
101 (*str) >> d->hashList;
102 d->offset = str->device()->pos(); // Start of hashtable
103}
104
105KSycocaDict::~KSycocaDict()
106{
107 delete d;
108}
109
110void
111KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
112{
113 if (key.isEmpty()) return; // Not allowed (should never happen)
114 if (!payload) return; // Not allowed!
115 if (!d->stringlist)
116 {
117 d->stringlist = new KSycocaDictStringList;
118 }
119
120 string_entry *entry = new string_entry(key, payload);
121 d->stringlist->append(entry);
122}
123
124void
125KSycocaDict::remove(const QString &key)
126{
127 if (!d || !d->stringlist) {
128 return;
129 }
130
131 bool found = false;
132 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
133 string_entry* entry = *it;
134 if (entry->keyStr == key) {
135 d->stringlist->erase(it);
136 delete entry;
137 found = true;
138 break;
139 }
140 }
141 if (!found) {
142 kWarning(7011) << "key not found:" << key;
143 }
144}
145
146int KSycocaDict::find_string(const QString &key ) const
147{
148 Q_ASSERT(d);
149
150 //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key);
151 qint32 offset = d->offsetForKey(key);
152
153 //kDebug(7011) << QString("offset is %1").arg(offset,8,16);
154 if (offset == 0)
155 return 0;
156
157 if (offset > 0)
158 return offset; // Positive ID
159
160 // Lookup duplicate list.
161 offset = -offset;
162
163 d->stream->device()->seek(offset);
164 //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
165
166 while(true)
167 {
168 (*d->stream) >> offset;
169 if (offset == 0) break;
170 QString dupkey;
171 (*d->stream) >> dupkey;
172 //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
173 if (dupkey == key) return offset;
174 }
175 //kWarning(7011) << "Not found!";
176
177 return 0;
178}
179
180
181QList<int> KSycocaDict::findMultiString(const QString &key ) const
182{
183 qint32 offset = d->offsetForKey(key);
184 QList<int> offsetList;
185 if (offset == 0)
186 return offsetList;
187
188 if (offset > 0) { // Positive ID: one entry found
189 offsetList.append(offset);
190 return offsetList;
191 }
192
193 // Lookup duplicate list.
194 offset = -offset;
195
196 d->stream->device()->seek(offset);
197 //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
198
199 while(true)
200 {
201 (*d->stream) >> offset;
202 if (offset == 0) break;
203 QString dupkey;
204 (*d->stream) >> dupkey;
205 //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
206 if (dupkey == key)
207 offsetList.append(offset);
208 }
209 return offsetList;
210}
211
212uint KSycocaDict::count() const
213{
214 if ( !d || !d->stringlist ) return 0;
215
216 return d->stringlist->count();
217}
218
219void
220KSycocaDict::clear()
221{
222 delete d;
223 d = 0;
224}
225
226uint KSycocaDict::Private::hashKey( const QString &key) const
227{
228 int len = key.length();
229 uint h = 0;
230
231 for(int i = 0; i < hashList.count(); i++)
232 {
233 int pos = hashList[i];
234 if (pos == 0) {
235 continue;
236 } else if (pos < 0) {
237 pos = -pos;
238 if (pos < len)
239 h = ((h * 13) + (key[len-pos].cell() % 29)) & 0x3ffffff;
240 } else {
241 pos = pos-1;
242 if (pos < len)
243 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
244 }
245 }
246 return h;
247}
248
249// If we have the strings
250// hello
251// world
252// kde
253// Then we end up with
254// ABCDE
255// where A = diversity of 'h' + 'w' + 'k' etc.
256// Also, diversity(-2) == 'l'+'l'+'d' (second character from the end)
257
258// The hasList is used for hashing:
259// hashList = (-2, 1, 3) means that the hash key comes from
260// the 2nd character from the right, then the 1st from the left, then the 3rd from the left.
261
262// Calculate the diversity of the strings at position 'pos'
263// NOTE: this code is slow, it takes 12% of the _overall_ `kbuildsycoca4 --noincremental` running time
264static int
265calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
266{
267 if (inPos == 0) return 0;
268 QBitArray matrix(sz);
269 int pos;
270
271 //static const int s_maxItems = 50;
272 //int numItem = 0;
273
274 if (inPos < 0) {
275 pos = -inPos;
276 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
277 {
278 string_entry* entry = *it;
279 int len = entry->length;
280 if (pos < len) {
281 uint hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3ffffff;
282 matrix.setBit( hash % sz, true );
283 }
284 //if (++numItem == s_maxItems)
285 // break;
286 }
287 } else {
288 pos = inPos-1;
289 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
290 {
291 string_entry* entry = *it;
292 if (pos < entry->length) {
293 uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
294 matrix.setBit( hash % sz, true );
295 }
296 //if (++numItem == s_maxItems)
297 // break;
298 }
299 }
300 return matrix.count(true);
301}
302
303//
304// Add the diversity of the strings at position 'pos'
305static void
306addDiversity(KSycocaDictStringList *stringlist, int pos)
307{
308 if (pos == 0) return;
309 if (pos < 0) {
310 pos = -pos;
311 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
312 {
313 string_entry* entry = *it;
314 int len = entry->length;
315 if (pos < len)
316 entry->hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3fffffff;
317 }
318 } else {
319 pos = pos - 1;
320 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
321 {
322 string_entry* entry = *it;
323 if (pos < entry->length)
324 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
325 }
326 }
327}
328
329
330void
331KSycocaDict::save(QDataStream &str)
332{
333 if (count() == 0)
334 {
335 d->hashTableSize = 0;
336 d->hashList.clear();
337 str << d->hashTableSize;
338 str << d->hashList;
339 return;
340 }
341
342 d->offset = str.device()->pos();
343
344 //kDebug(7011) << "KSycocaDict:" << count() << "entries.";
345
346 //kDebug(7011) << "Calculating hash keys..";
347
348 int maxLength = 0;
349 //kDebug(7011) << "Finding maximum string length";
350 for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
351 {
352 string_entry* entry = *it;
353 entry->hash = 0;
354 if (entry->length > maxLength)
355 maxLength = entry->length;
356 }
357
358 //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList;
359
360 // use "almost prime" number for sz (to calculate diversity) and later
361 // for the table size of big tables
362 // int sz = d->stringlist->count()*5-1;
363 register unsigned int sz = count()*4 + 1;
364 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
365 sz+=2;
366
367 d->hashList.clear();
368
369 // Times (with warm caches, i.e. after multiple runs)
370 // kbuildsycoca4 --noincremental 2.83s user 0.20s system 95% cpu 3.187 total
371 // kbuildsycoca4 --noincremental 2.74s user 0.25s system 93% cpu 3.205 total
372 // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration
373
374 // Now that MimeTypes are not parsed anymore:
375 // kbuildsycoca4 --noincremental 2.18s user 0.30s system 91% cpu 2.719 total
376 // kbuildsycoca4 --noincremental 2.07s user 0.34s system 89% cpu 2.681 total
377
378 // If I enabled s_maxItems = 50, it goes down to
379 // but I don't know if that's a good idea.
380 // kbuildsycoca4 --noincremental 1.73s user 0.31s system 85% cpu 2.397 total
381 // kbuildsycoca4 --noincremental 1.84s user 0.29s system 95% cpu 2.230 total
382
383 // try to limit diversity scan by "predicting" positions
384 // with high diversity
385 QVector<int> oldvec(maxLength*2+1);
386 oldvec.fill(0);
387 int mindiv=0;
388 int lastDiv = 0;
389
390 while(true)
391 {
392 int divsum=0,divnum=0;
393
394 int maxDiv = 0;
395 int maxPos = 0;
396 for (int pos = -maxLength; pos <= maxLength; ++pos) {
397 // cut off
398 if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; }
399
400 const int diversity = calcDiversity(d->stringlist, pos, sz);
401 if (diversity > maxDiv) {
402 maxDiv = diversity;
403 maxPos = pos;
404 }
405 oldvec[pos + maxLength] = diversity;
406 divsum += diversity;
407 ++divnum;
408 }
409 // arbitrary cut-off value 3/4 of average seems to work
410 if (divnum)
411 mindiv=(3*divsum)/(4*divnum);
412
413 if (maxDiv <= lastDiv)
414 break;
415 //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos;
416 lastDiv = maxDiv;
417 addDiversity(d->stringlist, maxPos);
418 d->hashList.append(maxPos);
419 }
420
421
422 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
423 (*it)->hash = d->hashKey((*it)->keyStr);
424 }
425// fprintf(stderr, "Calculating minimum table size..\n");
426
427 d->hashTableSize = sz;
428
429 //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec;
430
431 struct hashtable_entry {
432 string_entry *entry;
433 QList<string_entry*>* duplicates;
434 qint64 duplicate_offset;
435 };
436
437 hashtable_entry *hashTable = new hashtable_entry[ sz ];
438
439 //kDebug(7011) << "Clearing hashtable...";
440 for (unsigned int i=0; i < sz; i++)
441 {
442 hashTable[i].entry = 0;
443 hashTable[i].duplicates = 0;
444 }
445
446 //kDebug(7011) << "Filling hashtable...";
447 for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
448 {
449 string_entry* entry = *it;
450 //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath();
451 int hash = entry->hash % sz;
452 if (!hashTable[hash].entry)
453 { // First entry
454 hashTable[hash].entry = entry;
455 }
456 else
457 {
458 if (!hashTable[hash].duplicates)
459 { // Second entry, build duplicate list.
460 hashTable[hash].duplicates = new QList<string_entry*>;
461 hashTable[hash].duplicates->append(hashTable[hash].entry);
462 hashTable[hash].duplicate_offset = 0;
463 }
464 hashTable[hash].duplicates->append(entry);
465 }
466 }
467
468 str << d->hashTableSize;
469 str << d->hashList;
470
471 d->offset = str.device()->pos(); // d->offset points to start of hashTable
472 //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16);
473
474 // Write the hashtable + the duplicates twice.
475 // The duplicates are after the normal hashtable, but the offset of each
476 // duplicate entry is written into the normal hashtable.
477 for(int pass = 1; pass <= 2; pass++)
478 {
479 str.device()->seek(d->offset);
480 //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass);
481 for(uint i=0; i < d->hashTableSize; i++)
482 {
483 qint32 tmpid;
484 if (!hashTable[i].entry)
485 tmpid = (qint32) 0;
486 else if (!hashTable[i].duplicates)
487 tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID
488 else
489 tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID
490 str << tmpid;
491 //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16);
492 }
493 //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16);
494
495 //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass);
496 for(uint i=0; i < d->hashTableSize; i++)
497 {
498 const QList<string_entry*> *dups = hashTable[i].duplicates;
499 if (dups)
500 {
501 hashTable[i].duplicate_offset = str.device()->pos();
502
503 /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count());
504*/
505 for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
506 {
507 const qint32 offset = (*dup)->payload->offset();
508 if (!offset) {
509 const QString storageId = (*dup)->payload->storageId();
510 kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data();
511 if ((*dup)->payload->isType(KST_KService)) {
512 KService::Ptr service = KService::Ptr::staticCast((*dup)->payload);
513 kDebug() << service->storageId() << service->entryPath();
514 }
515 // save() must have been called on the entry
516 Q_ASSERT_X( offset, "KSycocaDict::save",
517 QByteArray("entry offset is 0, save() was not called on "
518 + (*dup)->payload->storageId().toLatin1()
519 + " entryPath="
520 + (*dup)->payload->entryPath().toLatin1())
521 );
522 }
523 str << offset ; // Positive ID
524 str << (*dup)->keyStr; // Key (QString)
525 }
526 str << (qint32) 0; // End of list marker (0)
527 }
528 }
529 //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16);
530 }
531
532 //kDebug(7011) << "Cleaning up hash table.";
533 for(uint i=0; i < d->hashTableSize; i++)
534 {
535 delete hashTable[i].duplicates;
536 }
537 delete [] hashTable;
538}
539
540qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
541{
542 if ( !stream || !offset )
543 {
544 kError() << "No ksycoca4 database available!" << endl;
545 return 0;
546 }
547
548 if (hashTableSize == 0)
549 return 0; // Unlikely to find anything :-]
550
551 // Read hash-table data
552 const uint hash = hashKey(key) % hashTableSize;
553 //kDebug(7011) << "hash is" << hash;
554
555 const qint32 off = offset+sizeof(qint32)*hash;
556 //kDebug(7011) << QString("off is %1").arg(off,8,16);
557 stream->device()->seek( off );
558
559 qint32 retOffset;
560 (*stream) >> retOffset;
561 return retOffset;
562}
KService::storageId
QString storageId() const
Returns a normalized ID suitable for storing in configuration files.
Definition: kservice.cpp:783
KSharedPtr
Can be used to control the lifetime of an object that has derived QSharedData.
Definition: ksharedptr.h:64
KSharedPtr< KService >::staticCast
static KSharedPtr< KService > staticCast(const KSharedPtr< U > &o)
Convert KSharedPtr to KSharedPtr<T>, using a static_cast.
Definition: ksharedptr.h:173
KSycocaDict::save
void save(QDataStream &str)
Save the dictionary to the stream A reasonable fast hash algorithm will be created.
Definition: ksycocadict.cpp:331
KSycocaDict::add
void add(const QString &key, const KSycocaEntry::Ptr &payload)
Adds a 'payload' to the dictionary with key 'key'.
Definition: ksycocadict.cpp:111
KSycocaDict::KSycocaDict
KSycocaDict()
Create an empty dict, for building the database.
Definition: ksycocadict.cpp:77
KSycocaDict::~KSycocaDict
~KSycocaDict()
Definition: ksycocadict.cpp:105
KSycocaDict::count
uint count() const
The number of entries in the dictionary.
Definition: ksycocadict.cpp:212
KSycocaDict::find_string
int find_string(const QString &key) const
Looks up an entry identified by 'key'.
Definition: ksycocadict.cpp:146
KSycocaDict::findMultiString
QList< int > findMultiString(const QString &key) const
Looks up all entries identified by 'key'.
Definition: ksycocadict.cpp:181
KSycocaDict::remove
void remove(const QString &key)
Removes the 'payload' from the dictionary with key 'key'.
Definition: ksycocadict.cpp:125
KSycocaDict::clear
void clear()
Reset the dictionary.
Definition: ksycocadict.cpp:220
KSycocaEntry::entryPath
QString entryPath() const
Definition: ksycocaentry.cpp:104
KSycoca::flagError
static void flagError()
A read error occurs.
Definition: ksycoca.cpp:559
QList
Definition: kaboutdata.h:33
QString
qint32
qint64
quint32
kDebug
#define kDebug
Definition: kdebug.h:316
kWarning
#define kWarning
Definition: kdebug.h:322
kError
static QDebug kError(bool cond, int area=KDE_DEFAULT_DEBUG_AREA)
Definition: kdebug.h:187
kdebug.h
kservice.h
ksycoca.h
addDiversity
static void addDiversity(KSycocaDictStringList *stringlist, int pos)
Definition: ksycocadict.cpp:306
calcDiversity
static int calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
Definition: ksycocadict.cpp:265
ksycocadict_p.h
ksycocaentry.h
KST_KService
@ KST_KService
Definition: ksycocatype.h:31
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal