KDECore
ksycocadict.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries 00002 * Copyright (C) 1999 Waldo Bastian <bastian@kde.org> 00003 * 00004 * This library is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Library General Public 00006 * License version 2 as published by the Free Software Foundation; 00007 * 00008 * This library is distributed in the hope that it will be useful, 00009 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00011 * Library General Public License for more details. 00012 * 00013 * You should have received a copy of the GNU Library General Public License 00014 * along with this library; see the file COPYING.LIB. If not, write to 00015 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00016 * Boston, MA 02110-1301, USA. 00017 **/ 00018 00019 #include "ksycocadict_p.h" 00020 #include <kservice.h> 00021 #include "ksycocaentry.h" 00022 #include "ksycoca.h" 00023 #include "kdebug.h" 00024 00025 #include <QtCore/QBitArray> 00026 00027 namespace 00028 { 00029 struct string_entry { 00030 string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload) 00031 : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload) 00032 {} 00033 uint hash; 00034 const int length; 00035 const QString keyStr; 00036 const QChar * const key; // always points to keyStr.unicode(); just an optimization 00037 const KSycocaEntry::Ptr payload; 00038 }; 00039 } 00040 00041 class KSycocaDictStringList : public QList<string_entry*> 00042 { 00043 public: 00044 ~KSycocaDictStringList() { 00045 qDeleteAll(*this); 00046 } 00047 }; 00048 00049 class KSycocaDict::Private 00050 { 00051 public: 00052 Private() 00053 : stringlist( 0 ), 00054 stream( 0 ), 00055 offset( 0 ) 00056 { 00057 } 00058 00059 ~Private() 00060 { 00061 delete stringlist; 00062 } 00063 00064 // Helper for find_string and findMultiString 00065 qint32 offsetForKey(const QString& key) const; 00066 00067 // Calculate hash - can be used during loading and during saving. 00068 quint32 hashKey(const QString & key) const; 00069 00070 KSycocaDictStringList *stringlist; 00071 QDataStream *stream; 00072 qint64 offset; 00073 quint32 hashTableSize; 00074 QList<qint32> hashList; 00075 }; 00076 00077 KSycocaDict::KSycocaDict() 00078 : d( new Private ) 00079 { 00080 } 00081 00082 KSycocaDict::KSycocaDict(QDataStream *str, int offset) 00083 : d( new Private ) 00084 { 00085 d->stream = str; 00086 d->offset = offset; 00087 00088 quint32 test1, test2; 00089 str->device()->seek(offset); 00090 (*str) >> test1 >> test2; 00091 if ((test1 > 0x000fffff) || (test2 > 1024)) 00092 { 00093 KSycoca::flagError(); 00094 d->hashTableSize = 0; 00095 d->offset = 0; 00096 return; 00097 } 00098 00099 str->device()->seek(offset); 00100 (*str) >> d->hashTableSize; 00101 (*str) >> d->hashList; 00102 d->offset = str->device()->pos(); // Start of hashtable 00103 } 00104 00105 KSycocaDict::~KSycocaDict() 00106 { 00107 delete d; 00108 } 00109 00110 void 00111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload) 00112 { 00113 if (key.isEmpty()) return; // Not allowed (should never happen) 00114 if (!payload) return; // Not allowed! 00115 if (!d->stringlist) 00116 { 00117 d->stringlist = new KSycocaDictStringList; 00118 } 00119 00120 string_entry *entry = new string_entry(key, payload); 00121 d->stringlist->append(entry); 00122 } 00123 00124 void 00125 KSycocaDict::remove(const QString &key) 00126 { 00127 if (!d || !d->stringlist) { 00128 return; 00129 } 00130 00131 bool found = false; 00132 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) { 00133 string_entry* entry = *it; 00134 if (entry->keyStr == key) { 00135 d->stringlist->erase(it); 00136 delete entry; 00137 found = true; 00138 break; 00139 } 00140 } 00141 if (!found) { 00142 kWarning(7011) << "key not found:" << key; 00143 } 00144 } 00145 00146 int KSycocaDict::find_string(const QString &key ) const 00147 { 00148 Q_ASSERT(d); 00149 00150 //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key); 00151 qint32 offset = d->offsetForKey(key); 00152 00153 //kDebug(7011) << QString("offset is %1").arg(offset,8,16); 00154 if (offset == 0) 00155 return 0; 00156 00157 if (offset > 0) 00158 return offset; // Positive ID 00159 00160 // Lookup duplicate list. 00161 offset = -offset; 00162 00163 d->stream->device()->seek(offset); 00164 //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16); 00165 00166 while(true) 00167 { 00168 (*d->stream) >> offset; 00169 if (offset == 0) break; 00170 QString dupkey; 00171 (*d->stream) >> dupkey; 00172 //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey); 00173 if (dupkey == key) return offset; 00174 } 00175 //kWarning(7011) << "Not found!"; 00176 00177 return 0; 00178 } 00179 00180 00181 QList<int> KSycocaDict::findMultiString(const QString &key ) const 00182 { 00183 qint32 offset = d->offsetForKey(key); 00184 QList<int> offsetList; 00185 if (offset == 0) 00186 return offsetList; 00187 00188 if (offset > 0) { // Positive ID: one entry found 00189 offsetList.append(offset); 00190 return offsetList; 00191 } 00192 00193 // Lookup duplicate list. 00194 offset = -offset; 00195 00196 d->stream->device()->seek(offset); 00197 //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16); 00198 00199 while(true) 00200 { 00201 (*d->stream) >> offset; 00202 if (offset == 0) break; 00203 QString dupkey; 00204 (*d->stream) >> dupkey; 00205 //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey); 00206 if (dupkey == key) 00207 offsetList.append(offset); 00208 } 00209 return offsetList; 00210 } 00211 00212 uint KSycocaDict::count() const 00213 { 00214 if ( !d || !d->stringlist ) return 0; 00215 00216 return d->stringlist->count(); 00217 } 00218 00219 void 00220 KSycocaDict::clear() 00221 { 00222 delete d; 00223 d = 0; 00224 } 00225 00226 uint KSycocaDict::Private::hashKey( const QString &key) const 00227 { 00228 int len = key.length(); 00229 uint h = 0; 00230 00231 for(int i = 0; i < hashList.count(); i++) 00232 { 00233 int pos = hashList[i]; 00234 if (pos == 0) { 00235 continue; 00236 } else if (pos < 0) { 00237 pos = -pos; 00238 if (pos < len) 00239 h = ((h * 13) + (key[len-pos].cell() % 29)) & 0x3ffffff; 00240 } else { 00241 pos = pos-1; 00242 if (pos < len) 00243 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff; 00244 } 00245 } 00246 return h; 00247 } 00248 00249 // If we have the strings 00250 // hello 00251 // world 00252 // kde 00253 // Then we end up with 00254 // ABCDE 00255 // where A = diversity of 'h' + 'w' + 'k' etc. 00256 // Also, diversity(-2) == 'l'+'l'+'d' (second character from the end) 00257 00258 // The hasList is used for hashing: 00259 // hashList = (-2, 1, 3) means that the hash key comes from 00260 // the 2nd character from the right, then the 1st from the left, then the 3rd from the left. 00261 00262 // Calculate the diversity of the strings at position 'pos' 00263 // NOTE: this code is slow, it takes 12% of the _overall_ `kbuildsycoca4 --noincremental` running time 00264 static int 00265 calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz) 00266 { 00267 if (inPos == 0) return 0; 00268 QBitArray matrix(sz); 00269 int pos; 00270 00271 //static const int s_maxItems = 50; 00272 //int numItem = 0; 00273 00274 if (inPos < 0) { 00275 pos = -inPos; 00276 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it) 00277 { 00278 string_entry* entry = *it; 00279 int len = entry->length; 00280 if (pos < len) { 00281 uint hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3ffffff; 00282 matrix.setBit( hash % sz, true ); 00283 } 00284 //if (++numItem == s_maxItems) 00285 // break; 00286 } 00287 } else { 00288 pos = inPos-1; 00289 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it) 00290 { 00291 string_entry* entry = *it; 00292 if (pos < entry->length) { 00293 uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff; 00294 matrix.setBit( hash % sz, true ); 00295 } 00296 //if (++numItem == s_maxItems) 00297 // break; 00298 } 00299 } 00300 return matrix.count(true); 00301 } 00302 00303 // 00304 // Add the diversity of the strings at position 'pos' 00305 static void 00306 addDiversity(KSycocaDictStringList *stringlist, int pos) 00307 { 00308 if (pos == 0) return; 00309 if (pos < 0) { 00310 pos = -pos; 00311 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it) 00312 { 00313 string_entry* entry = *it; 00314 int len = entry->length; 00315 if (pos < len) 00316 entry->hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3fffffff; 00317 } 00318 } else { 00319 pos = pos - 1; 00320 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it) 00321 { 00322 string_entry* entry = *it; 00323 if (pos < entry->length) 00324 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff; 00325 } 00326 } 00327 } 00328 00329 00330 void 00331 KSycocaDict::save(QDataStream &str) 00332 { 00333 if (count() == 0) 00334 { 00335 d->hashTableSize = 0; 00336 d->hashList.clear(); 00337 str << d->hashTableSize; 00338 str << d->hashList; 00339 return; 00340 } 00341 00342 d->offset = str.device()->pos(); 00343 00344 //kDebug(7011) << "KSycocaDict:" << count() << "entries."; 00345 00346 //kDebug(7011) << "Calculating hash keys.."; 00347 00348 int maxLength = 0; 00349 //kDebug(7011) << "Finding maximum string length"; 00350 for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it) 00351 { 00352 string_entry* entry = *it; 00353 entry->hash = 0; 00354 if (entry->length > maxLength) 00355 maxLength = entry->length; 00356 } 00357 00358 //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList; 00359 00360 // use "almost prime" number for sz (to calculate diversity) and later 00361 // for the table size of big tables 00362 // int sz = d->stringlist->count()*5-1; 00363 register unsigned int sz = count()*4 + 1; 00364 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) 00365 sz+=2; 00366 00367 d->hashList.clear(); 00368 00369 // Times (with warm caches, i.e. after multiple runs) 00370 // kbuildsycoca4 --noincremental 2.83s user 0.20s system 95% cpu 3.187 total 00371 // kbuildsycoca4 --noincremental 2.74s user 0.25s system 93% cpu 3.205 total 00372 // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration 00373 00374 // Now that MimeTypes are not parsed anymore: 00375 // kbuildsycoca4 --noincremental 2.18s user 0.30s system 91% cpu 2.719 total 00376 // kbuildsycoca4 --noincremental 2.07s user 0.34s system 89% cpu 2.681 total 00377 00378 // If I enabled s_maxItems = 50, it goes down to 00379 // but I don't know if that's a good idea. 00380 // kbuildsycoca4 --noincremental 1.73s user 0.31s system 85% cpu 2.397 total 00381 // kbuildsycoca4 --noincremental 1.84s user 0.29s system 95% cpu 2.230 total 00382 00383 // try to limit diversity scan by "predicting" positions 00384 // with high diversity 00385 QVector<int> oldvec(maxLength*2+1); 00386 oldvec.fill(0); 00387 int mindiv=0; 00388 int lastDiv = 0; 00389 00390 while(true) 00391 { 00392 int divsum=0,divnum=0; 00393 00394 int maxDiv = 0; 00395 int maxPos = 0; 00396 for (int pos = -maxLength; pos <= maxLength; ++pos) { 00397 // cut off 00398 if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; } 00399 00400 const int diversity = calcDiversity(d->stringlist, pos, sz); 00401 if (diversity > maxDiv) { 00402 maxDiv = diversity; 00403 maxPos = pos; 00404 } 00405 oldvec[pos + maxLength] = diversity; 00406 divsum += diversity; 00407 ++divnum; 00408 } 00409 // arbitrary cut-off value 3/4 of average seems to work 00410 if (divnum) 00411 mindiv=(3*divsum)/(4*divnum); 00412 00413 if (maxDiv <= lastDiv) 00414 break; 00415 //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos; 00416 lastDiv = maxDiv; 00417 addDiversity(d->stringlist, maxPos); 00418 d->hashList.append(maxPos); 00419 } 00420 00421 00422 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) { 00423 (*it)->hash = d->hashKey((*it)->keyStr); 00424 } 00425 // fprintf(stderr, "Calculating minimum table size..\n"); 00426 00427 d->hashTableSize = sz; 00428 00429 //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec; 00430 00431 struct hashtable_entry { 00432 string_entry *entry; 00433 QList<string_entry*>* duplicates; 00434 qint64 duplicate_offset; 00435 }; 00436 00437 hashtable_entry *hashTable = new hashtable_entry[ sz ]; 00438 00439 //kDebug(7011) << "Clearing hashtable..."; 00440 for (unsigned int i=0; i < sz; i++) 00441 { 00442 hashTable[i].entry = 0; 00443 hashTable[i].duplicates = 0; 00444 } 00445 00446 //kDebug(7011) << "Filling hashtable..."; 00447 for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it) 00448 { 00449 string_entry* entry = *it; 00450 //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath(); 00451 int hash = entry->hash % sz; 00452 if (!hashTable[hash].entry) 00453 { // First entry 00454 hashTable[hash].entry = entry; 00455 } 00456 else 00457 { 00458 if (!hashTable[hash].duplicates) 00459 { // Second entry, build duplicate list. 00460 hashTable[hash].duplicates = new QList<string_entry*>; 00461 hashTable[hash].duplicates->append(hashTable[hash].entry); 00462 hashTable[hash].duplicate_offset = 0; 00463 } 00464 hashTable[hash].duplicates->append(entry); 00465 } 00466 } 00467 00468 str << d->hashTableSize; 00469 str << d->hashList; 00470 00471 d->offset = str.device()->pos(); // d->offset points to start of hashTable 00472 //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16); 00473 00474 // Write the hashtable + the duplicates twice. 00475 // The duplicates are after the normal hashtable, but the offset of each 00476 // duplicate entry is written into the normal hashtable. 00477 for(int pass = 1; pass <= 2; pass++) 00478 { 00479 str.device()->seek(d->offset); 00480 //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass); 00481 for(uint i=0; i < d->hashTableSize; i++) 00482 { 00483 qint32 tmpid; 00484 if (!hashTable[i].entry) 00485 tmpid = (qint32) 0; 00486 else if (!hashTable[i].duplicates) 00487 tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID 00488 else 00489 tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID 00490 str << tmpid; 00491 //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16); 00492 } 00493 //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16); 00494 00495 //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass); 00496 for(uint i=0; i < d->hashTableSize; i++) 00497 { 00498 const QList<string_entry*> *dups = hashTable[i].duplicates; 00499 if (dups) 00500 { 00501 hashTable[i].duplicate_offset = str.device()->pos(); 00502 00503 /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()); 00504 */ 00505 for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup) 00506 { 00507 const qint32 offset = (*dup)->payload->offset(); 00508 if (!offset) { 00509 const QString storageId = (*dup)->payload->storageId(); 00510 kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data(); 00511 if ((*dup)->payload->isType(KST_KService)) { 00512 KService::Ptr service = KService::Ptr::staticCast((*dup)->payload); 00513 kDebug() << service->storageId() << service->entryPath(); 00514 } 00515 // save() must have been called on the entry 00516 Q_ASSERT_X( offset, "KSycocaDict::save", 00517 QByteArray("entry offset is 0, save() was not called on " 00518 + (*dup)->payload->storageId().toLatin1() 00519 + " entryPath=" 00520 + (*dup)->payload->entryPath().toLatin1()) 00521 ); 00522 } 00523 str << offset ; // Positive ID 00524 str << (*dup)->keyStr; // Key (QString) 00525 } 00526 str << (qint32) 0; // End of list marker (0) 00527 } 00528 } 00529 //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16); 00530 } 00531 00532 //kDebug(7011) << "Cleaning up hash table."; 00533 for(uint i=0; i < d->hashTableSize; i++) 00534 { 00535 delete hashTable[i].duplicates; 00536 } 00537 delete [] hashTable; 00538 } 00539 00540 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const 00541 { 00542 if ( !stream || !offset ) 00543 { 00544 kError() << "No ksycoca4 database available!" << endl; 00545 return 0; 00546 } 00547 00548 if (hashTableSize == 0) 00549 return 0; // Unlikely to find anything :-] 00550 00551 // Read hash-table data 00552 const uint hash = hashKey(key) % hashTableSize; 00553 //kDebug(7011) << "hash is" << hash; 00554 00555 const qint32 off = offset+sizeof(qint32)*hash; 00556 //kDebug(7011) << QString("off is %1").arg(off,8,16); 00557 stream->device()->seek( off ); 00558 00559 qint32 retOffset; 00560 (*stream) >> retOffset; 00561 return retOffset; 00562 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Wed May 2 2012 17:05:20 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2012 The KDE developers.
Generated on Wed May 2 2012 17:05:20 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.