• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.8.3 API Reference
  • KDE Home
  • Contact Us
 

KDECore

ksycocadict.cpp
Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries
00002  *  Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
00003  *
00004  *  This library is free software; you can redistribute it and/or
00005  *  modify it under the terms of the GNU Library General Public
00006  *  License version 2 as published by the Free Software Foundation;
00007  *
00008  *  This library is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011  *  Library General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU Library General Public License
00014  *  along with this library; see the file COPYING.LIB.  If not, write to
00015  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016  *  Boston, MA 02110-1301, USA.
00017  **/
00018 
00019 #include "ksycocadict_p.h"
00020 #include <kservice.h>
00021 #include "ksycocaentry.h"
00022 #include "ksycoca.h"
00023 #include "kdebug.h"
00024 
00025 #include <QtCore/QBitArray>
00026 
00027 namespace
00028 {
00029 struct string_entry {
00030     string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
00031       : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
00032     {}
00033     uint hash;
00034     const int length;
00035     const QString keyStr;
00036     const QChar * const key; // always points to keyStr.unicode(); just an optimization
00037     const KSycocaEntry::Ptr payload;
00038 };
00039 }
00040 
00041 class KSycocaDictStringList : public QList<string_entry*>
00042 {
00043 public:
00044    ~KSycocaDictStringList() {
00045        qDeleteAll(*this);
00046    }
00047 };
00048 
00049 class KSycocaDict::Private
00050 {
00051 public:
00052     Private()
00053         : stringlist( 0 ),
00054           stream( 0 ),
00055           offset( 0 )
00056     {
00057     }
00058 
00059     ~Private()
00060     {
00061         delete stringlist;
00062     }
00063 
00064     // Helper for find_string and findMultiString
00065     qint32 offsetForKey(const QString& key) const;
00066 
00067     // Calculate hash - can be used during loading and during saving.
00068     quint32 hashKey(const QString & key) const;
00069 
00070     KSycocaDictStringList *stringlist;
00071     QDataStream *stream;
00072     qint64 offset;
00073     quint32 hashTableSize;
00074     QList<qint32> hashList;
00075 };
00076 
00077 KSycocaDict::KSycocaDict()
00078   : d( new Private )
00079 {
00080 }
00081 
00082 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00083   : d( new Private )
00084 {
00085    d->stream = str;
00086    d->offset = offset;
00087 
00088    quint32 test1, test2;
00089    str->device()->seek(offset);
00090    (*str) >> test1 >> test2;
00091    if ((test1 > 0x000fffff) || (test2 > 1024))
00092    {
00093        KSycoca::flagError();
00094        d->hashTableSize = 0;
00095        d->offset = 0;
00096        return;
00097    }
00098 
00099    str->device()->seek(offset);
00100    (*str) >> d->hashTableSize;
00101    (*str) >> d->hashList;
00102    d->offset = str->device()->pos(); // Start of hashtable
00103 }
00104 
00105 KSycocaDict::~KSycocaDict()
00106 {
00107    delete d;
00108 }
00109 
00110 void
00111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
00112 {
00113    if (key.isEmpty()) return; // Not allowed (should never happen)
00114    if (!payload) return; // Not allowed!
00115    if (!d->stringlist)
00116    {
00117        d->stringlist = new KSycocaDictStringList;
00118    }
00119 
00120    string_entry *entry = new string_entry(key, payload);
00121    d->stringlist->append(entry);
00122 }
00123 
00124 void
00125 KSycocaDict::remove(const QString &key)
00126 {
00127     if (!d || !d->stringlist) {
00128         return;
00129     }
00130 
00131    bool found = false;
00132    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00133       string_entry* entry = *it;
00134       if (entry->keyStr == key) {
00135          d->stringlist->erase(it);
00136          delete entry;
00137          found = true;
00138          break;
00139       }
00140    }
00141    if (!found) {
00142        kWarning(7011) << "key not found:" << key;
00143    }
00144 }
00145 
00146 int KSycocaDict::find_string(const QString &key ) const
00147 {
00148     Q_ASSERT(d);
00149 
00150     //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key);
00151     qint32 offset = d->offsetForKey(key);
00152 
00153     //kDebug(7011) << QString("offset is %1").arg(offset,8,16);
00154     if (offset == 0)
00155         return 0;
00156 
00157     if (offset > 0)
00158         return offset; // Positive ID
00159 
00160     // Lookup duplicate list.
00161     offset = -offset;
00162 
00163     d->stream->device()->seek(offset);
00164     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00165 
00166    while(true)
00167    {
00168        (*d->stream) >> offset;
00169        if (offset == 0) break;
00170        QString dupkey;
00171        (*d->stream) >> dupkey;
00172        //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00173        if (dupkey == key) return offset;
00174    }
00175    //kWarning(7011) << "Not found!";
00176 
00177    return 0;
00178 }
00179 
00180 
00181 QList<int> KSycocaDict::findMultiString(const QString &key ) const
00182 {
00183     qint32 offset = d->offsetForKey(key);
00184     QList<int> offsetList;
00185     if (offset == 0)
00186         return offsetList;
00187 
00188     if (offset > 0) { // Positive ID: one entry found
00189         offsetList.append(offset);
00190         return offsetList;
00191     }
00192 
00193     // Lookup duplicate list.
00194     offset = -offset;
00195 
00196     d->stream->device()->seek(offset);
00197     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00198 
00199     while(true)
00200     {
00201         (*d->stream) >> offset;
00202         if (offset == 0) break;
00203         QString dupkey;
00204         (*d->stream) >> dupkey;
00205         //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00206         if (dupkey == key)
00207             offsetList.append(offset);
00208     }
00209     return offsetList;
00210 }
00211 
00212 uint KSycocaDict::count() const
00213 {
00214    if ( !d || !d->stringlist ) return 0;
00215 
00216    return d->stringlist->count();
00217 }
00218 
00219 void
00220 KSycocaDict::clear()
00221 {
00222    delete d;
00223    d = 0;
00224 }
00225 
00226 uint KSycocaDict::Private::hashKey( const QString &key) const
00227 {
00228    int len = key.length();
00229    uint h = 0;
00230 
00231    for(int i = 0; i < hashList.count(); i++)
00232    {
00233       int pos = hashList[i];
00234       if (pos == 0) {
00235           continue;
00236       } else if (pos < 0) {
00237           pos = -pos;
00238           if (pos < len)
00239               h = ((h * 13) + (key[len-pos].cell() % 29)) & 0x3ffffff;
00240       } else {
00241           pos = pos-1;
00242           if (pos < len)
00243               h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00244       }
00245    }
00246    return h;
00247 }
00248 
00249 // If we have the strings
00250 //    hello
00251 //    world
00252 //    kde
00253 // Then we end up with
00254 //    ABCDE
00255 // where A = diversity of 'h' + 'w' + 'k' etc.
00256 // Also, diversity(-2) == 'l'+'l'+'d' (second character from the end)
00257 
00258 // The hasList is used for hashing:
00259 //  hashList = (-2, 1, 3) means that the hash key comes from
00260 //  the 2nd character from the right, then the 1st from the left, then the 3rd from the left.
00261 
00262 // Calculate the diversity of the strings at position 'pos'
00263 // NOTE: this code is slow, it takes 12% of the _overall_ `kbuildsycoca4 --noincremental` running time
00264 static int
00265 calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
00266 {
00267     if (inPos == 0) return 0;
00268     QBitArray matrix(sz);
00269     int pos;
00270 
00271     //static const int s_maxItems = 50;
00272     //int numItem = 0;
00273 
00274     if (inPos < 0) {
00275         pos = -inPos;
00276         for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
00277         {
00278             string_entry* entry = *it;
00279             int len = entry->length;
00280             if (pos < len) {
00281                 uint hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3ffffff;
00282                 matrix.setBit( hash % sz, true );
00283             }
00284             //if (++numItem == s_maxItems)
00285             //    break;
00286         }
00287     } else {
00288         pos = inPos-1;
00289         for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
00290         {
00291             string_entry* entry = *it;
00292             if (pos < entry->length) {
00293                 uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00294                 matrix.setBit( hash % sz, true );
00295             }
00296             //if (++numItem == s_maxItems)
00297             //    break;
00298         }
00299     }
00300     return matrix.count(true);
00301 }
00302 
00303 //
00304 // Add the diversity of the strings at position 'pos'
00305 static void
00306 addDiversity(KSycocaDictStringList *stringlist, int pos)
00307 {
00308    if (pos == 0) return;
00309    if (pos < 0) {
00310       pos = -pos;
00311       for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
00312       {
00313          string_entry* entry = *it;
00314          int len = entry->length;
00315          if (pos < len)
00316             entry->hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3fffffff;
00317       }
00318    } else {
00319       pos = pos - 1;
00320       for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
00321       {
00322          string_entry* entry = *it;
00323          if (pos < entry->length)
00324             entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00325       }
00326    }
00327 }
00328 
00329 
00330 void
00331 KSycocaDict::save(QDataStream &str)
00332 {
00333    if (count() == 0)
00334    {
00335       d->hashTableSize = 0;
00336       d->hashList.clear();
00337       str << d->hashTableSize;
00338       str << d->hashList;
00339       return;
00340    }
00341 
00342    d->offset = str.device()->pos();
00343 
00344    //kDebug(7011) << "KSycocaDict:" << count() << "entries.";
00345 
00346    //kDebug(7011) << "Calculating hash keys..";
00347 
00348    int maxLength = 0;
00349    //kDebug(7011) << "Finding maximum string length";
00350    for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
00351    {
00352       string_entry* entry = *it;
00353       entry->hash = 0;
00354       if (entry->length > maxLength)
00355          maxLength = entry->length;
00356    }
00357 
00358    //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList;
00359 
00360    // use "almost prime" number for sz (to calculate diversity) and later
00361    // for the table size of big tables
00362    // int sz = d->stringlist->count()*5-1;
00363    register unsigned int sz = count()*4 + 1;
00364    while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
00365       sz+=2;
00366 
00367    d->hashList.clear();
00368 
00369    // Times (with warm caches, i.e. after multiple runs)
00370    // kbuildsycoca4 --noincremental  2.83s user 0.20s system 95% cpu 3.187 total
00371    // kbuildsycoca4 --noincremental  2.74s user 0.25s system 93% cpu 3.205 total
00372    // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration
00373 
00374    // Now that MimeTypes are not parsed anymore:
00375    // kbuildsycoca4 --noincremental  2.18s user 0.30s system 91% cpu 2.719 total
00376    // kbuildsycoca4 --noincremental  2.07s user 0.34s system 89% cpu 2.681 total
00377 
00378    // If I enabled s_maxItems = 50, it goes down to
00379    // but I don't know if that's a good idea.
00380    // kbuildsycoca4 --noincremental  1.73s user 0.31s system 85% cpu 2.397 total
00381    // kbuildsycoca4 --noincremental  1.84s user 0.29s system 95% cpu 2.230 total
00382 
00383    // try to limit diversity scan by "predicting" positions
00384    // with high diversity
00385    QVector<int> oldvec(maxLength*2+1);
00386    oldvec.fill(0);
00387    int mindiv=0;
00388    int lastDiv = 0;
00389 
00390    while(true)
00391    {
00392       int divsum=0,divnum=0;
00393 
00394       int maxDiv = 0;
00395       int maxPos = 0;
00396       for (int pos = -maxLength; pos <= maxLength; ++pos) {
00397          // cut off
00398          if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; }
00399 
00400          const int diversity = calcDiversity(d->stringlist, pos, sz);
00401          if (diversity > maxDiv) {
00402             maxDiv = diversity;
00403             maxPos = pos;
00404          }
00405          oldvec[pos + maxLength] = diversity;
00406          divsum += diversity;
00407          ++divnum;
00408       }
00409       // arbitrary cut-off value 3/4 of average seems to work
00410       if (divnum)
00411          mindiv=(3*divsum)/(4*divnum);
00412 
00413       if (maxDiv <= lastDiv)
00414          break;
00415       //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos;
00416       lastDiv = maxDiv;
00417       addDiversity(d->stringlist, maxPos);
00418       d->hashList.append(maxPos);
00419    }
00420 
00421 
00422    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00423       (*it)->hash = d->hashKey((*it)->keyStr);
00424    }
00425 // fprintf(stderr, "Calculating minimum table size..\n");
00426 
00427    d->hashTableSize = sz;
00428 
00429    //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec;
00430 
00431    struct hashtable_entry {
00432       string_entry *entry;
00433       QList<string_entry*>* duplicates;
00434       qint64 duplicate_offset;
00435    };
00436 
00437    hashtable_entry *hashTable = new hashtable_entry[ sz ];
00438 
00439    //kDebug(7011) << "Clearing hashtable...";
00440    for (unsigned int i=0; i < sz; i++)
00441    {
00442       hashTable[i].entry = 0;
00443       hashTable[i].duplicates = 0;
00444    }
00445 
00446    //kDebug(7011) << "Filling hashtable...";
00447    for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
00448    {
00449       string_entry* entry = *it;
00450       //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath();
00451       int hash = entry->hash % sz;
00452       if (!hashTable[hash].entry)
00453       { // First entry
00454          hashTable[hash].entry = entry;
00455       }
00456       else
00457       {
00458          if (!hashTable[hash].duplicates)
00459          { // Second entry, build duplicate list.
00460             hashTable[hash].duplicates = new QList<string_entry*>;
00461             hashTable[hash].duplicates->append(hashTable[hash].entry);
00462             hashTable[hash].duplicate_offset = 0;
00463          }
00464          hashTable[hash].duplicates->append(entry);
00465       }
00466    }
00467 
00468    str << d->hashTableSize;
00469    str << d->hashList;
00470 
00471    d->offset = str.device()->pos(); // d->offset points to start of hashTable
00472    //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16);
00473 
00474    // Write the hashtable + the duplicates twice.
00475    // The duplicates are after the normal hashtable, but the offset of each
00476    // duplicate entry is written into the normal hashtable.
00477    for(int pass = 1; pass <= 2; pass++)
00478    {
00479       str.device()->seek(d->offset);
00480       //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass);
00481       for(uint i=0; i < d->hashTableSize; i++)
00482       {
00483          qint32 tmpid;
00484          if (!hashTable[i].entry)
00485             tmpid = (qint32) 0;
00486          else if (!hashTable[i].duplicates)
00487             tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID
00488          else
00489             tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID
00490          str << tmpid;
00491          //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16);
00492       }
00493       //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16);
00494 
00495       //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass);
00496       for(uint i=0; i < d->hashTableSize; i++)
00497       {
00498          const QList<string_entry*> *dups = hashTable[i].duplicates;
00499          if (dups)
00500          {
00501             hashTable[i].duplicate_offset = str.device()->pos();
00502 
00503             /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2")                           .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count());
00504 */
00505         for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
00506             {
00507                const qint32 offset = (*dup)->payload->offset();
00508                if (!offset) {
00509                    const QString storageId = (*dup)->payload->storageId();
00510                    kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data();
00511                    if ((*dup)->payload->isType(KST_KService)) {
00512                        KService::Ptr service = KService::Ptr::staticCast((*dup)->payload);
00513                        kDebug() << service->storageId() << service->entryPath();
00514                    }
00515                    // save() must have been called on the entry
00516                    Q_ASSERT_X( offset, "KSycocaDict::save",
00517                                QByteArray("entry offset is 0, save() was not called on "
00518                                + (*dup)->payload->storageId().toLatin1()
00519                                + " entryPath="
00520                                + (*dup)->payload->entryPath().toLatin1())
00521                        );
00522                }
00523                str << offset ;                       // Positive ID
00524                str << (*dup)->keyStr;                // Key (QString)
00525             }
00526             str << (qint32) 0;               // End of list marker (0)
00527          }
00528       }
00529       //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16);
00530    }
00531 
00532    //kDebug(7011) << "Cleaning up hash table.";
00533    for(uint i=0; i < d->hashTableSize; i++)
00534    {
00535       delete hashTable[i].duplicates;
00536    }
00537    delete [] hashTable;
00538 }
00539 
00540 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
00541 {
00542    if ( !stream || !offset )
00543    {
00544       kError() << "No ksycoca4 database available!" << endl;
00545       return 0;
00546    }
00547 
00548    if (hashTableSize == 0)
00549       return 0; // Unlikely to find anything :-]
00550 
00551    // Read hash-table data
00552    const uint hash = hashKey(key) % hashTableSize;
00553    //kDebug(7011) << "hash is" << hash;
00554 
00555    const qint32 off = offset+sizeof(qint32)*hash;
00556    //kDebug(7011) << QString("off is %1").arg(off,8,16);
00557    stream->device()->seek( off );
00558 
00559    qint32 retOffset;
00560    (*stream) >> retOffset;
00561    return retOffset;
00562 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Thu May 10 2012 20:49:32 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.8.3 API Reference

Skip menu "kdelibs-4.8.3 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal