• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDEUI

kcharselectdata.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002 
00003    Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public License
00016    along with this library; see the file COPYING.LIB.  If not, write to
00017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018    Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include "kcharselectdata_p.h"
00022 
00023 #include <QStringList>
00024 #include <QFile>
00025 #include <qendian.h>
00026 #include <QtConcurrentRun>
00027 
00028 #include <string.h>
00029 #include <klocalizedstring.h>
00030 #include <kstandarddirs.h>
00031 
/*
 * Constants for Hangul syllable (de)composition, see UAX #15.
 *
 * These are enumerators rather than preprocessor macros so that the very
 * short names (SBase, LCount, ...) follow normal C++ scoping rules and
 * cannot textually rewrite identifiers in code that comes after them.
 */
enum {
    SBase  = 0xAC00,
    LBase  = 0x1100,
    VBase  = 0x1161,
    TBase  = 0x11A7,
    LCount = 19,                 // number of entries in JAMO_L_TABLE
    VCount = 21,                 // number of entries in JAMO_V_TABLE
    TCount = 28,                 // number of entries in JAMO_T_TABLE
    NCount = VCount * TCount,
    SCount = LCount * NCount
};

/* Name fragments, indexed by LIndex when building a syllable name. */
static const char JAMO_L_TABLE[][4] =
    {
        "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
        "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
    };

/* Name fragments, indexed by VIndex when building a syllable name. */
static const char JAMO_V_TABLE[][4] =
    {
        "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
        "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
        "YU", "EU", "YI", "I"
    };

/* Name fragments, indexed by TIndex; entry 0 is empty. */
static const char JAMO_T_TABLE[][4] =
    {
        "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
        "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
        "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
    };
00062 
00063 bool KCharSelectData::openDataFile()
00064 {
00065     if(!dataFile.isEmpty()) {
00066         return true;
00067     } else {
00068         QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data"));
00069         if (!file.open(QIODevice::ReadOnly)) {
00070             return false;
00071         }
00072         dataFile = file.readAll();
00073         file.close();
00074         futureIndex = QtConcurrent::run(this, &KCharSelectData::createIndex, dataFile);
00075         return true;
00076     }
00077 }
00078 
00079 quint32 KCharSelectData::getDetailIndex(const QChar& c) const
00080 {
00081     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00082     // Convert from little-endian, so that this code works on PPC too.
00083     // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
00084     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
00085     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
00086 
00087     int min = 0;
00088     int mid;
00089     int max = ((offsetEnd - offsetBegin) / 27) - 1;
00090 
00091     quint16 unicode = c.unicode();
00092 
00093     static quint16 most_recent_searched;
00094     static quint32 most_recent_result;
00095 
00096 
00097     if (unicode == most_recent_searched)
00098         return most_recent_result;
00099 
00100     most_recent_searched = unicode;
00101 
00102     while (max >= min) {
00103         mid = (min + max) / 2;
00104         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
00105         if (unicode > midUnicode)
00106             min = mid + 1;
00107         else if (unicode < midUnicode)
00108             max = mid - 1;
00109         else {
00110             most_recent_result = offsetBegin + mid*27;
00111 
00112             return most_recent_result;
00113         }
00114     }
00115 
00116     most_recent_result = 0;
00117     return 0;
00118 }
00119 
00120 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base)
00121 {
00122     QString s = QString::number(code, base).toUpper();
00123     while (s.size() < length)
00124         s.prepend('0');
00125     s.prepend(prefix);
00126     return s;
00127 }
00128 
00129 QList<QChar> KCharSelectData::blockContents(int block)
00130 {
00131     if(!openDataFile()) {
00132         return QList<QChar>();
00133     }
00134 
00135     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00136     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00137     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00138 
00139     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00140 
00141     QList<QChar> res;
00142 
00143     if(block > max)
00144         return res;
00145 
00146     quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
00147     quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
00148 
00149     while(unicodeBegin < unicodeEnd) {
00150         res.append(unicodeBegin);
00151         unicodeBegin++;
00152     }
00153     res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff
00154 
00155     return res;
00156 }
00157 
00158 QList<int> KCharSelectData::sectionContents(int section)
00159 {
00160     if(!openDataFile()) {
00161         return QList<int>();
00162     }
00163 
00164     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00165     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00166     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00167 
00168     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00169 
00170     QList<int> res;
00171 
00172     if(section > max)
00173         return res;
00174 
00175     for(int i = 0; i <= max; i++) {
00176         const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00177         if(currSection == section) {
00178             res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
00179         }
00180     }
00181 
00182     return res;
00183 }
00184 
00185 QStringList KCharSelectData::sectionList()
00186 {
00187     if(!openDataFile()) {
00188         return QStringList();
00189     }
00190 
00191     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00192     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
00193     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
00194 
00195     const char* data = dataFile.constData();
00196     QStringList list;
00197     quint32 i = stringBegin;
00198     while(i < stringEnd) {
00199         list.append(i18nc("KCharSelect section name", data + i));
00200         i += strlen(data + i) + 1;
00201     }
00202 
00203     return list;
00204 }
00205 
00206 QString KCharSelectData::block(const QChar& c)
00207 {
00208     return blockName(blockIndex(c));
00209 }
00210 
00211 QString KCharSelectData::section(const QChar& c)
00212 {
00213     return sectionName(sectionIndex(blockIndex(c)));
00214 }
00215 
00216 QString KCharSelectData::name(const QChar& c)
00217 {
00218     if(!openDataFile()) {
00219         return QString();
00220     }
00221 
00222     ushort unicode = c.unicode();
00223     if ((unicode >= 0x3400 && unicode <= 0x4DB5)
00224             || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
00225         // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit
00226         return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16);
00227     } else if (c >= 0xac00 && c <= 0xd7af) {
00228         /* compute hangul syllable name as per UAX #15 */
00229         int SIndex = c.unicode() - SBase;
00230         int LIndex, VIndex, TIndex;
00231 
00232         if (SIndex < 0 || SIndex >= SCount)
00233             return QString();
00234 
00235         LIndex = SIndex / NCount;
00236         VIndex = (SIndex % NCount) / TCount;
00237         TIndex = SIndex % TCount;
00238 
00239         return QString("HANGUL SYLLABLE ") + JAMO_L_TABLE[LIndex] + JAMO_V_TABLE[VIndex] + JAMO_T_TABLE[TIndex];
00240     } else if (unicode >= 0xD800 && unicode <= 0xDB7F)
00241         return i18n("<Non Private Use High Surrogate>");
00242     else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
00243         return i18n("<Private Use High Surrogate>");
00244     else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
00245         return i18n("<Low Surrogate>");
00246     else if (unicode >= 0xE000 && unicode <= 0xF8FF)
00247         return i18n("<Private Use>");
00248 //  else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit!
00249 //   return i18n("<Plane 15 Private Use>");
00250 //  else if (unicode >= 0x100000 && unicode <= 0x10FFFD)
00251 //   return i18n("<Plane 16 Private Use>");
00252     else {
00253         const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00254         const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
00255         const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
00256 
00257         int min = 0;
00258         int mid;
00259         int max = ((offsetEnd - offsetBegin) / 6) - 1;
00260         QString s;
00261 
00262         while (max >= min) {
00263             mid = (min + max) / 2;
00264             const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
00265             if (unicode > midUnicode)
00266                 min = mid + 1;
00267             else if (unicode < midUnicode)
00268                 max = mid - 1;
00269             else {
00270                 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
00271                 s = QString(dataFile.constData() + offset);
00272                 break;
00273             }
00274         }
00275 
00276         if (s.isNull()) {
00277             return i18n("<not assigned>");
00278         } else {
00279             return s;
00280         }
00281     }
00282 }
00283 
00284 int KCharSelectData::blockIndex(const QChar& c)
00285 {
00286     if(!openDataFile()) {
00287         return 0;
00288     }
00289 
00290     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00291     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00292     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00293     const quint16 unicode = c.unicode();
00294 
00295     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00296 
00297     int i = 0;
00298 
00299     while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
00300         i++;
00301     }
00302 
00303     return i;
00304 }
00305 
00306 int KCharSelectData::sectionIndex(int block)
00307 {
00308     if(!openDataFile()) {
00309         return 0;
00310     }
00311 
00312     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00313     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00314     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00315 
00316     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00317 
00318     for(int i = 0; i <= max; i++) {
00319         if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
00320             return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00321         }
00322     }
00323 
00324     return 0;
00325 }
00326 
00327 QString KCharSelectData::blockName(int index)
00328 {
00329     if(!openDataFile()) {
00330         return QString();
00331     }
00332 
00333     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00334     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
00335     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
00336 
00337     quint32 i = stringBegin;
00338     int currIndex = 0;
00339 
00340     const char* data = dataFile.constData();
00341     while(i < stringEnd && currIndex < index) {
00342         i += strlen(data + i) + 1;
00343         currIndex++;
00344     }
00345 
00346     return i18nc("KCharselect unicode block name", data + i);
00347 }
00348 
00349 QString KCharSelectData::sectionName(int index)
00350 {
00351     if(!openDataFile()) {
00352         return QString();
00353     }
00354 
00355     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00356     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
00357     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
00358 
00359     quint32 i = stringBegin;
00360     int currIndex = 0;
00361 
00362     const char* data = dataFile.constData();
00363     while(i < stringEnd && currIndex < index) {
00364         i += strlen(data + i) + 1;
00365         currIndex++;
00366     }
00367 
00368     return i18nc("KCharselect unicode section name", data + i);
00369 }
00370 
00371 QStringList KCharSelectData::aliases(const QChar& c)
00372 {
00373     if(!openDataFile()) {
00374         return QStringList();
00375     }
00376     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00377     const int detailIndex = getDetailIndex(c);
00378     if(detailIndex == 0) {
00379         return QStringList();
00380     }
00381 
00382     const quint8 count = * (quint8 *)(udata + detailIndex + 6);
00383     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
00384 
00385     QStringList aliases;
00386 
00387     const char* data = dataFile.constData();
00388     for (int i = 0;  i < count;  i++) {
00389         aliases.append(QString::fromUtf8(data + offset));
00390         offset += strlen(data + offset) + 1;
00391     }
00392     return aliases;
00393 }
00394 
00395 QStringList KCharSelectData::notes(const QChar& c)
00396 {
00397     if(!openDataFile()) {
00398         return QStringList();
00399     }
00400     const int detailIndex = getDetailIndex(c);
00401     if(detailIndex == 0) {
00402         return QStringList();
00403     }
00404 
00405     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00406     const quint8 count = * (quint8 *)(udata + detailIndex + 11);
00407     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
00408 
00409     QStringList notes;
00410 
00411     const char* data = dataFile.constData();
00412     for (int i = 0;  i < count;  i++) {
00413         notes.append(QString::fromLatin1(data + offset));
00414         offset += strlen(data + offset) + 1;
00415     }
00416 
00417     return notes;
00418 }
00419 
00420 QList<QChar> KCharSelectData::seeAlso(const QChar& c)
00421 {
00422     if(!openDataFile()) {
00423         return QList<QChar>();
00424     }
00425     const int detailIndex = getDetailIndex(c);
00426     if(detailIndex == 0) {
00427         return QList<QChar>();
00428     }
00429 
00430     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00431     const quint8 count = * (quint8 *)(udata + detailIndex + 26);
00432     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
00433 
00434     QList<QChar> seeAlso;
00435 
00436     for (int i = 0;  i < count;  i++) {
00437         seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
00438         offset += 2;
00439     }
00440 
00441     return seeAlso;
00442 }
00443 
00444 QStringList KCharSelectData::equivalents(const QChar& c)
00445 {
00446     if(!openDataFile()) {
00447         return QStringList();
00448     }
00449     const int detailIndex = getDetailIndex(c);
00450     if(detailIndex == 0) {
00451         return QStringList();
00452     }
00453 
00454     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00455     const quint8 count = * (quint8 *)(udata + detailIndex + 21);
00456     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
00457 
00458     QStringList equivalents;
00459 
00460     const char* data = dataFile.constData();
00461     for (int i = 0;  i < count;  i++) {
00462         equivalents.append(QString::fromUtf8(data + offset));
00463         offset += strlen(data + offset) + 1;
00464     }
00465 
00466     return equivalents;
00467 }
00468 
00469 QStringList KCharSelectData::approximateEquivalents(const QChar& c)
00470 {
00471     if(!openDataFile()) {
00472         return QStringList();
00473     }
00474     const int detailIndex = getDetailIndex(c);
00475     if(detailIndex == 0) {
00476         return QStringList();
00477     }
00478 
00479     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00480     const quint8 count = * (quint8 *)(udata + detailIndex + 16);
00481     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
00482 
00483     QStringList approxEquivalents;
00484 
00485     const char* data = dataFile.constData();
00486     for (int i = 0;  i < count;  i++) {
00487         approxEquivalents.append(QString::fromUtf8(data + offset));
00488         offset += strlen(data + offset) + 1;
00489     }
00490 
00491     return approxEquivalents;
00492 }
00493 
00494 QStringList KCharSelectData::unihanInfo(const QChar& c)
00495 {
00496     if(!openDataFile()) {
00497         return QStringList();
00498     }
00499 
00500     const char* data = dataFile.constData();
00501     const uchar* udata = reinterpret_cast<const uchar*>(data);
00502     const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
00503     const quint32 offsetEnd = dataFile.size();
00504 
00505     int min = 0;
00506     int mid;
00507     int max = ((offsetEnd - offsetBegin) / 30) - 1;
00508     quint16 unicode = c.unicode();
00509 
00510     while (max >= min) {
00511         mid = (min + max) / 2;
00512         const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
00513         if (unicode > midUnicode)
00514             min = mid + 1;
00515         else if (unicode < midUnicode)
00516             max = mid - 1;
00517         else {
00518             QStringList res;
00519             for(int i = 0; i < 7; i++) {
00520                 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
00521                 if(offset != 0) {
00522                     res.append(QString::fromUtf8(data + offset));
00523                 } else {
00524                     res.append(QString());
00525                 }
00526             }
00527             return res;
00528         }
00529     }
00530 
00531     return QStringList();
00532 }
00533 
00534 bool KCharSelectData::isDisplayable(const QChar& c)
00535 {
00536     // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames.
00537     // They should be seen as non-printable characters, as trying to display them leads
00538     //  to a crash caused by a Qt "noBlockInString" assertion.
00539     if(c == 0xFDD0 || c == 0xFDD1)
00540         return false;
00541     
00542     return c.isPrint() && !isIgnorable(c);
00543 }
00544 
00545 bool KCharSelectData::isIgnorable(const QChar& c)
00546 {
00547     /*
00548      * According to the Unicode standard, Default Ignorable Code Points
00549      * should be ignored unless explicitly supported. For example, U+202E
00550      * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying
00551      * it gives the undesired effect of all text being turned RTL. We do not
00552      * have a way to "explicitly" support it, so we will treat it as
00553      * non-printable.
00554      *
00555      * There is a list of these on
00556      * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the
00557      * property Default_Ignorable_Code_Point.
00558      */
00559 
00560     //NOTE: not very nice to hardcode these here; is it worth it to modify
00561     //      the binary data file to hold them?
00562     return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
00563            c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
00564            (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
00565            (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
00566            (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
00567            (c >= 0xFFF0 && c <= 0xFFF8);
00568 }
00569 
00570 bool KCharSelectData::isCombining(const QChar &c)
00571 {
00572     return section(c) == i18nc("KCharSelect section name", "Combining Diacritical Marks");
00573     //FIXME: this is an imperfect test. There are many combining characters 
00574     //       that are outside of this section. See Grapheme_Extend in
00575     //       http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
00576 }
00577 
00578 QString KCharSelectData::display(const QChar &c, const QFont &font)
00579 {
00580     if (!isDisplayable(c)) {
00581         return QString("<b>") + i18n("Non-printable") + "</b>";
00582     } else {
00583         QString s = QString("<font size=\"+4\" face=\"") + font.family() + "\">";
00584         if (isCombining(c)) {
00585             s += displayCombining(c);
00586         } else {
00587             s += "&#" + QString::number(c.unicode()) + ';';
00588         }
00589         s += "</font>";
00590         return s;
00591     }
00592 }
00593 
00594 QString KCharSelectData::displayCombining(const QChar &c)
00595 {
00596     /*
00597      * The purpose of this is to make it easier to see how a combining
00598      * character affects the text around it.
00599      * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose,
00600      * as seen in pdfs from Unicode, but there seem to be a lot of alignment
00601      * problems with that.
00602      *
00603      * Eventually, it would be nice to determine whether the character
00604      * combines to the left or to the right, etc.
00605      */
00606     QString s = "&nbsp;&#" + QString::number(c.unicode()) + ";&nbsp;" +
00607                 " (ab&#" + QString::number(c.unicode()) + ";c)";
00608     return s;
00609 }
00610 
00611 QString KCharSelectData::categoryText(QChar::Category category)
00612 {
00613     switch (category) {
00614     case QChar::Other_Control: return i18n("Other, Control");
00615     case QChar::Other_Format: return i18n("Other, Format");
00616     case QChar::Other_NotAssigned: return i18n("Other, Not Assigned");
00617     case QChar::Other_PrivateUse: return i18n("Other, Private Use");
00618     case QChar::Other_Surrogate: return i18n("Other, Surrogate");
00619     case QChar::Letter_Lowercase: return i18n("Letter, Lowercase");
00620     case QChar::Letter_Modifier: return i18n("Letter, Modifier");
00621     case QChar::Letter_Other: return i18n("Letter, Other");
00622     case QChar::Letter_Titlecase: return i18n("Letter, Titlecase");
00623     case QChar::Letter_Uppercase: return i18n("Letter, Uppercase");
00624     case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining");
00625     case QChar::Mark_Enclosing: return i18n("Mark, Enclosing");
00626     case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing");
00627     case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit");
00628     case QChar::Number_Letter: return i18n("Number, Letter");
00629     case QChar::Number_Other: return i18n("Number, Other");
00630     case QChar::Punctuation_Connector: return i18n("Punctuation, Connector");
00631     case QChar::Punctuation_Dash: return i18n("Punctuation, Dash");
00632     case QChar::Punctuation_Close: return i18n("Punctuation, Close");
00633     case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote");
00634     case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote");
00635     case QChar::Punctuation_Other: return i18n("Punctuation, Other");
00636     case QChar::Punctuation_Open: return i18n("Punctuation, Open");
00637     case QChar::Symbol_Currency: return i18n("Symbol, Currency");
00638     case QChar::Symbol_Modifier: return i18n("Symbol, Modifier");
00639     case QChar::Symbol_Math: return i18n("Symbol, Math");
00640     case QChar::Symbol_Other: return i18n("Symbol, Other");
00641     case QChar::Separator_Line: return i18n("Separator, Line");
00642     case QChar::Separator_Paragraph: return i18n("Separator, Paragraph");
00643     case QChar::Separator_Space: return i18n("Separator, Space");
00644     default: return i18n("Unknown");
00645     }
00646 }
00647 
00648 QList<QChar> KCharSelectData::find(const QString& needle)
00649 {
00650     QSet<quint16> result;
00651 
00652     QList<QChar> returnRes;
00653     QString simplified = needle.simplified();
00654     QStringList searchStrings = splitString(needle.simplified());
00655 
00656     if(simplified.length() == 1) {
00657         // search for hex representation of the character
00658         searchStrings = QStringList(formatCode(simplified.at(0).unicode()));
00659     }
00660 
00661     if (searchStrings.count() == 0) {
00662         return returnRes;
00663     }
00664 
00665     QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
00666     foreach(const QString &s, searchStrings) {
00667         if(regExp.exactMatch(s)) {
00668             returnRes.append(regExp.cap(2).toInt(0, 16));
00669             // search for "1234" instead of "0x1234"
00670             if (s.length() == 6) {
00671                 searchStrings[searchStrings.indexOf(s)] = regExp.cap(2);
00672             }
00673         }
00674         // try to parse string as decimal number
00675         bool ok;
00676         int unicode = s.toInt(&ok);
00677         if (ok && unicode >= 0 && unicode <= 0xFFFF) {
00678             returnRes.append(unicode);
00679         }
00680     }
00681 
00682     bool firstSubString = true;
00683     foreach(const QString &s, searchStrings) {
00684         QSet<quint16> partResult = getMatchingChars(s.toLower());
00685         if (firstSubString) {
00686             result = partResult;
00687             firstSubString = false;
00688         } else {
00689             result = result.intersect(partResult);
00690         }
00691     }
00692 
00693     // remove results found by matching the code point to prevent duplicate results
00694     // while letting these characters stay at the beginning
00695     foreach(const QChar &c, returnRes) {
00696         result.remove(c.unicode());
00697     }
00698 
00699     QList<quint16> sortedResult = result.toList();
00700     qSort(sortedResult);
00701 
00702     foreach(const quint16 &c, sortedResult) {
00703         returnRes.append(c);
00704     }
00705 
00706     return returnRes;
00707 }
00708 
00709 QSet<quint16> KCharSelectData::getMatchingChars(const QString& s)
00710 {
00711     futureIndex.waitForFinished();
00712     const Index index = futureIndex;
00713     Index::const_iterator pos = index.lowerBound(s);
00714     QSet<quint16> result;
00715 
00716     while (pos != index.constEnd() && pos.key().startsWith(s)) {
00717         foreach (const quint16 &c, pos.value()) {
00718             result.insert(c);
00719         }
00720         ++pos;
00721     }
00722 
00723     return result;
00724 }
00725 
00726 QStringList KCharSelectData::splitString(const QString& s)
00727 {
00728     QStringList result;
00729     int start = 0;
00730     int end = 0;
00731     int length = s.length();
00732     while (end < length) {
00733         while (end < length && (s[end].isLetterOrNumber() || s[end] == '+')) {
00734             end++;
00735         }
00736         if (start != end) {
00737             result.append(s.mid(start, end - start));
00738         }
00739         start = end;
00740         while (end < length && !(s[end].isLetterOrNumber() || s[end] == '+')) {
00741             end++;
00742             start++;
00743         }
00744     }
00745     return result;
00746 }
00747 
00748 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString& s)
00749 {
00750     const QStringList strings = splitString(s);
00751     foreach(const QString &s, strings) {
00752         (*index)[s.toLower()].append(unicode);
00753     }
00754 }
00755 
00756 Index KCharSelectData::createIndex(const QByteArray& dataFile)
00757 {
00758     Index i;
00759 
00760     // character names
00761     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00762     const char* data = dataFile.constData();
00763     const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4);
00764     const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8);
00765 
00766     int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
00767 
00768     for (int pos = 0; pos <= max; pos++) {
00769         const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6);
00770         quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2);
00771         appendToIndex(&i, unicode, QString(data + offset));
00772     }
00773 
00774     // details
00775     const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12);
00776     const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16);
00777 
00778     max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
00779 
00780     for (int pos = 0; pos <= max; pos++) {
00781         const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27);
00782 
00783         // aliases
00784         const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6);
00785         quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2);
00786 
00787         for (int j = 0;  j < aliasCount;  j++) {
00788             appendToIndex(&i, unicode, QString::fromUtf8(data + aliasOffset));
00789             aliasOffset += strlen(data + aliasOffset) + 1;
00790         }
00791 
00792         // notes
00793         const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11);
00794         quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7);
00795 
00796         for (int j = 0;  j < notesCount;  j++) {
00797             appendToIndex(&i, unicode, QString::fromUtf8(data + notesOffset));
00798             notesOffset += strlen(data + notesOffset) + 1;
00799         }
00800 
00801         // approximate equivalents
00802         const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16);
00803         quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12);
00804 
00805         for (int j = 0;  j < apprCount;  j++) {
00806             appendToIndex(&i, unicode, QString::fromUtf8(data + apprOffset));
00807             apprOffset += strlen(data + apprOffset) + 1;
00808         }
00809 
00810         // equivalents
00811         const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21);
00812         quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17);
00813 
00814         for (int j = 0;  j < equivCount;  j++) {
00815             appendToIndex(&i, unicode, QString::fromUtf8(data + equivOffset));
00816             equivOffset += strlen(data + equivOffset) + 1;
00817         }
00818 
00819         // see also - convert to string (hex)
00820         const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26);
00821         quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22);
00822 
00823         for (int j = 0;  j < seeAlsoCount;  j++) {
00824             quint16 unicode = qFromLittleEndian<quint16> (udata + seeAlsoOffset);
00825             appendToIndex(&i, unicode, QString::number(unicode, 16));
00826             equivOffset += strlen(data + equivOffset) + 1;
00827         }
00828     }
00829 
00830     // unihan data
00831     // temporary disabled due to the huge amount of data
00832 //     const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36);
00833 //     const quint32 unihanOffsetEnd = dataFile.size();
00834 //     max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1;
00835 //
00836 //     for (int pos = 0; pos <= max; pos++) {
00837 //         const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30);
00838 //         for(int j = 0; j < 7; j++) {
00839 //             quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4);
00840 //             if(offset != 0) {
00841 //                 appendToIndex(&i, unicode, QString::fromUtf8(data + offset));
00842 //             }
00843 //         }
00844 //     }
00845 
00846     return i;
00847 }

KDEUI

Skip menu "KDEUI"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal