• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDEUI

  • kdeui
  • widgets
kcharselectdata.cpp
Go to the documentation of this file.
1/* This file is part of the KDE libraries
2
3 Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public License
16 along with this library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 Boston, MA 02110-1301, USA.
19*/
20
21#include "kcharselectdata_p.h"
22
23#include <QStringList>
24#include <QFile>
25#include <qendian.h>
26#include <QtConcurrentRun>
27
28#include <string.h>
29#include <klocalizedstring.h>
30#include <kstandarddirs.h>
31
/*
 * Constants for Hangul syllable (de)composition, see UAX #15.
 * Kept as compile-time integer constants so they can be used in
 * ordinary integer arithmetic and comparisons.
 */
enum {
    SBase = 0xAC00,
    LBase = 0x1100,
    VBase = 0x1161,
    TBase = 0x11A7,
    LCount = 19,
    VCount = 21,
    TCount = 28,
    NCount = VCount * TCount,
    SCount = LCount * NCount
};

/* Short names of the leading consonants, indexed 0 .. LCount-1. */
static const char JAMO_L_TABLE[][4] = {
    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S",
    "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
};

/* Short names of the vowels, indexed 0 .. VCount-1. */
static const char JAMO_V_TABLE[][4] = {
    "A", "AE", "YA", "YAE", "EO", "E", "YEO",
    "YE", "O", "WA", "WAE", "OE", "YO", "U",
    "WEO", "WE", "WI", "YU", "EU", "YI", "I"
};

/* Short names of the trailing consonants, indexed 0 .. TCount-1 (index 0 is "no trailing consonant"). */
static const char JAMO_T_TABLE[][4] = {
    "", "G", "GG", "GS", "N", "NJ", "NH",
    "D", "L", "LG", "LM", "LB", "LS", "LT",
    "LP", "LH", "M", "B", "BS", "S", "SS",
    "NG", "J", "C", "K", "T", "P", "H"
};
62
63bool KCharSelectData::openDataFile()
64{
65 if(!dataFile.isEmpty()) {
66 return true;
67 } else {
68 QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data"));
69 if (!file.open(QIODevice::ReadOnly)) {
70 return false;
71 }
72 dataFile = file.readAll();
73 file.close();
74 futureIndex = QtConcurrent::run(this, &KCharSelectData::createIndex, dataFile);
75 return true;
76 }
77}
78
79quint32 KCharSelectData::getDetailIndex(const QChar& c) const
80{
81 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
82 // Convert from little-endian, so that this code works on PPC too.
83 // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
84 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
85 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
86
87 int min = 0;
88 int mid;
89 int max = ((offsetEnd - offsetBegin) / 27) - 1;
90
91 quint16 unicode = c.unicode();
92
93 static quint16 most_recent_searched;
94 static quint32 most_recent_result;
95
96
97 if (unicode == most_recent_searched)
98 return most_recent_result;
99
100 most_recent_searched = unicode;
101
102 while (max >= min) {
103 mid = (min + max) / 2;
104 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
105 if (unicode > midUnicode)
106 min = mid + 1;
107 else if (unicode < midUnicode)
108 max = mid - 1;
109 else {
110 most_recent_result = offsetBegin + mid*27;
111
112 return most_recent_result;
113 }
114 }
115
116 most_recent_result = 0;
117 return 0;
118}
119
120QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base)
121{
122 QString s = QString::number(code, base).toUpper();
123 while (s.size() < length)
124 s.prepend('0');
125 s.prepend(prefix);
126 return s;
127}
128
129QList<QChar> KCharSelectData::blockContents(int block)
130{
131 if(!openDataFile()) {
132 return QList<QChar>();
133 }
134
135 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
136 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
137 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
138
139 int max = ((offsetEnd - offsetBegin) / 4) - 1;
140
141 QList<QChar> res;
142
143 if(block > max)
144 return res;
145
146 quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
147 quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
148
149 while(unicodeBegin < unicodeEnd) {
150 res.append(unicodeBegin);
151 unicodeBegin++;
152 }
153 res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff
154
155 return res;
156}
157
158QList<int> KCharSelectData::sectionContents(int section)
159{
160 if(!openDataFile()) {
161 return QList<int>();
162 }
163
164 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
165 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
166 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
167
168 int max = ((offsetEnd - offsetBegin) / 4) - 1;
169
170 QList<int> res;
171
172 if(section > max)
173 return res;
174
175 for(int i = 0; i <= max; i++) {
176 const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
177 if(currSection == section) {
178 res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
179 }
180 }
181
182 return res;
183}
184
185QStringList KCharSelectData::sectionList()
186{
187 if(!openDataFile()) {
188 return QStringList();
189 }
190
191 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
192 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
193 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
194
195 const char* data = dataFile.constData();
196 QStringList list;
197 quint32 i = stringBegin;
198 while(i < stringEnd) {
199 list.append(i18nc("KCharSelect section name", data + i));
200 i += strlen(data + i) + 1;
201 }
202
203 return list;
204}
205
206QString KCharSelectData::block(const QChar& c)
207{
208 return blockName(blockIndex(c));
209}
210
211QString KCharSelectData::section(const QChar& c)
212{
213 return sectionName(sectionIndex(blockIndex(c)));
214}
215
216QString KCharSelectData::name(const QChar& c)
217{
218 if(!openDataFile()) {
219 return QString();
220 }
221
222 ushort unicode = c.unicode();
223 if ((unicode >= 0x3400 && unicode <= 0x4DB5)
224 || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
225 // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit
226 return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16);
227 } else if (c >= 0xac00 && c <= 0xd7af) {
228 /* compute hangul syllable name as per UAX #15 */
229 int SIndex = c.unicode() - SBase;
230 int LIndex, VIndex, TIndex;
231
232 if (SIndex < 0 || SIndex >= SCount)
233 return QString();
234
235 LIndex = SIndex / NCount;
236 VIndex = (SIndex % NCount) / TCount;
237 TIndex = SIndex % TCount;
238
239 return QLatin1String("HANGUL SYLLABLE ") + QLatin1String(JAMO_L_TABLE[LIndex])
240 + QLatin1String(JAMO_V_TABLE[VIndex]) + QLatin1String(JAMO_T_TABLE[TIndex]);
241 } else if (unicode >= 0xD800 && unicode <= 0xDB7F)
242 return i18n("<Non Private Use High Surrogate>");
243 else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
244 return i18n("<Private Use High Surrogate>");
245 else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
246 return i18n("<Low Surrogate>");
247 else if (unicode >= 0xE000 && unicode <= 0xF8FF)
248 return i18n("<Private Use>");
249// else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit!
250// return i18n("<Plane 15 Private Use>");
251// else if (unicode >= 0x100000 && unicode <= 0x10FFFD)
252// return i18n("<Plane 16 Private Use>");
253 else {
254 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
255 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
256 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
257
258 int min = 0;
259 int mid;
260 int max = ((offsetEnd - offsetBegin) / 6) - 1;
261 QString s;
262
263 while (max >= min) {
264 mid = (min + max) / 2;
265 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
266 if (unicode > midUnicode)
267 min = mid + 1;
268 else if (unicode < midUnicode)
269 max = mid - 1;
270 else {
271 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
272 s = QString(dataFile.constData() + offset + 1);
273 break;
274 }
275 }
276
277 if (s.isNull()) {
278 return i18n("<not assigned>");
279 } else {
280 return s;
281 }
282 }
283}
284
285int KCharSelectData::blockIndex(const QChar& c)
286{
287 if(!openDataFile()) {
288 return 0;
289 }
290
291 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
292 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
293 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
294 const quint16 unicode = c.unicode();
295
296 int max = ((offsetEnd - offsetBegin) / 4) - 1;
297
298 int i = 0;
299
300 while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
301 i++;
302 }
303
304 return i;
305}
306
307int KCharSelectData::sectionIndex(int block)
308{
309 if(!openDataFile()) {
310 return 0;
311 }
312
313 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
314 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
315 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
316
317 int max = ((offsetEnd - offsetBegin) / 4) - 1;
318
319 for(int i = 0; i <= max; i++) {
320 if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
321 return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
322 }
323 }
324
325 return 0;
326}
327
328QString KCharSelectData::blockName(int index)
329{
330 if(!openDataFile()) {
331 return QString();
332 }
333
334 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
335 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
336 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
337
338 quint32 i = stringBegin;
339 int currIndex = 0;
340
341 const char* data = dataFile.constData();
342 while(i < stringEnd && currIndex < index) {
343 i += strlen(data + i) + 1;
344 currIndex++;
345 }
346
347 return i18nc("KCharselect unicode block name", data + i);
348}
349
350QString KCharSelectData::sectionName(int index)
351{
352 if(!openDataFile()) {
353 return QString();
354 }
355
356 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
357 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
358 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
359
360 quint32 i = stringBegin;
361 int currIndex = 0;
362
363 const char* data = dataFile.constData();
364 while(i < stringEnd && currIndex < index) {
365 i += strlen(data + i) + 1;
366 currIndex++;
367 }
368
369 return i18nc("KCharselect unicode section name", data + i);
370}
371
372QStringList KCharSelectData::aliases(const QChar& c)
373{
374 if(!openDataFile()) {
375 return QStringList();
376 }
377 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
378 const int detailIndex = getDetailIndex(c);
379 if(detailIndex == 0) {
380 return QStringList();
381 }
382
383 const quint8 count = * (quint8 *)(udata + detailIndex + 6);
384 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
385
386 QStringList aliases;
387
388 const char* data = dataFile.constData();
389 for (int i = 0; i < count; i++) {
390 aliases.append(QString::fromLatin1(data + offset));
391 offset += strlen(data + offset) + 1;
392 }
393 return aliases;
394}
395
396QStringList KCharSelectData::notes(const QChar& c)
397{
398 if(!openDataFile()) {
399 return QStringList();
400 }
401 const int detailIndex = getDetailIndex(c);
402 if(detailIndex == 0) {
403 return QStringList();
404 }
405
406 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
407 const quint8 count = * (quint8 *)(udata + detailIndex + 11);
408 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
409
410 QStringList notes;
411
412 const char* data = dataFile.constData();
413 for (int i = 0; i < count; i++) {
414 notes.append(QString::fromLatin1(data + offset));
415 offset += strlen(data + offset) + 1;
416 }
417
418 return notes;
419}
420
421QList<QChar> KCharSelectData::seeAlso(const QChar& c)
422{
423 if(!openDataFile()) {
424 return QList<QChar>();
425 }
426 const int detailIndex = getDetailIndex(c);
427 if(detailIndex == 0) {
428 return QList<QChar>();
429 }
430
431 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
432 const quint8 count = * (quint8 *)(udata + detailIndex + 26);
433 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
434
435 QList<QChar> seeAlso;
436
437 for (int i = 0; i < count; i++) {
438 seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
439 offset += 2;
440 }
441
442 return seeAlso;
443}
444
445QStringList KCharSelectData::equivalents(const QChar& c)
446{
447 if(!openDataFile()) {
448 return QStringList();
449 }
450 const int detailIndex = getDetailIndex(c);
451 if(detailIndex == 0) {
452 return QStringList();
453 }
454
455 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
456 const quint8 count = * (quint8 *)(udata + detailIndex + 21);
457 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
458
459 QStringList equivalents;
460
461 const char* data = dataFile.constData();
462 for (int i = 0; i < count; i++) {
463 equivalents.append(QString::fromLatin1(data + offset));
464 offset += strlen(data + offset) + 1;
465 }
466
467 return equivalents;
468}
469
470QStringList KCharSelectData::approximateEquivalents(const QChar& c)
471{
472 if(!openDataFile()) {
473 return QStringList();
474 }
475 const int detailIndex = getDetailIndex(c);
476 if(detailIndex == 0) {
477 return QStringList();
478 }
479
480 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
481 const quint8 count = * (quint8 *)(udata + detailIndex + 16);
482 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
483
484 QStringList approxEquivalents;
485
486 const char* data = dataFile.constData();
487 for (int i = 0; i < count; i++) {
488 approxEquivalents.append(QString::fromLatin1(data + offset));
489 offset += strlen(data + offset) + 1;
490 }
491
492 return approxEquivalents;
493}
494
495QStringList KCharSelectData::unihanInfo(const QChar& c)
496{
497 if(!openDataFile()) {
498 return QStringList();
499 }
500
501 const char* data = dataFile.constData();
502 const uchar* udata = reinterpret_cast<const uchar*>(data);
503 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
504 const quint32 offsetEnd = dataFile.size();
505
506 int min = 0;
507 int mid;
508 int max = ((offsetEnd - offsetBegin) / 30) - 1;
509 quint16 unicode = c.unicode();
510
511 while (max >= min) {
512 mid = (min + max) / 2;
513 const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
514 if (unicode > midUnicode)
515 min = mid + 1;
516 else if (unicode < midUnicode)
517 max = mid - 1;
518 else {
519 QStringList res;
520 for(int i = 0; i < 7; i++) {
521 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
522 if(offset != 0) {
523 res.append(QString::fromLatin1(data + offset));
524 } else {
525 res.append(QString());
526 }
527 }
528 return res;
529 }
530 }
531
532 return QStringList();
533}
534
535QChar::Category KCharSelectData::category(const QChar& c)
536{
537 if(!openDataFile()) {
538 return c.category();
539 }
540
541 ushort unicode = c.unicode();
542
543 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
544 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
545 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
546
547 int min = 0;
548 int mid;
549 int max = ((offsetEnd - offsetBegin) / 6) - 1;
550 QString s;
551
552 while (max >= min) {
553 mid = (min + max) / 2;
554 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
555 if (unicode > midUnicode)
556 min = mid + 1;
557 else if (unicode < midUnicode)
558 max = mid - 1;
559 else {
560 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
561 const quint8 categoryCode = * (quint8 *)(data + offset);
562 return QChar::Category(categoryCode);
563 }
564 }
565
566 return c.category();
567}
568
569bool KCharSelectData::isPrint(const QChar& c)
570{
571 QChar::Category cat = category(c);
572 return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned);
573}
574
575bool KCharSelectData::isDisplayable(const QChar& c)
576{
577 // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames.
578 // They should be seen as non-printable characters, as trying to display them leads
579 // to a crash caused by a Qt "noBlockInString" assertion.
580 if(c == 0xFDD0 || c == 0xFDD1)
581 return false;
582
583 return !isIgnorable(c) && isPrint(c);
584}
585
586bool KCharSelectData::isIgnorable(const QChar& c)
587{
588 /*
589 * According to the Unicode standard, Default Ignorable Code Points
590 * should be ignored unless explicitly supported. For example, U+202E
591 * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying
592 * it gives the undesired effect of all text being turned RTL. We do not
593 * have a way to "explicitly" support it, so we will treat it as
594 * non-printable.
595 *
596 * There is a list of these on
597 * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the
598 * property Default_Ignorable_Code_Point.
599 */
600
601 //NOTE: not very nice to hardcode these here; is it worth it to modify
602 // the binary data file to hold them?
603 return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
604 c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
605 (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
606 (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
607 (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
608 (c >= 0xFFF0 && c <= 0xFFF8);
609}
610
611bool KCharSelectData::isCombining(const QChar &c)
612{
613 return section(c) == i18nc("KCharSelect section name", "Combining Diacritical Marks");
614 //FIXME: this is an imperfect test. There are many combining characters
615 // that are outside of this section. See Grapheme_Extend in
616 // http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
617}
618
619QString KCharSelectData::display(const QChar &c, const QFont &font)
620{
621 if (!isDisplayable(c)) {
622 return QString("<b>") + i18n("Non-printable") + "</b>";
623 } else {
624 QString s = QString("<font size=\"+4\" face=\"") + font.family() + "\">";
625 if (isCombining(c)) {
626 s += displayCombining(c);
627 } else {
628 s += "&#" + QString::number(c.unicode()) + ';';
629 }
630 s += "</font>";
631 return s;
632 }
633}
634
635QString KCharSelectData::displayCombining(const QChar &c)
636{
637 /*
638 * The purpose of this is to make it easier to see how a combining
639 * character affects the text around it.
640 * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose,
641 * as seen in pdfs from Unicode, but there seem to be a lot of alignment
642 * problems with that.
643 *
644 * Eventually, it would be nice to determine whether the character
645 * combines to the left or to the right, etc.
646 */
647 QString s = "&nbsp;&#" + QString::number(c.unicode()) + ";&nbsp;" +
648 " (ab&#" + QString::number(c.unicode()) + ";c)";
649 return s;
650}
651
652QString KCharSelectData::categoryText(QChar::Category category)
653{
654 switch (category) {
655 case QChar::Other_Control: return i18n("Other, Control");
656 case QChar::Other_Format: return i18n("Other, Format");
657 case QChar::Other_NotAssigned: return i18n("Other, Not Assigned");
658 case QChar::Other_PrivateUse: return i18n("Other, Private Use");
659 case QChar::Other_Surrogate: return i18n("Other, Surrogate");
660 case QChar::Letter_Lowercase: return i18n("Letter, Lowercase");
661 case QChar::Letter_Modifier: return i18n("Letter, Modifier");
662 case QChar::Letter_Other: return i18n("Letter, Other");
663 case QChar::Letter_Titlecase: return i18n("Letter, Titlecase");
664 case QChar::Letter_Uppercase: return i18n("Letter, Uppercase");
665 case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining");
666 case QChar::Mark_Enclosing: return i18n("Mark, Enclosing");
667 case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing");
668 case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit");
669 case QChar::Number_Letter: return i18n("Number, Letter");
670 case QChar::Number_Other: return i18n("Number, Other");
671 case QChar::Punctuation_Connector: return i18n("Punctuation, Connector");
672 case QChar::Punctuation_Dash: return i18n("Punctuation, Dash");
673 case QChar::Punctuation_Close: return i18n("Punctuation, Close");
674 case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote");
675 case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote");
676 case QChar::Punctuation_Other: return i18n("Punctuation, Other");
677 case QChar::Punctuation_Open: return i18n("Punctuation, Open");
678 case QChar::Symbol_Currency: return i18n("Symbol, Currency");
679 case QChar::Symbol_Modifier: return i18n("Symbol, Modifier");
680 case QChar::Symbol_Math: return i18n("Symbol, Math");
681 case QChar::Symbol_Other: return i18n("Symbol, Other");
682 case QChar::Separator_Line: return i18n("Separator, Line");
683 case QChar::Separator_Paragraph: return i18n("Separator, Paragraph");
684 case QChar::Separator_Space: return i18n("Separator, Space");
685 default: return i18n("Unknown");
686 }
687}
688
689QList<QChar> KCharSelectData::find(const QString& needle)
690{
691 QSet<quint16> result;
692
693 QList<QChar> returnRes;
694 QString simplified = needle.simplified();
695 QStringList searchStrings = splitString(needle.simplified());
696
697 if(simplified.length() == 1) {
698 // search for hex representation of the character
699 searchStrings = QStringList(formatCode(simplified.at(0).unicode()));
700 }
701
702 if (searchStrings.count() == 0) {
703 return returnRes;
704 }
705
706 QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
707 foreach(const QString &s, searchStrings) {
708 if(regExp.exactMatch(s)) {
709 returnRes.append(regExp.cap(2).toInt(0, 16));
710 // search for "1234" instead of "0x1234"
711 if (s.length() == 6) {
712 searchStrings[searchStrings.indexOf(s)] = regExp.cap(2);
713 }
714 }
715 // try to parse string as decimal number
716 bool ok;
717 int unicode = s.toInt(&ok);
718 if (ok && unicode >= 0 && unicode <= 0xFFFF) {
719 returnRes.append(unicode);
720 }
721 }
722
723 bool firstSubString = true;
724 foreach(const QString &s, searchStrings) {
725 QSet<quint16> partResult = getMatchingChars(s.toLower());
726 if (firstSubString) {
727 result = partResult;
728 firstSubString = false;
729 } else {
730 result = result.intersect(partResult);
731 }
732 }
733
734 // remove results found by matching the code point to prevent duplicate results
735 // while letting these characters stay at the beginning
736 foreach(const QChar &c, returnRes) {
737 result.remove(c.unicode());
738 }
739
740 QList<quint16> sortedResult = result.toList();
741 qSort(sortedResult);
742
743 foreach(const quint16 &c, sortedResult) {
744 returnRes.append(c);
745 }
746
747 return returnRes;
748}
749
750QSet<quint16> KCharSelectData::getMatchingChars(const QString& s)
751{
752 futureIndex.waitForFinished();
753 const Index index = futureIndex;
754 Index::const_iterator pos = index.lowerBound(s);
755 QSet<quint16> result;
756
757 while (pos != index.constEnd() && pos.key().startsWith(s)) {
758 foreach (const quint16 &c, pos.value()) {
759 result.insert(c);
760 }
761 ++pos;
762 }
763
764 return result;
765}
766
767QStringList KCharSelectData::splitString(const QString& s)
768{
769 QStringList result;
770 int start = 0;
771 int end = 0;
772 int length = s.length();
773 while (end < length) {
774 while (end < length && (s[end].isLetterOrNumber() || s[end] == '+')) {
775 end++;
776 }
777 if (start != end) {
778 result.append(s.mid(start, end - start));
779 }
780 start = end;
781 while (end < length && !(s[end].isLetterOrNumber() || s[end] == '+')) {
782 end++;
783 start++;
784 }
785 }
786 return result;
787}
788
789void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString& s)
790{
791 const QStringList strings = splitString(s);
792 foreach(const QString &s, strings) {
793 (*index)[s.toLower()].append(unicode);
794 }
795}
796
797Index KCharSelectData::createIndex(const QByteArray& dataFile)
798{
799 Index i;
800
801 // character names
802 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
803 const char* data = dataFile.constData();
804 const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4);
805 const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8);
806
807 int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
808
809 for (int pos = 0; pos <= max; pos++) {
810 const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6);
811 quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2);
812 appendToIndex(&i, unicode, QString(data + offset + 1));
813 }
814
815 // details
816 const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12);
817 const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16);
818
819 max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
820
821 for (int pos = 0; pos <= max; pos++) {
822 const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27);
823
824 // aliases
825 const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6);
826 quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2);
827
828 for (int j = 0; j < aliasCount; j++) {
829 appendToIndex(&i, unicode, QString::fromLatin1(data + aliasOffset));
830 aliasOffset += strlen(data + aliasOffset) + 1;
831 }
832
833 // notes
834 const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11);
835 quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7);
836
837 for (int j = 0; j < notesCount; j++) {
838 appendToIndex(&i, unicode, QString::fromLatin1(data + notesOffset));
839 notesOffset += strlen(data + notesOffset) + 1;
840 }
841
842 // approximate equivalents
843 const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16);
844 quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12);
845
846 for (int j = 0; j < apprCount; j++) {
847 appendToIndex(&i, unicode, QString::fromLatin1(data + apprOffset));
848 apprOffset += strlen(data + apprOffset) + 1;
849 }
850
851 // equivalents
852 const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21);
853 quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17);
854
855 for (int j = 0; j < equivCount; j++) {
856 appendToIndex(&i, unicode, QString::fromLatin1(data + equivOffset));
857 equivOffset += strlen(data + equivOffset) + 1;
858 }
859
860 // see also - convert to string (hex)
861 const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26);
862 quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22);
863
864 for (int j = 0; j < seeAlsoCount; j++) {
865 quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset);
866 appendToIndex(&i, unicode, formatCode(seeAlso, 4, QString()));
867 equivOffset += strlen(data + equivOffset) + 1;
868 }
869 }
870
871 // unihan data
872 // temporary disabled due to the huge amount of data
873// const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36);
874// const quint32 unihanOffsetEnd = dataFile.size();
875// max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1;
876//
877// for (int pos = 0; pos <= max; pos++) {
878// const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30);
879// for(int j = 0; j < 7; j++) {
880// quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4);
881// if(offset != 0) {
882// appendToIndex(&i, unicode, QString::fromUtf8(data + offset));
883// }
884// }
885// }
886
887 return i;
888}
KStandardDirs::locate
static QString locate(const char *type, const QString &filename, const KComponentData &cData=KGlobal::mainComponent())
QList
QSet
JAMO_V_TABLE
static const char JAMO_V_TABLE[][4]
Definition: kcharselectdata.cpp:49
SCount
#define SCount
Definition: kcharselectdata.cpp:41
JAMO_T_TABLE
static const char JAMO_T_TABLE[][4]
Definition: kcharselectdata.cpp:56
JAMO_L_TABLE
static const char JAMO_L_TABLE[][4]
Definition: kcharselectdata.cpp:43
NCount
#define NCount
Definition: kcharselectdata.cpp:40
SBase
#define SBase
Definition: kcharselectdata.cpp:33
TCount
#define TCount
Definition: kcharselectdata.cpp:39
klocalizedstring.h
i18n
QString i18n(const char *text)
i18nc
QString i18nc(const char *ctxt, const char *text)
kstandarddirs.h
KStandardGuiItem::ok
KGuiItem ok()
Returns the 'Ok' gui item.
Definition: kstandardguiitem.cpp:107
KStandardShortcut::end
const KShortcut & end()
Goto end of the document.
Definition: kstandardshortcut.cpp:348
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDEUI

Skip menu "KDEUI"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal