• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • text
kstringhandler.cpp
Go to the documentation of this file.
1/* This file is part of the KDE libraries
2 Copyright (C) 1999 Ian Zepp (icszepp@islc.net)
3 Copyright (C) 2006 by Dominic Battre <dominic@battre.de>
4 Copyright (C) 2006 by Martin Pool <mbp@canonical.com>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public License
17 along with this library; see the file COPYING.LIB. If not, write to
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA.
20*/
21
22#include "kstringhandler.h"
23
24#include <stdlib.h> // random()
25
26#include <kglobal.h>
27
28#include <QtCore/QRegExp> // for the word ranges
29#include <QtCore/QCharRef>
30#include <QtCore/QMutableStringListIterator>
31
32
33
34//
35// Capitalization routines
36//
37QString KStringHandler::capwords( const QString &text )
38{
39 if ( text.isEmpty() ) {
40 return text;
41 }
42
43 const QString strippedText = text.trimmed();
44 const QString space = QString(QLatin1Char(' '));
45 const QStringList words = capwords(strippedText.split(space));
46
47 QString result = text;
48 result.replace(strippedText, words.join(space));
49 return result;
50}
51
52QStringList KStringHandler::capwords( const QStringList &list )
53{
54 QStringList tmp = list;
55 for ( QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it ) {
56 *it = ( *it )[ 0 ].toUpper() + ( *it ).mid( 1 );
57 }
58 return tmp;
59}
60
61
62QString KStringHandler::lsqueeze( const QString & str, int maxlen )
63{
64 if (str.length() > maxlen) {
65 int part = maxlen-3;
66 return QString::fromLatin1("...") + str.right(part);
67 }
68 else return str;
69}
70
71QString KStringHandler::csqueeze( const QString & str, int maxlen )
72{
73 if (str.length() > maxlen && maxlen > 3) {
74 const int part = (maxlen-3)/2;
75 return str.left(part) + QLatin1String("...") + str.right(part);
76 }
77 else return str;
78}
79
80QString KStringHandler::rsqueeze( const QString & str, int maxlen )
81{
82 if (str.length() > maxlen) {
83 int part = maxlen-3;
84 return str.left(part) + QLatin1String("...");
85 }
86 else return str;
87}
88
89QStringList KStringHandler::perlSplit(const QString & sep, const QString & s, int max)
90{
91 bool ignoreMax = 0 == max;
92
93 QStringList l;
94
95 int searchStart = 0;
96
97 int tokenStart = s.indexOf(sep, searchStart);
98
99 while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
100 {
101 if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
102 l << s.mid(searchStart, tokenStart - searchStart);
103
104 searchStart = tokenStart + sep.length();
105 tokenStart = s.indexOf(sep, searchStart);
106 }
107
108 if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
109 l << s.mid(searchStart, s.length() - searchStart);
110
111 return l;
112}
113
114QStringList KStringHandler::perlSplit(const QChar & sep, const QString & s, int max)
115{
116 bool ignoreMax = 0 == max;
117
118 QStringList l;
119
120 int searchStart = 0;
121
122 int tokenStart = s.indexOf(sep, searchStart);
123
124 while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
125 {
126 if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
127 l << s.mid(searchStart, tokenStart - searchStart);
128
129 searchStart = tokenStart + 1;
130 tokenStart = s.indexOf(sep, searchStart);
131 }
132
133 if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
134 l << s.mid(searchStart, s.length() - searchStart);
135
136 return l;
137}
138
139QStringList KStringHandler::perlSplit(const QRegExp & sep, const QString & s, int max)
140{
141 bool ignoreMax = 0 == max;
142
143 QStringList l;
144
145 int searchStart = 0;
146 int tokenStart = sep.indexIn(s, searchStart);
147 int len = sep.matchedLength();
148
149 while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
150 {
151 if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
152 l << s.mid(searchStart, tokenStart - searchStart);
153
154 searchStart = tokenStart + len;
155 tokenStart = sep.indexIn(s, searchStart);
156 len = sep.matchedLength();
157 }
158
159 if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
160 l << s.mid(searchStart, s.length() - searchStart);
161
162 return l;
163}
164
165QString KStringHandler::tagUrls( const QString& text )
166{
167 /*static*/ QRegExp urlEx(QLatin1String("(www\\.(?!\\.)|(fish|(f|ht)tp(|s))://)[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$]+[\\d\\w/]"));
168
169 QString richText( text );
170 int urlPos = 0, urlLen;
171 while ((urlPos = urlEx.indexIn(richText, urlPos)) >= 0)
172 {
173 urlLen = urlEx.matchedLength();
174 QString href = richText.mid( urlPos, urlLen );
175 // Qt doesn't support (?<=pattern) so we do it here
176 if((urlPos > 0) && richText[urlPos-1].isLetterOrNumber()){
177 urlPos++;
178 continue;
179 }
180 // Don't use QString::arg since %01, %20, etc could be in the string
181 QString anchor = QString::fromLatin1("<a href=\"") + href + QLatin1String("\">") + href + QLatin1String("</a>");
182 richText.replace( urlPos, urlLen, anchor );
183
184
185 urlPos += anchor.length();
186 }
187 return richText;
188}
189
190QString KStringHandler::obscure( const QString &str )
191{
192 QString result;
193 const QChar *unicode = str.unicode();
194 for ( int i = 0; i < str.length(); ++i )
195 // yes, no typo. can't encode ' ' or '!' because
196 // they're the unicode BOM. stupid scrambling. stupid.
197 result += ( unicode[ i ].unicode() <= 0x21 ) ? unicode[ i ] :
198 QChar( 0x1001F - unicode[ i ].unicode() );
199
200 return result;
201}
202
203
204bool KStringHandler::isUtf8( const char *buf )
205{
206 int i, n;
207 register unsigned char c;
208 bool gotone = false;
209
210 if (!buf)
211 return true; // whatever, just don't crash
212
213#define F 0 /* character never appears in text */
214#define T 1 /* character appears in plain ASCII text */
215#define I 2 /* character appears in ISO-8859 text */
216#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
217
218 static const unsigned char text_chars[256] = {
219 /* BEL BS HT LF FF CR */
220 F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
221 /* ESC */
222 F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
223 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
224 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
225 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
226 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
227 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
228 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
229 /* NEL */
230 X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
231 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
232 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
233 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
234 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
235 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
236 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
237 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
238 };
239
240 /* *ulen = 0; */
241 for (i = 0; (c = buf[i]); ++i) {
242 if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
243 /*
244 * Even if the whole file is valid UTF-8 sequences,
245 * still reject it if it uses weird control characters.
246 */
247
248 if (text_chars[c] != T)
249 return false;
250
251 } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
252 return false;
253 } else { /* 11xxxxxx begins UTF-8 */
254 int following;
255
256 if ((c & 0x20) == 0) { /* 110xxxxx */
257 following = 1;
258 } else if ((c & 0x10) == 0) { /* 1110xxxx */
259 following = 2;
260 } else if ((c & 0x08) == 0) { /* 11110xxx */
261 following = 3;
262 } else if ((c & 0x04) == 0) { /* 111110xx */
263 following = 4;
264 } else if ((c & 0x02) == 0) { /* 1111110x */
265 following = 5;
266 } else
267 return false;
268
269 for (n = 0; n < following; ++n) {
270 i++;
271 if (!(c = buf[i]))
272 goto done;
273
274 if ((c & 0x80) == 0 || (c & 0x40))
275 return false;
276 }
277 gotone = true;
278 }
279 }
280done:
281 return gotone; /* don't claim it's UTF-8 if it's all 7-bit */
282}
283
284#undef F
285#undef T
286#undef I
287#undef X
288
289QString KStringHandler::from8Bit( const char *str )
290{
291 if (!str)
292 return QString();
293 if (!*str) {
294 static const QString &emptyString = KGlobal::staticQString("");
295 return emptyString;
296 }
297 return KStringHandler::isUtf8( str ) ?
298 QString::fromUtf8( str ) :
299 QString::fromLocal8Bit( str );
300}
301
302int KStringHandler::naturalCompare(const QString &_a, const QString &_b, Qt::CaseSensitivity caseSensitivity)
303{
304 // This method chops the input a and b into pieces of
305 // digits and non-digits (a1.05 becomes a | 1 | . | 05)
306 // and compares these pieces of a and b to each other
307 // (first with first, second with second, ...).
308 //
309 // This is based on the natural sort order code code by Martin Pool
310 // http://sourcefrog.net/projects/natsort/
311 // Martin Pool agreed to license this under LGPL or GPL.
312
313 // FIXME: Using toLower() to implement case insensitive comparison is
314 // sub-optimal, but is needed because we compare strings with
315 // localeAwareCompare(), which does not know about case sensitivity.
316 // A task has been filled for this in Qt Task Tracker with ID 205990.
317 // http://trolltech.com/developer/task-tracker/index_html?method=entry&id=205990
318 QString a;
319 QString b;
320 if (caseSensitivity == Qt::CaseSensitive) {
321 a = _a;
322 b = _b;
323 } else {
324 a = _a.toLower();
325 b = _b.toLower();
326 }
327
328 const QChar* currA = a.unicode(); // iterator over a
329 const QChar* currB = b.unicode(); // iterator over b
330
331 if (currA == currB) {
332 return 0;
333 }
334
335 while (!currA->isNull() && !currB->isNull()) {
336 const QChar* begSeqA = currA; // beginning of a new character sequence of a
337 const QChar* begSeqB = currB;
338 if (currA->unicode() == QChar::ObjectReplacementCharacter) {
339 return 1;
340 }
341
342 if (currB->unicode() == QChar::ObjectReplacementCharacter) {
343 return -1;
344 }
345
346 if (currA->unicode() == QChar::ReplacementCharacter) {
347 return 1;
348 }
349
350 if (currB->unicode() == QChar::ReplacementCharacter) {
351 return -1;
352 }
353
354 // find sequence of characters ending at the first non-character
355 while (!currA->isNull() && !currA->isDigit() && !currA->isPunct() && !currA->isSpace()) {
356 ++currA;
357 }
358
359 while (!currB->isNull() && !currB->isDigit() && !currB->isPunct() && !currB->isSpace()) {
360 ++currB;
361 }
362
363 // compare these sequences
364 const QStringRef& subA(a.midRef(begSeqA - a.unicode(), currA - begSeqA));
365 const QStringRef& subB(b.midRef(begSeqB - b.unicode(), currB - begSeqB));
366 const int cmp = QStringRef::localeAwareCompare(subA, subB);
367 if (cmp != 0) {
368 return cmp < 0 ? -1 : +1;
369 }
370
371 if (currA->isNull() || currB->isNull()) {
372 break;
373 }
374
375 // find sequence of characters ending at the first non-character
376 while ((currA->isPunct() || currA->isSpace()) && (currB->isPunct() || currB->isSpace())) {
377 if (*currA != *currB) {
378 return (*currA < *currB) ? -1 : +1;
379 }
380 ++currA;
381 ++currB;
382 if (currA->isNull() || currB->isNull()) {
383 break;
384 }
385 }
386
387 // now some digits follow...
388 if ((*currA == QLatin1Char('0')) || (*currB == QLatin1Char('0'))) {
389 // one digit-sequence starts with 0 -> assume we are in a fraction part
390 // do left aligned comparison (numbers are considered left aligned)
391 while (1) {
392 if (!currA->isDigit() && !currB->isDigit()) {
393 break;
394 } else if (!currA->isDigit()) {
395 return +1;
396 } else if (!currB->isDigit()) {
397 return -1;
398 } else if (*currA < *currB) {
399 return -1;
400 } else if (*currA > *currB) {
401 return + 1;
402 }
403 ++currA;
404 ++currB;
405 }
406 } else {
407 // No digit-sequence starts with 0 -> assume we are looking at some integer
408 // do right aligned comparison.
409 //
410 // The longest run of digits wins. That aside, the greatest
411 // value wins, but we can't know that it will until we've scanned
412 // both numbers to know that they have the same magnitude.
413
414 bool isFirstRun = true;
415 int weight = 0;
416 while (1) {
417 if (!currA->isDigit() && !currB->isDigit()) {
418 if (weight != 0) {
419 return weight;
420 }
421 break;
422 } else if (!currA->isDigit()) {
423 if (isFirstRun) {
424 return *currA < *currB ? -1 : +1;
425 } else {
426 return -1;
427 }
428 } else if (!currB->isDigit()) {
429 if (isFirstRun) {
430 return *currA < *currB ? -1 : +1;
431 } else {
432 return +1;
433 }
434 } else if ((*currA < *currB) && (weight == 0)) {
435 weight = -1;
436 } else if ((*currA > *currB) && (weight == 0)) {
437 weight = + 1;
438 }
439 ++currA;
440 ++currB;
441 isFirstRun = false;
442 }
443 }
444 }
445
446 if (currA->isNull() && currB->isNull()) {
447 return 0;
448 }
449
450 return currA->isNull() ? -1 : + 1;
451}
452
453QString KStringHandler::preProcessWrap(const QString &text)
454{
455 const QChar zwsp(0x200b);
456
457 QString result;
458 result.reserve(text.length());
459
460 for (int i = 0; i < text.length(); i++) {
461 const QChar c = text[i];
462 bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
463 bool singleQuote = (c == QLatin1Char('\'') );
464 bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
465 bool breakAfter = (closingParens || c.isPunct() || c.isSymbol());
466 bool nextIsSpace = (i == (text.length() - 1) || text[i + 1].isSpace());
467 bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
468
469 // Provide a breaking opportunity before opening parenthesis
470 if (openingParens && !prevIsSpace)
471 result += zwsp;
472
473 // Provide a word joiner before the single quote
474 if (singleQuote && !prevIsSpace)
475 result += QChar(0x2060);
476
477 result += c;
478
479 if (breakAfter && !openingParens && !nextIsSpace && !singleQuote)
480 result += zwsp;
481 }
482
483 return result;
484}
485
QStringList
QString
kglobal.h
T
#define T
X
#define X
F
#define F
I
#define I
kstringhandler.h
KGlobal::staticQString
const QString & staticQString(const char *str)
Creates a static QString.
Definition: kglobal.cpp:271
KStringHandler::tagUrls
QString tagUrls(const QString &text)
This method auto-detects URLs in strings, and adds HTML markup to them so that richtext or HTML-enabl...
Definition: kstringhandler.cpp:165
KStringHandler::preProcessWrap
QString preProcessWrap(const QString &text)
Preprocesses the given string in order to provide additional line breaking opportunities for QTextLay...
Definition: kstringhandler.cpp:453
KStringHandler::lsqueeze
QString lsqueeze(const QString &str, int maxlen=40)
Substitute characters at the beginning of a string by "...".
Definition: kstringhandler.cpp:62
KStringHandler::perlSplit
QStringList perlSplit(const QString &sep, const QString &s, int max=0)
Split a QString into a QStringList in a similar fashion to the static QStringList function in Qt,...
Definition: kstringhandler.cpp:89
KStringHandler::from8Bit
QString from8Bit(const char *str)
Construct QString from a c string, guessing whether it is UTF8- or Local8Bit-encoded.
Definition: kstringhandler.cpp:289
KStringHandler::isUtf8
bool isUtf8(const char *str)
Guess whether a string is UTF8 encoded.
Definition: kstringhandler.cpp:204
KStringHandler::naturalCompare
int naturalCompare(const QString &a, const QString &b, Qt::CaseSensitivity caseSensitivity=Qt::CaseSensitive)
Performs a natural comparison of the strings.
Definition: kstringhandler.cpp:302
KStringHandler::rsqueeze
QString rsqueeze(const QString &str, int maxlen=40)
Substitute characters at the end of a string by "...".
Definition: kstringhandler.cpp:80
KStringHandler::csqueeze
QString csqueeze(const QString &str, int maxlen=40)
Substitute characters at the middle of a string by "...".
Definition: kstringhandler.cpp:71
KStringHandler::capwords
QString capwords(const QString &text)
Capitalizes each word in the string: "hello there" becomes "Hello There" (string version).
Definition: kstringhandler.cpp:37
KStringHandler::obscure
QString obscure(const QString &str)
Obscure string by using a simple symmetric encryption.
Definition: kstringhandler.cpp:190
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal