• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • localization
  • probers
UnicodeGroupProber.cpp
Go to the documentation of this file.
1/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* -*- C++ -*-
3* Copyright (C) 2008 <wkai@gmail.com>
4*
5*
6* Permission is hereby granted, free of charge, to any person obtaining
7* a copy of this software and associated documentation files (the
8* "Software"), to deal in the Software without restriction, including
9* without limitation the rights to use, copy, modify, merge, publish,
10* distribute, sublicense, and/or sell copies of the Software, and to
11* permit persons to whom the Software is furnished to do so, subject to
12* the following conditions:
13*
14* The above copyright notice and this permission notice shall be included
15* in all copies or substantial portions of the Software.
16*
17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*/
25
26#include "UnicodeGroupProber.h"
27
28#include "ctype_test_p.h"
29
30#include <QtAlgorithms>
31#include <math.h>
32
33namespace kencodingprober {
34UnicodeGroupProber::UnicodeGroupProber(void)
35{
36 mCodingSM[0] = new nsCodingStateMachine(&UTF8SMModel);
37 mCodingSM[1] = new nsCodingStateMachine(&UCS2LESMModel);
38 mCodingSM[2] = new nsCodingStateMachine(&UCS2BESMModel);
39 mActiveSM = NUM_OF_UNICODE_CHARSETS;
40 mState = eDetecting;
41 mDetectedCharset = "UTF-8";
42}
43
44UnicodeGroupProber::~UnicodeGroupProber(void)
45{
46 for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++)
47 delete mCodingSM[i];
48}
49
50void UnicodeGroupProber::Reset(void)
51{
52 mState = eDetecting;
53 for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++)
54 mCodingSM[i]->Reset();
55 mActiveSM = NUM_OF_UNICODE_CHARSETS;
56 mDetectedCharset = "UTF-8";
57}
58
59nsProbingState UnicodeGroupProber::HandleData(const char* aBuf, unsigned int aLen)
60{
61 nsSMState codingState;
62 int j;
63 uint i, weight_BOM, counts[5];
64 static bool disableUTF16LE = false;
65 static bool disableUTF16BE = false;
66 double weight_zero;
67
68 if (mActiveSM <= 0 || aLen < 2) {
69 mState = eNotMe;
70 return mState;
71 }
72
73 if (! (disableUTF16LE || disableUTF16BE)) {
74 if (aLen%2 != 0) {
75 disableUTF16LE = true;
76 disableUTF16BE = true;
77 }
78 weight_BOM = (uint)(sqrt((double)aLen) + aLen/10.0);
79 for (uint i = 0; i < 5; i++)
80 qCount(aBuf, aBuf+aLen, char(i), counts[i]);
81 weight_zero = (2.0*(counts[0] + counts[1] + counts[2] + counts[3] + counts[4]) + weight_BOM)/aLen;
82 if (weight_zero < log(1.4142)) {
83 disableUTF16LE = true;
84 disableUTF16BE = true;
85 }
86 if (4 >= aBuf[1] && aBuf[1] >= 0 && isprint(aBuf[0]))
87 disableUTF16BE = true;
88 else
89 disableUTF16LE = true;
90 if (disableUTF16BE)
91 mActiveSM--;
92 if (disableUTF16LE) {
93 nsCodingStateMachine* t;
94 t = mCodingSM[1];
95 mCodingSM[1] = mCodingSM[2];
96 mCodingSM[2] = t;
97 mActiveSM--;
98 }
99 }
100
101 for (i = 0; i < aLen; ++i) {
102 for (j = mActiveSM-1; j>= 0; --j)
103 {
104 //byte is feed to all active state machine
105 codingState = mCodingSM[j]->NextState(aBuf[i]);
106 if (codingState == eError)
107 {
108 //got negative answer for this state machine, make it inactive
109 mActiveSM--;
110 if (mActiveSM == 0)
111 {
112 mState = eNotMe;
113 return mState;
114 }
115 else if (j != (int)mActiveSM)
116 {
117 nsCodingStateMachine* t;
118 t = mCodingSM[mActiveSM];
119 mCodingSM[mActiveSM] = mCodingSM[j];
120 mCodingSM[j] = t;
121 }
122 }
123 else if (codingState == eItsMe)
124 {
125 mState = eFoundIt;
126 mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
127 return mState;
128 } else if (mState == eDetecting)
129 mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();;
130 }
131 }
132 return mState;
133}
134
135float UnicodeGroupProber::GetConfidence()
136{
137 if (mState == eFoundIt)
138 return 0.99f;
139 else
140 return 0.0f;
141}
142
143#ifdef DEBUG_PROBE
144void UnicodeGroupProber::DumpStatus()
145{
146 GetConfidence();
147 for (uint i = 0; i < mActiveSM; i++)
148 {
149 kDebug(180) << "Unicode group" << mCodingSM[i]->DumpCurrentState() << mCodingSM[i]->GetCodingStateMachine() ;
150 }
151}
152#endif
153
154}
155
156
UnicodeGroupProber.h
NUM_OF_UNICODE_CHARSETS
#define NUM_OF_UNICODE_CHARSETS
Definition: UnicodeGroupProber.h:32
kencodingprober::UnicodeGroupProber::UnicodeGroupProber
UnicodeGroupProber(void)
Definition: UnicodeGroupProber.cpp:34
kencodingprober::UnicodeGroupProber::mState
nsProbingState mState
Definition: UnicodeGroupProber.h:53
kencodingprober::UnicodeGroupProber::mActiveSM
unsigned int mActiveSM
Definition: UnicodeGroupProber.h:52
kencodingprober::UnicodeGroupProber::Reset
void Reset(void)
Definition: UnicodeGroupProber.cpp:50
kencodingprober::UnicodeGroupProber::GetConfidence
float GetConfidence()
Definition: UnicodeGroupProber.cpp:135
kencodingprober::UnicodeGroupProber::HandleData
nsProbingState HandleData(const char *aBuf, unsigned int aLen)
Definition: UnicodeGroupProber.cpp:59
kencodingprober::UnicodeGroupProber::~UnicodeGroupProber
virtual ~UnicodeGroupProber(void)
Definition: UnicodeGroupProber.cpp:44
kencodingprober::UnicodeGroupProber::mDetectedCharset
const char * mDetectedCharset
Definition: UnicodeGroupProber.h:54
kencodingprober::UnicodeGroupProber::mCodingSM
nsCodingStateMachine * mCodingSM[NUM_OF_UNICODE_CHARSETS]
Definition: UnicodeGroupProber.h:51
kencodingprober::nsCodingStateMachine
Definition: nsCodingStateMachine.h:53
kencodingprober::nsCodingStateMachine::NextState
nsSMState NextState(char c)
Definition: nsCodingStateMachine.h:59
kencodingprober::nsCodingStateMachine::GetCodingStateMachine
const char * GetCodingStateMachine()
Definition: nsCodingStateMachine.h:75
ctype_test_p.h
isprint
#define isprint(c)
Definition: ctype_test_p.h:90
kDebug
#define kDebug
Definition: kdebug.h:316
kencodingprober
Definition: CharDistribution.cpp:37
kencodingprober::UCS2LESMModel
KDE_NO_EXPORT SMModel UCS2LESMModel
Definition: nsMBCSSM.cpp:475
kencodingprober::nsProbingState
nsProbingState
Definition: nsCharSetProber.h:34
kencodingprober::eNotMe
@ eNotMe
Definition: nsCharSetProber.h:37
kencodingprober::eFoundIt
@ eFoundIt
Definition: nsCharSetProber.h:36
kencodingprober::eDetecting
@ eDetecting
Definition: nsCharSetProber.h:35
kencodingprober::UTF8SMModel
KDE_NO_EXPORT SMModel UTF8SMModel
Definition: nsMBCSSM.cpp:553
kencodingprober::UCS2BESMModel
KDE_NO_EXPORT SMModel UCS2BESMModel
Definition: nsMBCSSM.cpp:419
kencodingprober::nsSMState
nsSMState
Definition: nsCodingStateMachine.h:35
kencodingprober::eItsMe
@ eItsMe
Definition: nsCodingStateMachine.h:38
kencodingprober::eError
@ eError
Definition: nsCodingStateMachine.h:37
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal