• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdelibs-4.14.38 API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • kdecore
  • localization
  • probers
nsSBCharSetProber.cpp
Go to the documentation of this file.
1/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2/* -*- C++ -*-
3* Copyright (C) 1998 <developer@mozilla.org>
4*
5*
6* Permission is hereby granted, free of charge, to any person obtaining
7* a copy of this software and associated documentation files (the
8* "Software"), to deal in the Software without restriction, including
9* without limitation the rights to use, copy, modify, merge, publish,
10* distribute, sublicense, and/or sell copies of the Software, and to
11* permit persons to whom the Software is furnished to do so, subject to
12* the following conditions:
13*
14* The above copyright notice and this permission notice shall be included
15* in all copies or substantial portions of the Software.
16*
17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*/
25
26#include "nsSBCharSetProber.h"
27
28#include <stdio.h>
29
30namespace kencodingprober {
31nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, unsigned int aLen)
32{
33 unsigned char order;
34
35 for (unsigned int i = 0; i < aLen; i++)
36 {
37 order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
38
39 if (order < SYMBOL_CAT_ORDER)
40 mTotalChar++;
41 if (order < SAMPLE_SIZE)
42 {
43 mFreqChar++;
44
45 if (mLastOrder < SAMPLE_SIZE)
46 {
47 mTotalSeqs++;
48 if (!mReversed)
49 ++(mSeqCounters[(int)mModel->precedenceMatrix[mLastOrder*SAMPLE_SIZE+order]]);
50 else // reverse the order of the letters in the lookup
51 ++(mSeqCounters[(int)mModel->precedenceMatrix[order*SAMPLE_SIZE+mLastOrder]]);
52 }
53 }
54 mLastOrder = order;
55 }
56
57 if (mState == eDetecting)
58 if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
59 {
60 float cf = GetConfidence();
61 if (cf > POSITIVE_SHORTCUT_THRESHOLD)
62 mState = eFoundIt;
63 else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
64 mState = eNotMe;
65 }
66
67 return mState;
68}
69
70void nsSingleByteCharSetProber::Reset(void)
71{
72 mState = eDetecting;
73 mLastOrder = 255;
74 for (unsigned int i = 0; i < NUMBER_OF_SEQ_CAT; i++)
75 mSeqCounters[i] = 0;
76 mTotalSeqs = 0;
77 mTotalChar = 0;
78 mFreqChar = 0;
79}
80
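// NEGATIVE_APPROACH is a compile-time switch for GetConfidence(): when it is
// defined, the confidence is derived from the NEGATIVE_CAT sequence counter;
// when it is left undefined (the default), the POSITIVE_CAT counter is used.
//#define NEGATIVE_APPROACH 1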
82
83float nsSingleByteCharSetProber::GetConfidence(void)
84{
85#ifdef NEGATIVE_APPROACH
86 if (mTotalSeqs > 0)
87 if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT]*10 )
88 return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT]*10))/mTotalSeqs * mFreqChar / mTotalChar;
89 return (float)0.01;
90#else //POSITIVE_APPROACH
91 float r;
92
93 if (mTotalSeqs > 0) {
94 r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
95 r = r*mFreqChar/mTotalChar;
96 if (r >= (float)1.00)
97 r = (float)0.99;
98 return r;
99 }
100 return (float)0.01;
101#endif
102}
103
104const char* nsSingleByteCharSetProber::GetCharSetName()
105{
106 if (!mNameProber)
107 return mModel->charsetName;
108 return mNameProber->GetCharSetName();
109}
110
111#ifdef DEBUG_PROBE
112void nsSingleByteCharSetProber::DumpStatus()
113{
114 printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
115}
116#endif
117}
118
119
kencodingprober::nsCharSetProber::GetCharSetName
virtual const char * GetCharSetName()=0
kencodingprober::nsSingleByteCharSetProber::Reset
virtual void Reset(void)
Definition: nsSBCharSetProber.cpp:70
kencodingprober::nsSingleByteCharSetProber::mSeqCounters
unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT]
Definition: nsSBCharSetProber.h:86
kencodingprober::nsSingleByteCharSetProber::GetCharSetName
virtual const char * GetCharSetName()
Definition: nsSBCharSetProber.cpp:104
kencodingprober::nsSingleByteCharSetProber::mModel
const SequenceModel * mModel
Definition: nsSBCharSetProber.h:79
kencodingprober::nsSingleByteCharSetProber::mTotalSeqs
unsigned int mTotalSeqs
Definition: nsSBCharSetProber.h:85
kencodingprober::nsSingleByteCharSetProber::mNameProber
nsCharSetProber * mNameProber
Definition: nsSBCharSetProber.h:93
kencodingprober::nsSingleByteCharSetProber::mLastOrder
unsigned char mLastOrder
Definition: nsSBCharSetProber.h:83
kencodingprober::nsSingleByteCharSetProber::HandleData
virtual nsProbingState HandleData(const char *aBuf, unsigned int aLen)
Definition: nsSBCharSetProber.cpp:31
kencodingprober::nsSingleByteCharSetProber::GetConfidence
virtual float GetConfidence(void)
Definition: nsSBCharSetProber.cpp:83
kencodingprober::nsSingleByteCharSetProber::mTotalChar
unsigned int mTotalChar
Definition: nsSBCharSetProber.h:88
kencodingprober::nsSingleByteCharSetProber::mState
nsProbingState mState
Definition: nsSBCharSetProber.h:78
kencodingprober::nsSingleByteCharSetProber::mReversed
const bool mReversed
Definition: nsSBCharSetProber.h:80
kencodingprober::nsSingleByteCharSetProber::mFreqChar
unsigned int mFreqChar
Definition: nsSBCharSetProber.h:90
kencodingprober
Definition: CharDistribution.cpp:37
kencodingprober::nsProbingState
nsProbingState
Definition: nsCharSetProber.h:34
kencodingprober::eNotMe
@ eNotMe
Definition: nsCharSetProber.h:37
kencodingprober::eFoundIt
@ eFoundIt
Definition: nsCharSetProber.h:36
kencodingprober::eDetecting
@ eDetecting
Definition: nsCharSetProber.h:35
nsSBCharSetProber.h
SYMBOL_CAT_ORDER
#define SYMBOL_CAT_ORDER
Definition: nsSBCharSetProber.h:35
POSITIVE_SHORTCUT_THRESHOLD
#define POSITIVE_SHORTCUT_THRESHOLD
Definition: nsSBCharSetProber.h:33
NEGATIVE_SHORTCUT_THRESHOLD
#define NEGATIVE_SHORTCUT_THRESHOLD
Definition: nsSBCharSetProber.h:34
POSITIVE_CAT
#define POSITIVE_CAT
Definition: nsSBCharSetProber.h:37
SB_ENOUGH_REL_THRESHOLD
#define SB_ENOUGH_REL_THRESHOLD
Definition: nsSBCharSetProber.h:32
NUMBER_OF_SEQ_CAT
#define NUMBER_OF_SEQ_CAT
Definition: nsSBCharSetProber.h:36
SAMPLE_SIZE
#define SAMPLE_SIZE
Definition: nsSBCharSetProber.h:31
NEGATIVE_CAT
#define NEGATIVE_CAT
Definition: nsSBCharSetProber.h:38
kencodingprober::SequenceModel::mTypicalPositiveRatio
float mTypicalPositiveRatio
Definition: nsSBCharSetProber.h:45
kencodingprober::SequenceModel::charToOrderMap
const unsigned char * charToOrderMap
Definition: nsSBCharSetProber.h:43
kencodingprober::SequenceModel::precedenceMatrix
const char * precedenceMatrix
Definition: nsSBCharSetProber.h:44
kencodingprober::SequenceModel::charsetName
const char * charsetName
Definition: nsSBCharSetProber.h:47
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Feb 20 2023 00:00:00 by doxygen 1.9.6 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs-4.14.38 API Reference

Skip menu "kdelibs-4.14.38 API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal