Libparserutils
utf8.c
Go to the documentation of this file.
/*
 * This file is part of LibParserUtils.
 * Licensed under the MIT License,
 * http://www.opensource.org/licenses/mit-license.php
 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
 */
7
11
12#include <stdbool.h>
13#include <stdlib.h>
14#include <string.h>
15
18
/**
 * Number of continuation bytes for a given start byte.
 *
 * Indexed by the value of the first byte of a sequence (0x00-0xFF); the
 * table is laid out 16 entries per row, so each row covers one high nibble.
 * Not declared static: the name has external linkage in the original, so it
 * is kept visible to other translation units.
 */
const uint8_t numContinuations[256] = {
	/* 0x00 - 0xBF: no continuation bytes follow */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x20 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x30 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x40 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x50 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x60 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x70 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x80 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0x90 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xA0 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0xB0 */
	/* 0xC0 - 0xDF: one continuation byte */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0xC0 */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0xD0 */
	/* 0xE0 - 0xEF: two continuation bytes */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,	/* 0xE0 */
	/* 0xF0 - 0xF7: three; 0xF8 - 0xFB: four; 0xFC - 0xFF: five */
	3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,	/* 0xF0 */
};
38
52 uint32_t *ucs4, size_t *clen)
53{
55
56 UTF8_TO_UCS4(s, len, ucs4, clen, error);
57
58 return error;
59}
60
73 uint8_t **s, size_t *len)
74{
76
77 UTF8_FROM_UCS4(ucs4, s, len, error);
78
79 return error;
80}
81
91 size_t *len)
92{
94
95 UTF8_LENGTH(s, max, len, error);
96
97 return error;
98}
99
108 size_t *len)
109{
110 parserutils_error error;
111
112 UTF8_CHAR_BYTE_LENGTH(s, len, error);
113
114 return error;
115}
116
127 uint32_t *prevoff)
128{
129 parserutils_error error;
130
131 UTF8_PREV(s, off, prevoff, error);
132
133 return error;
134}
135
147 uint32_t off, uint32_t *nextoff)
148{
149 parserutils_error error;
150
151 UTF8_NEXT(s, len, off, nextoff, error);
152
153 return error;
154}
155
167 uint32_t len, uint32_t off, uint32_t *nextoff)
168{
169 parserutils_error error;
170
171 UTF8_NEXT_PARANOID(s, len, off, nextoff, error);
172
173 return error;
174}
175
size_t len
Definition codec_8859.c:23
parserutils_error
Definition errors.h:18
parserutils_error parserutils_charset_utf8_length(const uint8_t *s, size_t max, size_t *len)
Calculate the length (in characters) of a bounded UTF-8 string.
Definition utf8.c:90
parserutils_error parserutils_charset_utf8_from_ucs4(uint32_t ucs4, uint8_t **s, size_t *len)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
Definition utf8.c:72
parserutils_error parserutils_charset_utf8_next_paranoid(const uint8_t *s, uint32_t len, uint32_t off, uint32_t *nextoff)
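Find next legal UTF-8 char in string (paranoid variant: wraps UTF8_NEXT_PARANOID, which skips to the start of the next sequence in the input).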
Definition utf8.c:166
parserutils_error parserutils_charset_utf8_to_ucs4(const uint8_t *s, size_t len, uint32_t *ucs4, size_t *clen)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
Definition utf8.c:51
const uint8_t numContinuations[256]
Number of continuation bytes for a given start byte.
Definition utf8.c:20
parserutils_error parserutils_charset_utf8_next(const uint8_t *s, uint32_t len, uint32_t off, uint32_t *nextoff)
Find next legal UTF-8 char in string.
Definition utf8.c:146
parserutils_error parserutils_charset_utf8_prev(const uint8_t *s, uint32_t off, uint32_t *prevoff)
Find previous legal UTF-8 char in string.
Definition utf8.c:126
parserutils_error parserutils_charset_utf8_char_byte_length(const uint8_t *s, size_t *len)
Calculate the length (in bytes) of a UTF-8 character.
Definition utf8.c:107
UTF-8 manipulation functions (interface).
UTF-8 manipulation macros (implementation).
#define UTF8_TO_UCS4(s, len, ucs4, clen, error)
Convert a UTF-8 multibyte sequence into a single UCS-4 character.
Definition utf8impl.h:34
#define UTF8_PREV(s, off, prevoff, error)
Find previous legal UTF-8 char in string.
Definition utf8impl.h:249
#define UTF8_FROM_UCS4(ucs4, s, len, error)
Convert a single UCS-4 character into a UTF-8 multibyte sequence.
Definition utf8impl.h:123
#define UTF8_NEXT(s, len, off, nextoff, error)
Find next legal UTF-8 char in string.
Definition utf8impl.h:274
#define UTF8_LENGTH(s, max, len, error)
Calculate the length (in characters) of a bounded UTF-8 string.
Definition utf8impl.h:182
#define UTF8_CHAR_BYTE_LENGTH(s, len, error)
Calculate the length (in bytes) of a UTF-8 character.
Definition utf8impl.h:228
#define UTF8_NEXT_PARANOID(s, len, off, nextoff, error)
Skip to start of next sequence in UTF-8 input.
Definition utf8impl.h:303
#define max(a, b)
Definition utils.h:12