cprover
Loading...
Searching...
No Matches
unescape_string.cpp
Go to the documentation of this file.
/*******************************************************************\

Module: ANSI-C Language Conversion

Author: Daniel Kroening, kroening@kroening.com

\*******************************************************************/
8
11
12#include "unescape_string.h"
13
14#include <cctype>
15
16#include <util/invariant.h>
17#include <util/unicode.h>
18
20 unsigned int value,
21 std::string &dest)
22{
23 std::basic_string<unsigned int> value_str(1, value);
24
25 // turn into utf-8
26 const std::string utf8_value = utf32_native_endian_to_utf8(value_str);
27
28 dest.append(utf8_value);
29}
30
32 unsigned int value,
33 std::basic_string<unsigned int> &dest)
34{
35 dest.push_back(value);
36}
37
38template<typename T>
39std::basic_string<T> unescape_string_templ(const std::string &src)
40{
41 std::basic_string<T> dest;
42
43 dest.reserve(src.size()); // about that long, but may be shorter
44
45 for(unsigned i=0; i<src.size(); i++)
46 {
47 T ch=(unsigned char)src[i];
48
49 if(ch=='\\') // escape?
50 {
51 // go to next character
52 i++;
53 INVARIANT(i < src.size(), "backslash can't be last character");
54
55 ch=(unsigned char)src[i];
56 switch(ch)
57 {
58 case '\\': dest.push_back(ch); break;
59 case 'n': dest.push_back('\n'); break; /* NL (0x0a) */
60 case 't': dest.push_back('\t'); break; /* HT (0x09) */
61 case 'v': dest.push_back('\v'); break; /* VT (0x0b) */
62 case 'b': dest.push_back('\b'); break; /* BS (0x08) */
63 case 'r': dest.push_back('\r'); break; /* CR (0x0d) */
64 case 'f': dest.push_back('\f'); break; /* FF (0x0c) */
65 case 'a': dest.push_back('\a'); break; /* BEL (0x07) */
66 case '"': dest.push_back('"'); break;
67 case '\'': dest.push_back('\''); break;
68
69 case 'u': // universal character
70 case 'U': // universal character
71 i++;
72
73 {
74 std::string hex;
75
76 const unsigned digits = (ch == 'u') ? 4u : 8u;
77 hex.reserve(digits);
78
79 for(unsigned count=digits;
80 count!=0 && i<src.size();
81 i++, count--)
82 hex+=src[i];
83
84 // go back
85 i--;
86
87 unsigned int result=hex_to_unsigned(hex.c_str(), hex.size());
88
89 append_universal_char(result, dest);
90 }
91
92 break;
93
94 case 'x': // hex
95 i++;
96
97 {
98 std::string hex;
99
100 while(i<src.size() && isxdigit(src[i]))
101 {
102 hex+=src[i];
103 i++;
104 }
105
106 // go back
107 i--;
108
109 ch=hex_to_unsigned(hex.c_str(), hex.size());
110 }
111
112 // if T isn't sufficiently wide to hold unsigned values
113 // the following might truncate; but then
114 // universal characters in non-wide strings don't
115 // really work; gcc just issues a warning.
116 dest.push_back(ch);
117 break;
118
119 default:
120 if(isdigit(ch)) // octal
121 {
122 std::string octal;
123
124 while(i<src.size() && isdigit(src[i]))
125 {
126 octal+=src[i];
127 i++;
128 }
129
130 // go back
131 i--;
132
133 ch=octal_to_unsigned(octal.c_str(), octal.size());
134 dest.push_back(ch);
135 }
136 else
137 {
138 // Unknown escape sequence.
139 // Both GCC and CL turn \% into %.
140 dest.push_back(ch);
141 }
142 }
143 }
144 else
145 dest.push_back(ch);
146 }
147
148 return dest;
149}
150
151std::string unescape_string(const std::string &src)
152{
153 return unescape_string_templ<char>(src);
154}
155
156std::basic_string<unsigned int> unescape_wide_string(
157 const std::string &src)
158{
159 return unescape_string_templ<unsigned int>(src);
160}
161
/// Converts a sequence of hexadecimal digits into an unsigned number.
///
/// Reading stops after \p digits characters or at the first NUL character,
/// whichever comes first. Every character read shifts the accumulated value
/// left by four bits; characters that are not hexadecimal digits contribute
/// zero bits. Bits shifted beyond the width of unsigned are dropped.
///
/// \param hex: pointer to the first character to read
/// \param digits: maximum number of characters to read
/// \return the accumulated value, 0 if nothing was read
unsigned hex_to_unsigned(const char *hex, std::size_t digits)
{
  unsigned value=0;

  for(; digits!=0; digits--, hex++)
  {
    // The <cctype> functions are only defined for arguments representable
    // as unsigned char (or EOF); a plain char may be negative.
    const unsigned char ch=static_cast<unsigned char>(*hex);

    if(ch==0)
      break;

    value<<=4;

    if(std::isdigit(ch))
      value|=static_cast<unsigned>(ch-'0');
    else if(std::isxdigit(ch))
      value|=static_cast<unsigned>(10+std::tolower(ch)-'a');
  }

  return value;
}
183
/// Converts a sequence of octal digits into an unsigned number.
///
/// Reading stops after \p digits characters or at the first NUL character,
/// whichever comes first. Every character read shifts the accumulated value
/// left by three bits; only the characters '0' to '7' contribute bits, so
/// '8', '9' and any other character count as a zero digit and can never
/// disturb the bits of the preceding digit.
///
/// \param octal: pointer to the first character to read
/// \param digits: maximum number of characters to read
/// \return the accumulated value, 0 if nothing was read
unsigned octal_to_unsigned(const char *octal, std::size_t digits)
{
  unsigned value=0;

  for(; digits!=0; digits--, octal++)
  {
    const char ch=*octal;

    if(ch==0)
      break;

    value<<=3;

    // explicit range check instead of isdigit: rejects '8' and '9' and is
    // well defined for negative char values
    if(ch>='0' && ch<='7')
      value|=static_cast<unsigned>(ch-'0');
  }

  return value;
}
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
std::basic_string< unsigned int > unescape_wide_string(const std::string &src)
std::basic_string< T > unescape_string_templ(const std::string &src)
unsigned octal_to_unsigned(const char *octal, std::size_t digits)
std::string unescape_string(const std::string &src)
static void append_universal_char(unsigned int value, std::string &dest)
unsigned hex_to_unsigned(const char *hex, std::size_t digits)
ANSI-C Language Conversion.
std::string utf32_native_endian_to_utf8(const std::basic_string< unsigned int > &s)
Definition unicode.cpp:137