cprover
Loading...
Searching...
No Matches
symex_dereference.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Symbolic Execution of ANSI-C
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
11
12#include "goto_symex.h"
13
14#include <util/arith_tools.h>
15#include <util/byte_operators.h>
16#include <util/c_types.h>
18#include <util/expr_util.h>
19#include <util/fresh_symbol.h>
20#include <util/invariant.h>
22
24
25#include "expr_skeleton.h"
26#include "path_storage.h"
27#include "symex_assign.h"
29
44 const exprt &expr,
45 statet &state,
46 bool keep_array)
47{
48 exprt result;
49
50 if(expr.id()==ID_byte_extract_little_endian ||
51 expr.id()==ID_byte_extract_big_endian)
52 {
53 // address_of(byte_extract(op, offset, t)) is
54 // address_of(op) + offset with adjustments for arrays
55
57
58 // recursive call
59 result = address_arithmetic(be.op(), state, keep_array);
60
61 if(be.op().type().id() == ID_array && result.id() == ID_address_of)
62 {
64
65 // turn &a of type T[i][j] into &(a[0][0])
66 for(const typet *t = &(a.type().subtype());
67 t->id() == ID_array && expr.type() != *t;
68 t = &(to_array_type(*t).element_type()))
70 }
71
72 // do (expr.type() *)(((char *)op)+offset)
73 result=typecast_exprt(result, pointer_type(char_type()));
74
75 // there could be further dereferencing in the offset
76 exprt offset=be.offset();
77 dereference_rec(offset, state, false, false);
78
79 result=plus_exprt(result, offset);
80
81 // treat &array as &array[0]
82 const typet &expr_type = expr.type();
83 typet dest_type_subtype;
84
85 if(expr_type.id()==ID_array && !keep_array)
86 dest_type_subtype = to_array_type(expr_type).element_type();
87 else
88 dest_type_subtype=expr_type;
89
90 result=typecast_exprt(result, pointer_type(dest_type_subtype));
91 }
92 else if(expr.id()==ID_index ||
93 expr.id()==ID_member)
94 {
96 ode.build(expr, ns);
97
98 const byte_extract_exprt be =
99 make_byte_extract(ode.root_object(), ode.offset(), expr.type());
100
101 // recursive call
102 result = address_arithmetic(be, state, keep_array);
103
104 do_simplify(result);
105 }
106 else if(expr.id()==ID_dereference)
107 {
108 // ANSI-C guarantees &*p == p no matter what p is,
109 // even if it's complete garbage
110 // just grab the pointer, but be wary of further dereferencing
111 // in the pointer itself
112 result=to_dereference_expr(expr).pointer();
113 dereference_rec(result, state, false, false);
114 }
115 else if(expr.id()==ID_if)
116 {
117 if_exprt if_expr=to_if_expr(expr);
118
119 // the condition is not an address
120 dereference_rec(if_expr.cond(), state, false, false);
121
122 // recursive call
123 if_expr.true_case() =
124 address_arithmetic(if_expr.true_case(), state, keep_array);
125 if_expr.false_case() =
126 address_arithmetic(if_expr.false_case(), state, keep_array);
127
128 result=if_expr;
129 }
130 else if(expr.id()==ID_symbol ||
131 expr.id()==ID_string_constant ||
132 expr.id()==ID_label ||
133 expr.id()==ID_array)
134 {
135 // give up, just dereference
136 result=expr;
137 dereference_rec(result, state, false, false);
138
139 // turn &array into &array[0]
140 if(result.type().id() == ID_array && !keep_array)
141 result = index_exprt(result, from_integer(0, c_index_type()));
142
143 // handle field-sensitive SSA symbol
144 mp_integer offset=0;
145 if(is_ssa_expr(expr))
146 {
147 auto offset_opt = compute_pointer_offset(expr, ns);
148 PRECONDITION(offset_opt.has_value());
149 offset = *offset_opt;
150 }
151
152 if(offset>0)
153 {
155 to_ssa_expr(expr).get_l1_object(),
156 from_integer(offset, c_index_type()),
157 expr.type());
158
159 result = address_arithmetic(be, state, keep_array);
160
161 do_simplify(result);
162 }
163 else
164 result=address_of_exprt(result);
165 }
166 else if(expr.id() == ID_typecast)
167 {
168 const typecast_exprt &tc_expr = to_typecast_expr(expr);
169
170 result = address_arithmetic(tc_expr.op(), state, keep_array);
171
172 // treat &array as &array[0]
173 const typet &expr_type = expr.type();
174 typet dest_type_subtype;
175
176 if(expr_type.id() == ID_array && !keep_array)
177 dest_type_subtype = to_array_type(expr_type).element_type();
178 else
179 dest_type_subtype = expr_type;
180
181 result = typecast_exprt(result, pointer_type(dest_type_subtype));
182 }
183 else
185 "goto_symext::address_arithmetic does not handle " + expr.id_string());
186
187 const typet &expr_type = expr.type();
188 INVARIANT(
189 (expr_type.id() == ID_array && !keep_array) ||
190 pointer_type(expr_type) == result.type(),
191 "either non-persistent array or pointer to result");
192
193 return result;
194}
195
197goto_symext::cache_dereference(exprt &dereference_result, statet &state)
198{
199 auto const cache_key = [&] {
200 auto cache_key =
201 state.field_sensitivity.apply(ns, state, dereference_result, false);
202 if(auto let_expr = expr_try_dynamic_cast<let_exprt>(dereference_result))
203 {
204 let_expr->value() = state.rename<L2>(let_expr->value(), ns).get();
205 }
206 else
207 {
208 cache_key = state.rename<L2>(cache_key, ns).get();
209 }
210 return cache_key;
211 }();
212
213 if(auto cached = state.dereference_cache.find(cache_key))
214 {
215 return *cached;
216 }
217
218 auto const &cache_symbol = get_fresh_aux_symbol(
219 cache_key.type(),
220 "symex",
221 "dereference_cache",
222 dereference_result.source_location(),
224 ns,
225 state.symbol_table);
226
227 // we need to lift possible lets
228 // (come from the value set to avoid repeating complex pointer comparisons)
229 auto cache_value = cache_key;
230 lift_lets(state, cache_value);
231
232 auto assign = symex_assignt{
234
235 auto cache_symbol_expr = cache_symbol.symbol_expr();
236 assign.assign_symbol(
237 to_ssa_expr(state.rename<L1>(cache_symbol_expr, ns).get()),
239 cache_value,
240 {});
241
242 state.dereference_cache.insert(cache_key, cache_symbol_expr);
243 return cache_symbol_expr;
244}
245
258 exprt &expr,
259 statet &state,
260 bool write,
261 bool is_in_quantifier)
262{
263 if(expr.id()==ID_dereference)
264 {
265 bool expr_is_not_null = false;
266
267 if(state.threads.size() == 1)
268 {
269 const irep_idt &expr_function = state.source.function_id;
270 if(!expr_function.empty())
271 {
272 const dereference_exprt to_check =
274
275 expr_is_not_null = path_storage.safe_pointers.at(expr_function)
276 .is_safe_dereference(to_check, state.source.pc);
277 }
278 }
279
280 exprt tmp1;
281 tmp1.swap(to_dereference_expr(expr).pointer());
282
283 // first make sure there are no dereferences in there
284 dereference_rec(tmp1, state, false, is_in_quantifier);
285
286 // Depending on the nature of the pointer expression, the recursive deref
287 // operation might have introduced a construct such as
288 // (x == &o1 ? o1 : o2).field, in which case we should simplify to push the
289 // member operator inside the if, then apply field-sensitivity to yield
290 // (x == &o1 ? o1..field : o2..field). value_set_dereferencet can then
291 // apply the dereference operation to each of o1..field and o2..field
292 // independently, as it special-cases the ternary conditional operator.
293 // There may also be index operators in tmp1 which can now be resolved to
294 // constant array cell references, so we replace symbols with constants
295 // first, hoping for a transformation such as
296 // (x == &o1 ? o1 : o2)[idx] =>
297 // (x == &o1 ? o1 : o2)[2] =>
298 // (x == &o1 ? o1[[2]] : o2[[2]])
299 // Note we don't L2 rename non-constant symbols at this point, because the
300 // value-set works in terms of L1 names and we don't want to ask it to
301 // dereference an L2 pointer, which it would not have an entry for.
302
303 tmp1 = state.rename<L1_WITH_CONSTANT_PROPAGATION>(tmp1, ns).get();
304
305 do_simplify(tmp1);
306
308 {
309 // make sure simplify has not re-introduced any dereferencing that
310 // had previously been cleaned away
311 INVARIANT(
312 !has_subexpr(tmp1, ID_dereference),
313 "simplify re-introduced dereferencing");
314 }
315
316 tmp1 = state.field_sensitivity.apply(ns, state, std::move(tmp1), false);
317
318 // we need to set up some elaborate call-backs
319 symex_dereference_statet symex_dereference_state(state, ns);
320
322 ns,
323 state.symbol_table,
324 symex_dereference_state,
326 expr_is_not_null,
327 log);
328
329 // std::cout << "**** " << format(tmp1) << '\n';
330 exprt tmp2 =
332 // std::cout << "**** " << format(tmp2) << '\n';
333
334
335 // this may yield a new auto-object
336 trigger_auto_object(tmp2, state);
337
338 // Check various conditions for when we should try to cache the expression.
339 // 1. Caching dereferences must be enabled.
340 // 2. Do not cache inside LHS of writes.
341 // 3. Do not cache inside quantifiers (references variables outside their
342 // scope).
343 // 4. Only cache "complicated" expressions, i.e. of the form
344 // [let p = <expr> in ]
345 // (p == &something ? something : ...))
346 // Otherwise we should just return it unchanged.
347 if(
348 symex_config.cache_dereferences && !write && !is_in_quantifier &&
349 (tmp2.id() == ID_if || tmp2.id() == ID_let))
350 {
351 expr = cache_dereference(tmp2, state);
352 }
353 else
354 {
355 expr = std::move(tmp2);
356 }
357 }
358 else if(
359 expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
360 to_array_type(to_index_expr(expr).array().type()).size().is_zero())
361 {
362 // This is an expression of the form x.a[i],
363 // where a is a zero-sized array. This gets
364 // re-written into *(&x.a+i)
365
366 index_exprt index_expr=to_index_expr(expr);
367
368 address_of_exprt address_of_expr(index_expr.array());
369 address_of_expr.type()=pointer_type(expr.type());
370
371 dereference_exprt tmp{plus_exprt{address_of_expr, index_expr.index()}};
373
374 // recursive call
375 dereference_rec(tmp, state, write, is_in_quantifier);
376
377 expr.swap(tmp);
378 }
379 else if(expr.id()==ID_index &&
380 to_index_expr(expr).array().type().id()==ID_pointer)
381 {
382 // old stuff, will go away
384 }
385 else if(expr.id()==ID_address_of)
386 {
387 address_of_exprt &address_of_expr=to_address_of_expr(expr);
388
389 exprt &object=address_of_expr.object();
390
391 expr = address_arithmetic(
392 object, state, to_pointer_type(expr.type()).base_type().id() == ID_array);
393 }
394 else if(expr.id()==ID_typecast)
395 {
396 exprt &tc_op=to_typecast_expr(expr).op();
397
398 // turn &array into &array[0] when casting to pointer-to-element-type
399 if(
400 tc_op.id() == ID_address_of &&
401 to_address_of_expr(tc_op).object().type().id() == ID_array &&
402 expr.type() ==
403 pointer_type(to_address_of_expr(tc_op).object().type().subtype()))
404 {
406 to_address_of_expr(tc_op).object(), from_integer(0, c_index_type())));
407
408 dereference_rec(expr, state, write, is_in_quantifier);
409 }
410 else
411 {
412 dereference_rec(tc_op, state, write, is_in_quantifier);
413 }
414 }
415 else
416 {
417 bool is_quantifier = expr.id() == ID_forall || expr.id() == ID_exists;
418 Forall_operands(it, expr)
419 dereference_rec(*it, state, write, is_in_quantifier || is_quantifier);
420 }
421}
422
423static exprt
425{
426 if(auto deref = expr_try_dynamic_cast<dereference_exprt>(e))
427 {
428 deref->op() = f(std::move(deref->op()));
429 return *deref;
430 }
431
432 for(auto &sub : e.operands())
433 sub = apply_to_objects_in_dereference(std::move(sub), f);
434 return e;
435}
436
474void goto_symext::dereference(exprt &expr, statet &state, bool write)
475{
476 PRECONDITION(!state.call_stack().empty());
477
478 // Symbols whose address is taken need to be renamed to level 1
479 // in order to distinguish addresses of local variables
480 // from different frames.
481 expr = apply_to_objects_in_dereference(std::move(expr), [&](exprt e) {
482 return state.field_sensitivity.apply(
483 ns, state, state.rename<L1>(std::move(e), ns).get(), false);
484 });
485
486 // start the recursion!
487 dereference_rec(expr, state, write, false);
488 // dereferencing may introduce new symbol_exprt
489 // (like __CPROVER_memory)
490 expr = state.rename<L1>(std::move(expr), ns).get();
491
492 // Dereferencing is likely to introduce new member-of-if constructs --
493 // for example, "x->field" may have become "(x == &o1 ? o1 : o2).field."
494 // Run expression simplification, which converts that to
495 // (x == &o1 ? o1.field : o2.field))
496 // before applying field sensitivity. Field sensitivity can then turn such
497 // field-of-symbol expressions into atomic SSA expressions instead of having
498 // to rewrite all of 'o1' otherwise.
499 // Even without field sensitivity this can be beneficial: for example,
500 // "(b ? s1 : s2).member := X" results in
501 // (b ? s1 : s2) := (b ? s1 : s2) with (member := X)
502 // and then
503 // s1 := b ? ((b ? s1 : s2) with (member := X)) : s1
504 // when all we need is
505 // s1 := s1 with (member := X) [and guard b]
506 // s2 := s2 with (member := X) [and guard !b]
507 do_simplify(expr);
508
510 {
511 // make sure simplify has not re-introduced any dereferencing that
512 // had previously been cleaned away
513 INVARIANT(
514 !has_subexpr(expr, ID_dereference),
515 "simplify re-introduced dereferencing");
516 }
517
518 expr = state.field_sensitivity.apply(ns, state, std::move(expr), write);
519}
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Expression classes for byte-level operators.
const byte_extract_exprt & to_byte_extract_expr(const exprt &expr)
pointer_typet pointer_type(const typet &subtype)
Definition c_types.cpp:253
bitvector_typet char_type()
Definition c_types.cpp:124
bitvector_typet c_index_type()
Definition c_types.cpp:16
Operator to return the address of an object.
const typet & element_type() const
The type of the elements of the array.
Definition std_types.h:777
Expression of type type extracted from some object op starting at position offset (given in number of...
Operator to dereference a pointer.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:37
bool empty() const
Definition dstring.h:88
Expression in which some part is missing and can be substituted for another expression.
Base class for all expressions.
Definition expr.h:54
typet & type()
Return the type of the expression.
Definition expr.h:82
operandst & operands()
Definition expr.h:92
const source_locationt & source_location() const
Definition expr.h:230
source_locationt & add_source_location()
Definition expr.h:235
NODISCARD exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
sharing_mapt< exprt, symbol_exprt, false, irep_hash > dereference_cache
This is used for eliminating repeated complicated dereferences.
Definition goto_state.h:43
Central data structure: state.
call_stackt & call_stack()
NODISCARD renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
field_sensitivityt field_sensitivity
symex_targett::sourcet source
std::vector< threadt > threads
irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use,...
Definition goto_symex.h:226
path_storaget & path_storage
Symbolic execution paths to be resumed later.
Definition goto_symex.h:788
symex_target_equationt & target
The equation that this execution is building up.
Definition goto_symex.h:251
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition goto_symex.h:243
void trigger_auto_object(const exprt &, statet &)
void lift_lets(statet &, exprt &)
Execute any let expressions in expr using symex_assignt::assign_symbol.
exprt address_arithmetic(const exprt &, statet &, bool keep_array)
Transforms an lvalue expression by replacing any dereference operations it contains with explicit ref...
virtual void do_simplify(exprt &expr)
symbol_exprt cache_dereference(exprt &dereference_result, statet &state)
messaget log
The messaget to write log messages to.
Definition goto_symex.h:263
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition goto_symex.h:170
virtual void dereference(exprt &, statet &, bool write)
Replace all dereference operations within expr with explicit references to the objects they may refer...
void dereference_rec(exprt &expr, statet &state, bool write, bool is_in_quantifier)
If expr is a dereference_exprt, replace it with explicit references to the objects it may point to.
The trinary if-then-else operator.
Definition std_expr.h:2226
exprt & cond()
Definition std_expr.h:2243
exprt & false_case()
Definition std_expr.h:2263
exprt & true_case()
Definition std_expr.h:2253
Array index operator.
Definition std_expr.h:1328
exprt & index()
Definition std_expr.h:1363
exprt & array()
Definition std_expr.h:1353
const std::string & id_string() const
Definition irep.h:399
void swap(irept &irep)
Definition irep.h:442
const irep_idt & id() const
Definition irep.h:396
Split an expression into a base object and a (byte) offset.
void build(const exprt &expr, const namespacet &ns)
Given an expression expr, attempt to find the underlying object it represents by skipping over type c...
static const exprt & root_object(const exprt &expr)
std::unordered_map< irep_idt, local_safe_pointerst > safe_pointers
Map function identifiers to local_safe_pointerst instances.
The plus expression Associativity is not specified.
Definition std_expr.h:914
const typet & base_type() const
The type of the data what we point to.
void insert(const key_type &k, valueU &&m)
Insert element, element must not exist in map.
optionalt< std::reference_wrapper< const mapped_type > > find(const key_type &k) const
Find element.
Expression to hold a symbol (variable)
Definition std_expr.h:80
Functor for symex assignment.
Callback object that goto_symext::dereference_rec provides to value_set_dereferencet to provide value...
Semantic type conversion.
Definition std_expr.h:1920
The type of an expression, extends irept.
Definition type.h:29
const typet & subtype() const
Definition type.h:48
const exprt & op() const
Definition std_expr.h:293
Thrown when we encounter an instruction, parameters to an instruction etc.
Wrapper for a function dereferencing pointer expressions using a value set.
#define Forall_operands(it, expr)
Definition expr.h:25
Expression skeleton.
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Deprecated expression utility functions.
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
Symbolic Execution.
Storage of symbolic execution paths to resume.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
optionalt< mp_integer > compute_pointer_offset(const exprt &expr, const namespacet &ns)
Pointer Logic.
@ L2
Definition renamed.h:26
@ L1_WITH_CONSTANT_PROPAGATION
Definition renamed.h:25
@ L1
Definition renamed.h:24
exprt get_original_name(exprt expr)
Undo all levels of renaming.
BigInt mp_integer
Definition smt_terms.h:12
#define UNREACHABLE
This should be used to mark dead code.
Definition invariant.h:503
#define PRECONDITION(CONDITION)
Definition invariant.h:463
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
bool is_ssa_expr(const exprt &expr)
Definition ssa_expr.h:125
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition ssa_expr.h:145
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition std_expr.h:1391
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition std_expr.h:1954
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition std_expr.h:2291
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition std_types.h:832
bool show_points_to_sets
bool cache_dereferences
Whether or not to replace multiple occurrences of the same dereference with a single symbol that cont...
bool run_validation_checks
Should the additional validation checks be run? If this flag is set the checks for renaming (both lev...
goto_programt::const_targett pc
Symbolic Execution of assignments.
static exprt apply_to_objects_in_dereference(exprt e, const std::function< exprt(exprt)> &f)
Symbolic Execution of ANSI-C.
byte_extract_exprt make_byte_extract(const exprt &_op, const exprt &_offset, const typet &_type)
Construct a byte_extract_exprt with endianness and byte width matching the current configuration.
Pointer Dereferencing.