cprover
Loading...
Searching...
No Matches
ci_lazy_methods.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Java Bytecode
4
5Author: Diffblue Ltd.
6
7\*******************************************************************/
8
9#include "ci_lazy_methods.h"
11#include "java_class_loader.h"
12#include "java_entry_point.h"
13#include "remove_exceptions.h"
14
15#include <util/expr_iterator.h>
16#include <util/namespace.h>
17#include <util/suffix.h>
18
20
// Constructor parameter list, member initialisers and body. The line carrying
// the constructor name is not part of this listing.
// Every argument except symbol_table is stored in the member of the same
// name. Within the visible lines, symbol_table is used only to build
// class_hierarchy.
37 const symbol_tablet &symbol_table,
38 const irep_idt &main_class,
39 const std::vector<irep_idt> &main_jar_classes,
40 const std::vector<load_extra_methodst> &lazy_methods_extra_entry_points,
41 java_class_loadert &java_class_loader,
42 const std::vector<irep_idt> &extra_instantiated_classes,
43 const select_pointer_typet &pointer_type_selector,
44 const synthetic_methods_mapt &synthetic_methods)
45 : main_class(main_class),
46 main_jar_classes(main_jar_classes),
47 lazy_methods_extra_entry_points(lazy_methods_extra_entry_points),
48 java_class_loader(java_class_loader),
49 extra_instantiated_classes(extra_instantiated_classes),
50 pointer_type_selector(pointer_type_selector),
51 synthetic_methods(synthetic_methods)
52{
53 // build the class hierarchy
54 class_hierarchy(symbol_table);
55}
56
/// Walks \p expr depth-first and reports whether any sub-expression passes
/// the test inside the loop.
/// \param expr: expression to search
/// \return true on the first sub-expression that passes, false if none does
/// NOTE(review): only one clause of the test is visible (the type comparison
/// below); listing lines 69 and 71-73 are absent from this listing.
63static bool references_class_model(const exprt &expr)
64{
// Built once and reused on every call.
65 static const struct_tag_typet class_type("java::java.lang.Class");
66
67 for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
68 {
// Visible clause: the sub-expression's type must equal class_type.
70 it->type() == class_type &&
74 {
75 return true;
76 }
77 }
78
79 return false;
80}
81
// Parameter list and body of a routine whose name line is absent from this
// listing.
// Visible flow:
//  1. seed a work list of methods from entry_point_methods and from the
//     generators in lazy_methods_extra_entry_points;
//  2. repeatedly convert queued methods until a pass reports no new method;
//  3. build a fresh symbol table holding only the symbols that are kept, and
//     swap it into symbol_table.
// Always returns false.
99 symbol_tablet &symbol_table,
100 method_bytecodet &method_bytecode,
101 const method_convertert &method_converter,
102 message_handlert &message_handler)
103{
// Initial work list: the entry points computed for this symbol table.
104 std::unordered_set<irep_idt> methods_to_convert_later =
105 entry_point_methods(symbol_table, message_handler);
106
107 // Add any extra entry points specified; we should elaborate these in the
108 // same way as the main function.
109 for(const auto &extra_function_generator : lazy_methods_extra_entry_points)
110 {
111 std::vector<irep_idt> extra_methods =
112 extra_function_generator(symbol_table);
113 methods_to_convert_later.insert(extra_methods.begin(), extra_methods.end());
114 }
115
116 std::unordered_set<irep_idt> instantiated_classes;
117
118 {
119 std::unordered_set<irep_idt> initial_callable_methods;
120 ci_lazy_methods_neededt initial_lazy_methods(
121 initial_callable_methods,
122 instantiated_classes,
123 symbol_table,
// NOTE(review): listing lines 124-125 are absent here. They hold the end of
// the constructor call above and the name of the call whose arguments follow.
126 methods_to_convert_later, namespacet(symbol_table), initial_lazy_methods);
// Anything collected in initial_callable_methods joins the work list.
127 methods_to_convert_later.insert(
128 initial_callable_methods.begin(), initial_callable_methods.end());
129 }
130
131 std::unordered_set<irep_idt> methods_already_populated;
132 std::unordered_set<class_method_descriptor_exprt, irep_hash>
133 called_virtual_functions;
134 bool class_initializer_seen = false;
135
136 messaget log{message_handler};
137
// NOTE(review): any_new_classes is never assigned in the visible loop body.
// Presumably the statement starting on the absent listing line 184 updates
// it - TODO confirm.
138 bool any_new_classes = true;
139 while(any_new_classes)
140 {
// Repeat while the previous pass reported at least one new method.
141 bool any_new_methods = true;
142 while(any_new_methods)
143 {
144 any_new_methods = false;
145 while(!methods_to_convert_later.empty())
146 {
// Take the current batch out of the work list. The list is passed by name to
// the conversion call below, so it may be refilled while the batch is
// processed.
147 std::unordered_set<irep_idt> methods_to_convert;
148 std::swap(methods_to_convert, methods_to_convert_later);
149 for(const auto &mname : methods_to_convert)
150 {
151 const auto conversion_result = convert_and_analyze_method(
152 method_converter,
153 methods_already_populated,
154 class_initializer_seen,
155 mname,
156 symbol_table,
157 methods_to_convert_later,
158 instantiated_classes,
159 called_virtual_functions,
160 message_handler);
// Both flags only ever go from false to true.
161 any_new_methods |= conversion_result.new_method_seen;
162 class_initializer_seen |= conversion_result.class_initializer_seen;
163 }
164 }
165
166 // Given the object types we now know may be created, populate more
167 // possible virtual function call targets:
168
169 log.debug() << "CI lazy methods: add virtual method targets ("
170 << called_virtual_functions.size() << " callsites)"
171 << messaget::eom;
172
173 for(const class_method_descriptor_exprt &called_virtual_function :
174 called_virtual_functions)
175 {
// NOTE(review): the callee name (listing line 176) is absent; only its
// arguments are shown.
177 called_virtual_function,
178 instantiated_classes,
179 methods_to_convert_later,
180 symbol_table);
181 }
182 }
183
// NOTE(review): the start of this statement (listing line 184) is absent.
185 methods_to_convert_later,
186 instantiated_classes,
187 called_virtual_functions,
188 symbol_table);
189 }
190
191 // Remove symbols for methods that were declared but never used:
192 symbol_tablet keep_symbols;
193 // Manually keep @inflight_exception, as it is unused at this stage
194 // but will become used when the `remove_exceptions` pass is run:
195 keep_symbols.add(symbol_table.lookup_ref(INFLIGHT_EXCEPTION_VARIABLE_NAME));
196
197 for(const auto &sym : symbol_table.symbols)
198 {
199 // Don't keep global variables (unless they're gathered below from a
200 // function that references them)
201 if(sym.second.is_static_lifetime)
202 continue;
203 if(sym.second.type.id()==ID_code)
204 {
205 // Don't keep functions that belong to this language that we haven't
206 // converted above
207 if(
208 (method_bytecode.contains_method(sym.first) ||
209 synthetic_methods.count(sym.first)) &&
210 !methods_already_populated.count(sym.first))
211 {
212 continue;
213 }
214 // If this is a function then add all the things used in it
215 gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
216 }
217 keep_symbols.add(sym.second);
218 }
219
// The reported count is the difference between the two table sizes.
220 log.debug() << "CI lazy methods: removed "
221 << symbol_table.symbols.size() - keep_symbols.symbols.size()
222 << " unreachable methods and globals" << messaget::eom;
223
// Hand the pruned table back to the caller through symbol_table.
224 symbol_table.swap(keep_symbols);
225
226 return false;
227}
228
// Parameter list and body of a routine whose name line is absent from this
// listing.
// For each entry of virtual_functions whose candidate_target_methods set is
// still empty after the call that fills it, the visible code:
//  - checks that the call class is not yet in instantiated_classes;
//  - passes the pointer types of the `this` parameter and of the return type
//    to lazy_methods_loader.add_all_needed_classes;
//  - resolves a target with get_virtual_method_target and queues it in
//    methods_to_convert_later.
// Returns true if at least one entry was handled this way, false otherwise.
237 std::unordered_set<irep_idt> &methods_to_convert_later,
238 std::unordered_set<irep_idt> &instantiated_classes,
239 const std::unordered_set<class_method_descriptor_exprt, irep_hash>
240 &virtual_functions,
241 symbol_tablet &symbol_table)
242{
243 ci_lazy_methods_neededt lazy_methods_loader(
244 methods_to_convert_later,
245 instantiated_classes,
246 symbol_table,
// NOTE(review): the end of the constructor call above (listing line 247) is
// absent from this listing.
248
249 bool any_new_classes = false;
250 for(const class_method_descriptor_exprt &virtual_function : virtual_functions)
251 {
252 std::unordered_set<irep_idt> candidate_target_methods;
// NOTE(review): the callee name (listing line 253) is absent; the call
// receives candidate_target_methods, which is tested right after it.
254 virtual_function,
255 instantiated_classes,
256 candidate_target_methods,
257 symbol_table);
258
// Entries that already have at least one candidate need no further work.
259 if(!candidate_target_methods.empty())
260 continue;
261
262 const java_method_typet &java_method_type =
263 to_java_method_type(virtual_function.type());
264
265 // Add the call class to instantiated_classes and assert that it
266 // didn't already exist. It can't be instantiated already, otherwise it
267 // would give a concrete definition of the called method, and
268 // candidate_target_methods would be non-empty.
269 const irep_idt &call_class = virtual_function.class_id();
270 bool was_missing = instantiated_classes.count(call_class) == 0;
271 CHECK_RETURN(was_missing);
272 any_new_classes = true;
273
// Only a pointer-typed `this` parameter is forwarded to the loader.
274 const typet &this_type = java_method_type.get_this()->type();
275 if(
276 const pointer_typet *this_pointer_type =
277 type_try_dynamic_cast<pointer_typet>(this_type))
278 {
279 lazy_methods_loader.add_all_needed_classes(*this_pointer_type);
280 }
281
282 // That should in particular have added call_class to the possibly
283 // instantiated types.
284 bool still_missing = instantiated_classes.count(call_class) == 0;
285 CHECK_RETURN(!still_missing);
286
287 // Make sure we add our return type as required, as we may not have
288 // seen any concrete instances of it being created.
289 const typet &return_type = java_method_type.return_type();
290 if(
291 const pointer_typet *return_pointer_type =
292 type_try_dynamic_cast<pointer_typet>(return_type))
293 {
294 lazy_methods_loader.add_all_needed_classes(*return_pointer_type);
295 }
296
297 // Check that `get_virtual_method_target` returns a method now
298 const irep_idt &method_name = virtual_function.mangled_method_name();
299 const irep_idt method_id = get_virtual_method_target(
300 instantiated_classes, method_name, call_class, symbol_table);
301 CHECK_RETURN(!method_id.empty());
302
303 // Add what it returns to methods_to_convert_later
304 methods_to_convert_later.insert(method_id);
305 }
306 return any_new_classes;
307}
308
// Parameter list and body of a routine whose name line is absent from this
// listing.
// Records method_name in methods_already_populated, runs method_converter on
// it, then collects virtual callsites from the converted body.
// Returns `result` unchanged from its initial state when method_name was
// already recorded or when method_converter returns true. Otherwise
// new_method_seen is set, and class_initializer_seen is set when the body
// references a class model for the first time.
// NOTE(review): the declaration of `result` (listing line 331) is absent, so
// its type and initial field values are not visible here.
320 const method_convertert &method_converter,
321 std::unordered_set<irep_idt> &methods_already_populated,
322 const bool class_initializer_already_seen,
323 const irep_idt &method_name,
324 symbol_tablet &symbol_table,
325 std::unordered_set<irep_idt> &methods_to_convert_later,
326 std::unordered_set<irep_idt> &instantiated_classes,
327 std::unordered_set<class_method_descriptor_exprt, irep_hash>
328 &called_virtual_functions,
329 message_handlert &message_handler)
330{
// insert() reports through .second whether the name was newly added; a name
// seen before is not processed again.
332 if(!methods_already_populated.insert(method_name).second)
333 return result;
334
335 messaget log{message_handler};
336 log.debug() << "CI lazy methods: elaborate " << method_name << messaget::eom;
337
338 // Note this wraps *references* to methods_to_convert_later &
339 // instantiated_classes
340 ci_lazy_methods_neededt needed_methods(
341 methods_to_convert_later,
342 instantiated_classes,
343 symbol_table,
// NOTE(review): the end of the constructor call above (listing line 344) is
// absent from this listing.
345
// A true return from the converter ends processing of this method.
346 if(method_converter(method_name, needed_methods))
347 return result;
348
349 const exprt &method_body = symbol_table.lookup_ref(method_name).value;
350 gather_virtual_callsites(method_body, called_virtual_functions);
351
// Runs only until the caller reports that a class initializer was seen.
352 if(!class_initializer_already_seen && references_class_model(method_body))
353 {
354 result.class_initializer_seen = true;
355 const irep_idt initializer_signature =
// NOTE(review): the initialising expression (listing line 356) is absent.
// The initializer is queued only if the symbol table has it.
357 if(symbol_table.has_symbol(initializer_signature))
358 methods_to_convert_later.insert(initializer_signature);
359 }
360 result.new_method_seen = true;
361 return result;
362}
363
/// Computes the initial set of methods to convert.
/// If get_main_symbol succeeds for main_class, the set holds only that
/// function's name. Otherwise it holds one identifier of the form
/// "java::<class>.<method name>:<descriptor>" per method of main_class (when
/// main_class is non-empty) or of every class in main_jar_classes.
/// \param symbol_table: passed to get_main_symbol
/// \param message_handler: passed to get_main_symbol
/// \return the set of method identifiers described above
369std::unordered_set<irep_idt> ci_lazy_methodst::entry_point_methods(
370 const symbol_tablet &symbol_table,
371 message_handlert &message_handler)
372{
373 std::unordered_set<irep_idt> methods_to_convert_later;
374
375 const main_function_resultt main_function =
376 get_main_symbol(symbol_table, this->main_class, message_handler);
377 if(!main_function.is_success())
378 {
379 // Failed, mark all functions in the given main class(es)
380 // reachable.
381 std::vector<irep_idt> reachable_classes;
382 if(!this->main_class.empty())
383 reachable_classes.push_back(this->main_class);
384 else
385 reachable_classes = this->main_jar_classes;
386 for(const irep_idt &class_name : reachable_classes)
387 {
388 const auto &methods =
389 this->java_class_loader.get_original_class(class_name)
// NOTE(review): the rest of this expression (listing line 390) is absent;
// it presumably selects the method list of the loaded class - TODO confirm.
391 for(const auto &method : methods)
392 {
393 const irep_idt methodid = "java::" + id2string(class_name) + "." +
394 id2string(method.name) + ":" +
395 id2string(method.descriptor);
396 methods_to_convert_later.insert(methodid);
397 }
398 }
399 }
400 else
401 methods_to_convert_later.insert(main_function.main_function.name);
402 return methods_to_convert_later;
403}
404
// Parameter list and body of a routine whose name line is absent from this
// listing.
// Registers classes with needed_lazy_methods:
//  - for every method in entry_points, each pointer-typed parameter is passed
//    to add_all_needed_classes;
//  - java.lang.String, java.lang.Class and java.lang.Object are always added;
//  - every id in extra_instantiated_classes is added with a "java::" prefix.
// entry_points: identifiers looked up through ns
// ns: namespace used to find each entry point's symbol
// needed_lazy_methods: receives the classes
414 const std::unordered_set<irep_idt> &entry_points,
415 const namespacet &ns,
416 ci_lazy_methods_neededt &needed_lazy_methods)
417{
418 for(const auto &mname : entry_points)
419 {
420 const auto &symbol=ns.lookup(mname);
421 const auto &mtype = to_java_method_type(symbol.type);
422 for(const auto &param : mtype.parameters())
423 {
// Parameters whose type is not a pointer are skipped.
424 if(param.type().id()==ID_pointer)
425 {
426 const pointer_typet &original_pointer = to_pointer_type(param.type());
427 needed_lazy_methods.add_all_needed_classes(original_pointer);
428 }
429 }
430 }
431
432 // Also add classes whose instances are magically
433 // created by the JVM and so won't be spotted by
434 // looking for constructors and calls as usual:
435 needed_lazy_methods.add_needed_class("java::java.lang.String");
436 needed_lazy_methods.add_needed_class("java::java.lang.Class");
437 needed_lazy_methods.add_needed_class("java::java.lang.Object");
438
439 // As in class_loader, ensure these classes stay available
440 for(const auto &id : extra_instantiated_classes)
441 needed_lazy_methods.add_needed_class("java::" + id2string(id));
442}
443
// Parameter list and body of a routine whose name line is absent from this
// listing.
// Recursively scans code expression e and inserts into result an entry for
// each function-call statement that passes the test below. Statements that
// do not pass are searched through their operands; expressions whose id is
// not ID_code are ignored.
// e: expression to scan
// result: set that receives the collected entries
449 const exprt &e,
450 std::unordered_set<class_method_descriptor_exprt, irep_hash> &result)
451{
452 if(e.id()!=ID_code)
453 return;
454 const codet &c=to_code(e);
455 if(
456 c.get_statement() == ID_function_call &&
// NOTE(review): the predicate applied to the call's function expression
// (listing line 457) is absent; only its argument is shown.
458 to_code_function_call(c).function()))
459 {
460 result.insert(
// NOTE(review): the inserted value (listing line 461) is absent.
462 }
463 else
464 {
465 for(const exprt &op : e.operands())
466 gather_virtual_callsites(op, result);
467 }
468}
469
// Parameter list and body of a routine whose name line is absent from this
// listing.
// For the class named by called_function and the other classes in
// self_and_child_classes, asks get_virtual_method_target for a target of the
// mangled method name and inserts every non-empty answer into
// callable_methods.
// called_function: supplies the class id and the mangled method name
// instantiated_classes: forwarded to get_virtual_method_target
// callable_methods: receives the method identifiers found
// symbol_table: forwarded to get_virtual_method_target
481 const class_method_descriptor_exprt &called_function,
482 const std::unordered_set<irep_idt> &instantiated_classes,
483 std::unordered_set<irep_idt> &callable_methods,
484 symbol_tablet &symbol_table)
485{
486 const auto &call_class = called_function.class_id();
487 const auto &method_name = called_function.mangled_method_name();
488
489 class_hierarchyt::idst self_and_child_classes =
// NOTE(review): the initialising expression (listing line 490) is absent;
// going by the variable name it yields the child classes - TODO confirm.
// The call class itself is appended afterwards.
491 self_and_child_classes.push_back(call_class);
492
493 for(const irep_idt &class_name : self_and_child_classes)
494 {
495 const irep_idt method_id = get_virtual_method_target(
496 instantiated_classes, method_name, class_name, symbol_table);
// An empty identifier means no target was found for this class.
497 if(!method_id.empty())
498 callable_methods.insert(method_id);
499 }
500}
501
// Parameter list and body of a routine whose name line is absent from this
// listing.
// Recursively scans e for symbol expressions. Each one that names a
// static-lifetime symbol present in symbol_table is added to needed, and the
// symbol's own value is then scanned in the same way. Non-symbol expressions
// are searched through their operands.
// e: expression to scan
// symbol_table: table in which identifiers are looked up
// needed: table that receives the symbols found
// NOTE(review): no visible check stops a symbol from being visited twice;
// whether self-referencing initialisers can occur is not shown here.
508 const exprt &e,
509 const symbol_tablet &symbol_table,
510 symbol_tablet &needed)
511{
512 if(e.id()==ID_symbol)
513 {
514 // If the symbol isn't in the symbol table at all, then it is defined
515 // on an opaque type (i.e. we don't have the class definition at this point)
516 // and will be created during the typecheck phase.
517 // We don't mark it as 'needed' as it doesn't exist yet to keep.
518 const auto findit=
519 symbol_table.symbols.find(to_symbol_expr(e).get_identifier());
520 if(findit!=symbol_table.symbols.end() &&
521 findit->second.is_static_lifetime)
522 {
523 needed.add(findit->second);
524 // Gather any globals referenced in the initialiser:
525 gather_needed_globals(findit->second.value, symbol_table, needed);
526 }
527 }
528 else
529 forall_operands(opit, e)
530 gather_needed_globals(*opit, symbol_table, needed);
531}
532
// Parameter list and body of a routine whose name line is absent from this
// listing.
// Resolves call_basename on classname through
// get_inherited_method_implementation, but only when classname is in
// instantiated_classes.
// Returns the full component identifier of the resolved method, or an empty
// irep_idt when the class is not instantiated or nothing was resolved.
546 const std::unordered_set<irep_idt> &instantiated_classes,
547 const irep_idt &call_basename,
548 const irep_idt &classname,
549 const symbol_tablet &symbol_table)
550{
551 // Program-wide, is this class ever instantiated?
552 if(!instantiated_classes.count(classname))
553 return irep_idt();
554
555 auto resolved_call =
556 get_inherited_method_implementation(call_basename, classname, symbol_table);
557
// resolved_call converts to false when no implementation was found.
558 if(resolved_call)
559 return resolved_call->get_full_component_identifier();
560 else
561 return irep_idt();
562}
static bool references_class_model(const exprt &expr)
Checks if an expression refers to any class literals (e.g. `MyType.class`).
Collect methods needed to be loaded using the lazy method.
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
void add_all_needed_classes(const pointer_typet &pointer_type)
Add to the needed classes all classes specified, the replacement type if it will be replaced,...
bool add_needed_class(const irep_idt &)
Notes class class_symbol_name will be instantiated, or a static field belonging to it will be accesse...
const std::vector< irep_idt > & extra_instantiated_classes
void gather_needed_globals(const exprt &e, const symbol_tablet &symbol_table, symbol_tablet &needed)
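Adds to `needed` every static-lifetime symbol referenced from `e`, including symbols referenced by the initial values of those symbols.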
std::vector< irep_idt > main_jar_classes
ci_lazy_methodst(const symbol_tablet &symbol_table, const irep_idt &main_class, const std::vector< irep_idt > &main_jar_classes, const std::vector< load_extra_methodst > &lazy_methods_extra_entry_points, java_class_loadert &java_class_loader, const std::vector< irep_idt > &extra_instantiated_classes, const select_pointer_typet &pointer_type_selector, const synthetic_methods_mapt &synthetic_methods)
Constructor for lazy-method loading.
const select_pointer_typet & pointer_type_selector
const std::vector< load_extra_methodst > & lazy_methods_extra_entry_points
class_hierarchyt class_hierarchy
void gather_virtual_callsites(const exprt &e, std::unordered_set< class_method_descriptor_exprt, irep_hash > &result)
Get places where virtual functions are called.
java_class_loadert & java_class_loader
void get_virtual_method_targets(const class_method_descriptor_exprt &called_function, const std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< irep_idt > &callable_methods, symbol_tablet &symbol_table)
Find possible callees, excluding types that are not known to be instantiated.
const synthetic_methods_mapt & synthetic_methods
std::unordered_set< irep_idt > entry_point_methods(const symbol_tablet &symbol_table, message_handlert &message_handler)
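Entry point methods are either the main function found by `get_main_symbol`, or, when none is found, every method of `main_class` (if it is set) or else of each class in `main_jar_classes`.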
convert_method_resultt convert_and_analyze_method(const method_convertert &method_converter, std::unordered_set< irep_idt > &methods_already_populated, const bool class_initializer_already_seen, const irep_idt &method_name, symbol_tablet &symbol_table, std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, std::unordered_set< class_method_descriptor_exprt, irep_hash > &called_virtual_functions, message_handlert &message_handler)
Convert a method, add it to the populated set, add needed methods to methods_to_convert_later and add...
irep_idt get_virtual_method_target(const std::unordered_set< irep_idt > &instantiated_classes, const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Find a virtual callee, if one is defined and the callee type is known to exist.
void initialize_instantiated_classes(const std::unordered_set< irep_idt > &entry_points, const namespacet &ns, ci_lazy_methods_neededt &needed_lazy_methods)
Build up a list of methods whose type may be passed around reachable from the entry point.
bool handle_virtual_methods_with_no_callees(std::unordered_set< irep_idt > &methods_to_convert_later, std::unordered_set< irep_idt > &instantiated_classes, const std::unordered_set< class_method_descriptor_exprt, irep_hash > &virtual_functions, symbol_tablet &symbol_table)
Look for virtual callsites with no candidate targets.
bool operator()(symbol_tablet &symbol_table, method_bytecodet &method_bytecode, const method_convertert &method_converter, message_handlert &message_handler)
Uses a simple context-insensitive ('ci') analysis to determine which methods may be reachable from th...
idst get_children_trans(const irep_idt &id) const
std::vector< irep_idt > idst
An expression describing a method on a class.
Definition std_expr.h:3272
const irep_idt & mangled_method_name() const
The method name after mangling it by combining it with the descriptor.
Definition std_expr.h:3309
const irep_idt & class_id() const
Unique identifier in the symbol table, of the compile time type of the class which this expression is...
Definition std_expr.h:3317
const typet & return_type() const
Definition std_types.h:645
const parametert * get_this() const
Definition std_types.h:621
Data structure for representing an arbitrary statement in a program.
const irep_idt & get_statement() const
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:37
bool empty() const
Definition dstring.h:88
Base class for all expressions.
Definition expr.h:54
depth_iteratort depth_end()
Definition expr.cpp:267
depth_iteratort depth_begin()
Definition expr.cpp:265
typet & type()
Return the type of the expression.
Definition expr.h:82
operandst & operands()
Definition expr.h:92
const irep_idt & id() const
Definition irep.h:396
Class responsible to load .class files.
const java_bytecode_parse_treet & get_original_class(const irep_idt &class_name)
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:155
static eomt eom
Definition message.h:297
bool contains_method(const irep_idt &method_id) const
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition namespace.h:91
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
The pointer type These are both 'bitvector_typet' (they have a width) and 'type_with_subtypet' (they ...
A struct tag type, i.e., struct_typet with an identifier.
Definition std_types.h:449
const irep_idt & get_identifier() const
Definition std_expr.h:109
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
The symbol table.
void swap(symbol_tablet &other)
Swap symbol maps between two symbol tables.
irep_idt name
The unique identifier.
Definition symbol.h:40
exprt value
Initial value of symbol.
Definition symbol.h:34
The type of an expression, extends irept.
Definition type.h:29
#define forall_operands(it, expr)
Definition expr.h:18
Forward depth-first search iterators These iterators' copy operations are expensive,...
const code_function_callt & to_code_function_call(const codet &code)
dstringt irep_idt
Definition irep.h:37
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
#define JAVA_CLASS_MODEL_SUFFIX
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
irep_idt get_java_class_literal_initializer_signature()
Get the symbol name of java.lang.Class' initializer method.
const java_method_typet & to_java_method_type(const typet &type)
Definition java_types.h:184
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Remove function exceptional returns.
#define INFLIGHT_EXCEPTION_VARIABLE_NAME
optionalt< resolve_inherited_componentt::inherited_componentt > get_inherited_method_implementation(const irep_idt &call_basename, const irep_idt &classname, const symbol_tablet &symbol_table)
Given a class and a component, identify the concrete method it is resolved to.
Given a class and a component (either field or method), find the closest parent that defines that com...
static optionalt< smt_termt > get_identifier(const exprt &expr, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_handle_identifiers, const std::unordered_map< exprt, smt_identifier_termt, irep_hash > &expression_identifiers)
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
const codet & to_code(const exprt &expr)
const class_method_descriptor_exprt & to_class_method_descriptor_expr(const exprt &expr)
Cast an exprt to a class_method_descriptor_exprt.
Definition std_expr.h:3362
bool can_cast_expr< class_method_descriptor_exprt >(const exprt &base)
Definition std_expr.h:3372
bool can_cast_expr< symbol_exprt >(const exprt &base)
Definition std_expr.h:173
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:189
bool has_suffix(const std::string &s, const std::string &suffix)
Definition suffix.h:17
std::unordered_map< irep_idt, synthetic_method_typet > synthetic_methods_mapt
Maps method names on to a synthetic method kind.