cprover
remove_java_new.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Java New Operators
4 
5 Author: Peter Schrammel
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_java_new.h"
13 
16 
17 #include <util/arith_tools.h>
18 #include <util/c_types.h>
19 #include <util/expr_cast.h>
20 #include <util/expr_initializer.h>
21 #include <util/fresh_symbol.h>
22 #include <util/message.h>
24 
25 class remove_java_newt : public messaget
26 {
27 public:
30  message_handlert &_message_handler)
31  : messaget(_message_handler), symbol_table(symbol_table), ns(symbol_table)
32  {
33  }
34 
35  // Lower java_new for a single function
37 
38  // Lower java_new for a single instruction
41 
42 protected:
45 
47  exprt lhs,
49  goto_programt &,
51 
53  exprt lhs,
55  goto_programt &,
57 };
58 
71  exprt lhs,
73  goto_programt &dest,
75 {
76  PRECONDITION(!lhs.is_nil());
77  PRECONDITION(rhs.operands().empty());
78  PRECONDITION(rhs.type().id() == ID_pointer);
79  source_locationt location = rhs.source_location();
80  typet object_type = rhs.type().subtype();
81 
82  // build size expression
83  exprt object_size = size_of_expr(object_type, ns);
85 
86  // we produce a malloc side-effect, which stays
87  side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
88  malloc_expr.copy_to_operands(object_size);
89  // could use true and get rid of the code below
90  malloc_expr.copy_to_operands(false_exprt());
91  target->make_assignment(code_assignt(lhs, malloc_expr));
92  target->source_location = location;
93 
94  // zero-initialize the object
95  dereference_exprt deref(lhs, object_type);
96  auto zero_object = zero_initializer(object_type, location, ns);
97  CHECK_RETURN(zero_object.has_value());
99  to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
100  goto_programt::targett t_i = dest.insert_after(target);
101  t_i->make_assignment(code_assignt(deref, *zero_object));
102  t_i->source_location = location;
103 
104  return t_i;
105 }
106 
119  exprt lhs,
120  side_effect_exprt rhs,
121  goto_programt &dest,
122  goto_programt::targett target)
123 {
124  // lower_java_new_array without lhs not implemented
125  PRECONDITION(!lhs.is_nil());
126  PRECONDITION(rhs.operands().size() >= 1); // one per dimension
127  PRECONDITION(rhs.type().id() == ID_pointer);
128 
129  source_locationt location = rhs.source_location();
130  typet object_type = rhs.type().subtype();
131  PRECONDITION(ns.follow(object_type).id() == ID_struct);
132 
133  // build size expression
134  exprt object_size = size_of_expr(object_type, ns);
136 
137  // we produce a malloc side-effect, which stays
138  side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
139  malloc_expr.copy_to_operands(object_size);
140  // code use true and get rid of the code below
141  malloc_expr.copy_to_operands(false_exprt());
142 
143  target->make_assignment(code_assignt(lhs, malloc_expr));
144  target->source_location = location;
145  goto_programt::targett next = std::next(target);
146 
147  const struct_typet &struct_type = to_struct_type(ns.follow(object_type));
148 
149  // Ideally we would have a check for `is_valid_java_array(struct_type)` but
150  // `is_valid_java_array is part of the java_bytecode module and we cannot
151  // introduce such dependencies. We do this simple check instead:
152  PRECONDITION(struct_type.components().size() == 3);
153 
154  // Init base class:
155  dereference_exprt deref(lhs, object_type);
156  auto zero_object = zero_initializer(object_type, location, ns);
157  CHECK_RETURN(zero_object.has_value());
159  to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
160  goto_programt::targett t_i = dest.insert_before(next);
161  t_i->make_assignment(code_assignt(deref, *zero_object));
162  t_i->source_location = location;
163 
164  // if it's an array, we need to set the length field
165  member_exprt length(
166  deref,
167  struct_type.components()[1].get_name(),
168  struct_type.components()[1].type());
169  goto_programt::targett t_s = dest.insert_before(next);
170  t_s->make_assignment(code_assignt(length, rhs.op0()));
171  t_s->source_location = location;
172 
173  // we also need to allocate space for the data
175  deref,
176  struct_type.components()[2].get_name(),
177  struct_type.components()[2].type());
178 
179  // Allocate a (struct realtype**) instead of a (void**) if possible.
180  const irept &given_element_type = object_type.find(ID_element_type);
181  typet allocate_data_type;
182  if(given_element_type.is_not_nil())
183  {
184  allocate_data_type =
185  pointer_type(static_cast<const typet &>(given_element_type));
186  }
187  else
188  allocate_data_type = data.type();
189 
190  side_effect_exprt data_java_new_expr(
191  ID_java_new_array_data, allocate_data_type, location);
192 
193  // The instruction may specify a (hopefully small) upper bound on the
194  // array size, in which case we allocate a fixed-length array that may
195  // be larger than the `length` member rather than use a true variable-
196  // length array, which produces a more complex formula in the current
197  // backend.
198  const irept size_bound = rhs.find(ID_length_upper_bound);
199  if(size_bound.is_nil())
200  data_java_new_expr.set(ID_size, rhs.op0());
201  else
202  data_java_new_expr.set(ID_size, size_bound);
203 
204  // Must directly assign the new array to a temporary
205  // because goto-symex will notice `x=side_effect_exprt` but not
206  // `x=typecast_exprt(side_effect_exprt(...))`
207  symbol_exprt new_array_data_symbol = get_fresh_aux_symbol(
208  data_java_new_expr.type(),
209  id2string(target->function),
210  "tmp_new_data_array",
211  location,
212  ID_java,
213  symbol_table)
214  .symbol_expr();
215  code_declt array_decl(new_array_data_symbol);
216  array_decl.add_source_location() = location;
217  goto_programt::targett t_array_decl = dest.insert_before(next);
218  t_array_decl->make_decl(array_decl);
219  t_array_decl->source_location = location;
220  goto_programt::targett t_p2 = dest.insert_before(next);
221  t_p2->make_assignment(
222  code_assignt(new_array_data_symbol, data_java_new_expr));
223  t_p2->source_location = location;
224 
225  goto_programt::targett t_p = dest.insert_before(next);
226  exprt cast_java_new = new_array_data_symbol;
227  if(cast_java_new.type() != data.type())
228  cast_java_new = typecast_exprt(cast_java_new, data.type());
229  t_p->make_assignment(code_assignt(data, cast_java_new));
230  t_p->source_location = location;
231 
232  // zero-initialize the data
233  if(!rhs.get_bool(ID_skip_initialize))
234  {
235  const auto zero_element =
236  zero_initializer(data.type().subtype(), location, ns);
237  CHECK_RETURN(zero_element.has_value());
238  codet array_set(ID_array_set);
239  array_set.copy_to_operands(new_array_data_symbol, *zero_element);
240  goto_programt::targett t_d = dest.insert_before(next);
241  t_d->make_other(array_set);
242  t_d->source_location = location;
243  }
244 
245  // multi-dimensional?
246 
247  if(rhs.operands().size() >= 2)
248  {
249  // produce
250  // for(int i=0; i<size; i++) tmp[i]=java_new(dim-1);
251  // This will be converted recursively.
252 
253  goto_programt tmp;
254 
256  length.type(),
257  id2string(target->function),
258  "tmp_index",
259  location,
260  ID_java,
261  symbol_table)
262  .symbol_expr();
263  code_declt decl(tmp_i);
264  decl.add_source_location() = location;
265  goto_programt::targett t_decl = tmp.insert_before(tmp.instructions.begin());
266  t_decl->make_decl(decl);
267  t_decl->source_location = location;
268 
269  side_effect_exprt sub_java_new = rhs;
270  sub_java_new.operands().erase(sub_java_new.operands().begin());
271 
272  // we already know that rhs has pointer type
273  typet sub_type =
274  static_cast<const typet &>(rhs.type().subtype().find(ID_element_type));
275  CHECK_RETURN(sub_type.id() == ID_pointer);
276  sub_java_new.type() = sub_type;
277 
278  side_effect_exprt inc(ID_assign, typet(), location);
279  inc.operands().resize(2);
280  inc.op0() = tmp_i;
281  inc.op1() = plus_exprt(tmp_i, from_integer(1, tmp_i.type()));
282 
283  dereference_exprt deref_expr(
284  plus_exprt(data, tmp_i), data.type().subtype());
285 
286  code_blockt for_body;
288  sub_type,
289  id2string(target->function),
290  "subarray_init",
291  location,
292  ID_java,
293  symbol_table)
294  .symbol_expr();
295  code_declt init_decl(init_sym);
296  init_decl.add_source_location() = location;
297  for_body.add(std::move(init_decl));
298 
299  code_assignt init_subarray(init_sym, sub_java_new);
300  for_body.add(std::move(init_subarray));
301  code_assignt assign_subarray(
302  deref_expr, typecast_exprt(init_sym, deref_expr.type()));
303  for_body.add(std::move(assign_subarray));
304 
305  const code_fort for_loop(
306  code_assignt(tmp_i, from_integer(0, tmp_i.type())),
307  binary_relation_exprt(tmp_i, ID_lt, rhs.op0()),
308  std::move(inc),
309  std::move(for_body));
310 
311  goto_convert(for_loop, symbol_table, tmp, get_message_handler(), ID_java);
312 
313  // lower new side effects recursively
314  lower_java_new(tmp);
315 
316  dest.destructive_insert(next, tmp);
317  }
318 
319  return std::prev(next);
320 }
321 
328  goto_programt &goto_program,
329  goto_programt::targett target)
330 {
331  const auto maybe_code_assign =
332  expr_try_dynamic_cast<code_assignt>(target->code);
333  if(!maybe_code_assign)
334  return target;
335 
336  const exprt &lhs = maybe_code_assign->lhs();
337  const exprt &rhs = maybe_code_assign->rhs();
338 
339  if(rhs.id() == ID_side_effect && rhs.get(ID_statement) == ID_java_new)
340  {
341  return lower_java_new(lhs, to_side_effect_expr(rhs), goto_program, target);
342  }
343 
344  if(rhs.id() == ID_side_effect && rhs.get(ID_statement) == ID_java_new_array)
345  {
346  return lower_java_new_array(
347  lhs, to_side_effect_expr(rhs), goto_program, target);
348  }
349 
350  return target;
351 }
352 
359 {
360  bool changed = false;
361  for(goto_programt::instructionst::iterator target =
362  goto_program.instructions.begin();
363  target != goto_program.instructions.end();
364  ++target)
365  {
366  goto_programt::targett new_target = lower_java_new(goto_program, target);
367  changed = changed || new_target == target;
368  target = new_target;
369  }
370  if(!changed)
371  return false;
372  goto_program.update();
373  return true;
374 }
375 
384  goto_programt::targett target,
385  goto_programt &goto_program,
386  symbol_table_baset &symbol_table,
387  message_handlert &message_handler)
388 {
389  remove_java_newt rem(symbol_table, message_handler);
390  rem.lower_java_new(goto_program, target);
391 }
392 
401  symbol_table_baset &symbol_table,
402  message_handlert &message_handler)
403 {
404  remove_java_newt rem(symbol_table, message_handler);
405  rem.lower_java_new(function.body);
406 }
407 
415  goto_functionst &goto_functions,
416  symbol_table_baset &symbol_table,
417  message_handlert &message_handler)
418 {
419  remove_java_newt rem(symbol_table, message_handler);
420  bool changed = false;
421  for(auto &f : goto_functions.function_map)
422  changed = rem.lower_java_new(f.second.body) || changed;
423  if(changed)
424  goto_functions.compute_location_numbers();
425 }
426 
433 void remove_java_new(goto_modelt &goto_model, message_handlert &message_handler)
434 {
436  goto_model.goto_functions, goto_model.symbol_table, message_handler);
437 }
messaget
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:144
remove_java_newt::remove_java_newt
remove_java_newt(symbol_table_baset &symbol_table, message_handlert &_message_handler)
Definition: remove_java_new.cpp:28
exprt::copy_to_operands
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:123
struct_union_typet::components
const componentst & components() const
Definition: std_types.h:205
pointer_offset_size.h
code_blockt
A codet representing sequential composition of program statements.
Definition: std_code.h:150
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:438
typet::subtype
const typet & subtype() const
Definition: type.h:38
arith_tools.h
to_struct_type
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:349
code_fort
codet representation of a for statement.
Definition: std_code.h:893
typet
The type of an expression, extends irept.
Definition: type.h:27
fresh_symbol.h
get_fresh_aux_symbol
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with the requested name pattern.
Definition: fresh_symbol.cpp:30
dereference_exprt
Operator to dereference a pointer.
Definition: std_expr.h:3355
data
Definition: kdev_t.h:24
irept::add
irept & add(const irep_namet &name)
Definition: irep.cpp:305
irept::find
const irept & find(const irep_namet &name) const
Definition: irep.cpp:284
goto_programt::update
void update()
Update all indices.
Definition: goto_program.cpp:506
code_declt
A codet representing the declaration of a local variable.
Definition: std_code.h:352
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:1042
goto_functionst::compute_location_numbers
void compute_location_numbers()
Definition: goto_functions.cpp:18
exprt
Base class for all expressions.
Definition: expr.h:54
goto_modelt
Definition: goto_model.h:24
exprt::op0
exprt & op0()
Definition: expr.h:84
to_side_effect_expr
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1620
goto_convert.h
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:143
remove_java_newt::symbol_table
symbol_table_baset & symbol_table
Definition: remove_java_new.cpp:43
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:322
goto_convert
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Definition: goto_convert.cpp:1931
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:93
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:68
irept::get_bool
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:239
irept::is_not_nil
bool is_not_nil() const
Definition: irep.h:173
goto_programt::insert_before
targett insert_before(const_targett target)
Insertion before the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:510
remove_java_newt::ns
namespacet ns
Definition: remove_java_new.cpp:44
set_class_identifier
void set_class_identifier(struct_exprt &expr, const namespacet &ns, const struct_tag_typet &class_type)
If expr has its components filled in then sets the @class_identifier member of the struct.
Definition: class_identifier.cpp:82
expr_initializer.h
goto_programt::destructive_insert
void destructive_insert(const_targett target, goto_programt &p)
Inserts the given program p before target.
Definition: goto_program.h:532
size_of_expr
exprt size_of_expr(const typet &type, const namespacet &ns)
Definition: pointer_offset_size.cpp:292
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
symbol_table_baset
The symbol table base class interface.
Definition: symbol_table_base.h:21
symbolt::symbol_expr
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
remove_java_newt::lower_java_new
bool lower_java_new(goto_programt &)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
Definition: remove_java_new.cpp:358
exprt::op1
exprt & op1()
Definition: expr.h:87
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
object_size
exprt object_size(const exprt &pointer)
Definition: pointer_predicates.cpp:32
irept::is_nil
bool is_nil() const
Definition: irep.h:172
irept::id
const irep_idt & id() const
Definition: irep.h:259
message_handlert
Definition: message.h:24
to_struct_tag_type
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:543
false_exprt
The Boolean constant false.
Definition: std_expr.h:4452
remove_java_new
void remove_java_new(goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, message_handlert &message_handler)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
Definition: remove_java_new.cpp:383
source_locationt
Definition: source_location.h:20
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:25
member_exprt
Extract member of struct or union.
Definition: std_expr.h:3890
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:420
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:22
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:174
struct_typet
Structure type, corresponds to C style structs.
Definition: std_types.h:276
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:32
namespace_baset::follow
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:62
irept::get
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:212
irept::set
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:286
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:123
binary_relation_exprt
A base class for relations, i.e., binary predicates.
Definition: std_expr.h:878
expr_cast.h
Templated functions to cast to specific exprt-derived classes.
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:72
class_identifier.h
goto_programt::insert_after
targett insert_after(const_targett target)
Insertion after the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:518
irept
Base class for tree-like data structures with sharing.
Definition: irep.h:156
remove_java_new.h
exprt::operands
operandst & operands()
Definition: expr.h:78
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:233
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2277
code_assignt
A codet representing an assignment in the program.
Definition: std_code.h:256
message.h
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:228
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:29
remove_java_newt::lower_java_new_array
goto_programt::targett lower_java_new_array(exprt lhs, side_effect_exprt rhs, goto_programt &, goto_programt::targett)
Replaces the instruction lhs = new java_array_type by the following code: lhs = ALLOCATE(java_type) l...
Definition: remove_java_new.cpp:118
c_types.h
remove_java_newt
Definition: remove_java_new.cpp:25
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:414
side_effect_exprt
An expression containing a side effect.
Definition: std_code.h:1560
to_struct_expr
const struct_exprt & to_struct_expr(const exprt &expr)
Cast an exprt to a struct_exprt.
Definition: std_expr.h:1942
codet
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:34
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:470