cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/expr_initializer.h>
18 #include <util/magic.h>
19 #include <util/run.h>
20 #include <util/tempfile.h>
21 
22 #include <json/json_parser.h>
23 
25 
27 
29 {
30  if(!cmdline.isset('T'))
31  return 0;
32 
33  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
34  std::list<irep_idt> linker_defined_symbols;
35  int fail = get_linker_script_data(
36  linker_defined_symbols,
38  elf_binary,
39  linker_def_outfile());
40  // ignore linker script parsing failures until the code is tested more widely
41  if(fail!=0)
42  return 0;
43 
44  jsont data;
45  fail=parse_json(linker_def_outfile(), get_message_handler(), data);
46  if(fail!=0)
47  {
48  error() << "Problem parsing linker script JSON data" << eom;
49  return fail;
50  }
51 
53  if(fail!=0)
54  {
55  error() << "Malformed linker-script JSON document" << eom;
56  data.output(error());
57  return fail;
58  }
59 
60  goto_modelt original_goto_model;
61 
62  fail =
63  read_goto_binary(goto_binary, original_goto_model, get_message_handler());
64 
65  if(fail!=0)
66  {
67  error() << "Unable to read goto binary for linker script merging" << eom;
68  return fail;
69  }
70 
71  fail=1;
72  linker_valuest linker_values;
73  const auto &pair =
74  original_goto_model.goto_functions.function_map.find(INITIALIZE_FUNCTION);
75  if(pair == original_goto_model.goto_functions.function_map.end())
76  {
77  error() << "No " << INITIALIZE_FUNCTION << " found in goto_functions"
78  << eom;
79  return fail;
80  }
81  fail = ls_data2instructions(
82  data,
83  cmdline.get_value('T'),
84  pair->second.body,
85  original_goto_model.symbol_table,
86  linker_values);
87  if(fail!=0)
88  {
89  error() << "Could not add linkerscript defs to " INITIALIZE_FUNCTION << eom;
90  return fail;
91  }
92 
93  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
94  if(fail!=0)
95  return fail;
96 
97  // The keys of linker_values are exactly the elements of
98  // linker_defined_symbols, so iterate over linker_values from now on.
99 
100  fail = pointerize_linker_defined_symbols(original_goto_model, linker_values);
101 
102  if(fail!=0)
103  {
104  error() << "Could not pointerize all linker-defined expressions" << eom;
105  return fail;
106  }
107 
108  fail = compiler.write_object_file(goto_binary, original_goto_model);
109 
110  if(fail!=0)
111  error() << "Could not write linkerscript-augmented binary" << eom;
112 
113  return fail;
114 }
115 
117  compilet &compiler,
118  const std::string &elf_binary,
119  const std::string &goto_binary,
120  const cmdlinet &cmdline,
121  message_handlert &message_handler) :
122  messaget(message_handler), compiler(compiler),
123  elf_binary(elf_binary), goto_binary(goto_binary),
124  cmdline(cmdline),
125  replacement_predicates(
126  {
127  replacement_predicatet("address of array's first member",
128  [](const exprt &expr) -> const symbol_exprt&
129  { return to_symbol_expr(expr.op0().op0()); },
130  [](const exprt &expr, const namespacet &)
131  {
132  return expr.id()==ID_address_of &&
133  expr.type().id()==ID_pointer &&
134 
135  expr.op0().id()==ID_index &&
136  expr.op0().type().id()==ID_unsignedbv &&
137 
138  expr.op0().op0().id()==ID_symbol &&
139  expr.op0().op0().type().id()==ID_array &&
140 
141  expr.op0().op1().id()==ID_constant &&
142  expr.op0().op1().type().id()==ID_signedbv;
143  }),
144  replacement_predicatet("address of array",
145  [](const exprt &expr) -> const symbol_exprt&
146  { return to_symbol_expr(expr.op0()); },
147  [](const exprt &expr, const namespacet &)
148  {
149  return expr.id()==ID_address_of &&
150  expr.type().id()==ID_pointer &&
151 
152  expr.op0().id()==ID_symbol &&
153  expr.op0().type().id()==ID_array;
154  }),
155  replacement_predicatet("address of struct",
156  [](const exprt &expr) -> const symbol_exprt&
157  { return to_symbol_expr(expr.op0()); },
158  [](const exprt &expr, const namespacet &ns)
159  {
160  return expr.id()==ID_address_of &&
161  expr.type().id()==ID_pointer &&
162 
163  expr.op0().id()==ID_symbol &&
164  ns.follow(expr.op0().type()).id()==ID_struct;
165  }),
166  replacement_predicatet("array variable",
167  [](const exprt &expr) -> const symbol_exprt&
168  { return to_symbol_expr(expr); },
169  [](const exprt &expr, const namespacet &)
170  {
171  return expr.id()==ID_symbol &&
172  expr.type().id()==ID_array;
173  }),
174  replacement_predicatet("pointer (does not need pointerizing)",
175  [](const exprt &expr) -> const symbol_exprt&
176  { return to_symbol_expr(expr); },
177  [](const exprt &expr, const namespacet &)
178  {
179  return expr.id()==ID_symbol &&
180  expr.type().id()==ID_pointer;
181  })
182  })
183 {}
184 
186  goto_modelt &goto_model,
187  const linker_valuest &linker_values)
188 {
189  const namespacet ns(goto_model.symbol_table);
190 
191  int ret=0;
192  // First, pointerize the actual linker-defined symbols
193  for(const auto &pair : linker_values)
194  {
195  const auto maybe_symbol = goto_model.symbol_table.get_writeable(pair.first);
196  if(!maybe_symbol)
197  continue;
198  symbolt &entry=*maybe_symbol;
199  entry.type=pointer_type(char_type());
200  entry.is_extern=false;
201  entry.value=pair.second.second;
202  }
203 
204  // Next, find all occurrences of linker-defined symbols that are _values_
205  // of some symbol in the symbol table, and pointerize them too
206  for(const auto &pair : goto_model.symbol_table.symbols)
207  {
208  std::list<symbol_exprt> to_pointerize;
209  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
210 
211  if(to_pointerize.empty())
212  continue;
213  debug() << "Pointerizing the symbol-table value of symbol " << pair.first
214  << eom;
215  int fail = pointerize_subexprs_of(
216  goto_model.symbol_table.get_writeable_ref(pair.first).value,
217  to_pointerize,
218  linker_values,
219  ns);
220  if(to_pointerize.empty() && fail==0)
221  continue;
222  ret=1;
223  for(const auto &sym : to_pointerize)
224  error() << " Could not pointerize '" << sym.get_identifier()
225  << "' in symbol table entry " << pair.first << ". Pretty:\n"
226  << sym.pretty() << "\n";
227  error() << eom;
228  }
229 
230  // Finally, pointerize all occurrences of linker-defined symbols in the
231  // goto program
232  for(auto &gf : goto_model.goto_functions.function_map)
233  {
234  goto_programt &program=gf.second.body;
236  {
237  for(exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
238  {
239  std::list<symbol_exprt> to_pointerize;
240  symbols_to_pointerize(linker_values, *insts, to_pointerize);
241  if(to_pointerize.empty())
242  continue;
243  debug() << "Pointerizing a program expression..." << eom;
244  int fail = pointerize_subexprs_of(
245  *insts, to_pointerize, linker_values, ns);
246  if(to_pointerize.empty() && fail==0)
247  continue;
248  ret=1;
249  for(const auto &sym : to_pointerize)
250  {
251  error() << " Could not pointerize '" << sym.get_identifier()
252  << "' in function " << gf.first << ". Pretty:\n"
253  << sym.pretty() << "\n";
254  error().source_location=iit->source_location;
255  }
256  error() << eom;
257  }
258  }
259  }
260  return ret;
261 }
262 
264  exprt &old_expr,
265  const linker_valuest &linker_values,
266  const symbol_exprt &old_symbol,
267  const irep_idt &ident,
268  const std::string &shape)
269 {
270  auto it=linker_values.find(ident);
271  if(it==linker_values.end())
272  {
273  error() << "Could not find a new expression for linker script-defined "
274  << "symbol '" << ident << "'" << eom;
275  return 1;
276  }
277  symbol_exprt new_expr=it->second.first;
278  new_expr.add_source_location()=old_symbol.source_location();
279  debug() << "Pointerizing linker-defined symbol '" << ident << "' of shape <"
280  << shape << ">." << eom;
281  old_expr=new_expr;
282  return 0;
283 }
284 
286  exprt &expr,
287  std::list<symbol_exprt> &to_pointerize,
288  const linker_valuest &linker_values,
289  const namespacet &ns)
290 {
291  int fail=0, tmp=0;
292  for(auto const &pair : linker_values)
293  for(auto const &pattern : replacement_predicates)
294  {
295  if(!pattern.match(expr, ns))
296  continue;
297  // take a copy, expr will be changed below
298  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
299  if(pair.first!=inner_symbol.get_identifier())
300  continue;
301  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
302  pattern.description());
303  fail=tmp?tmp:fail;
304  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
305  inner_symbol);
306  if(result==to_pointerize.end())
307  {
308  fail=1;
309  error() << "Too many removals of '" << inner_symbol.get_identifier()
310  << "'" << eom;
311  }
312  else
313  to_pointerize.erase(result);
314  // If we get here, we've just pointerized this expression. That expression
315  // will be a symbol possibly wrapped in some unary junk, but won't contain
316  // other symbols as subexpressions that might need to be pointerized. So
317  // it's safe to bail out here.
318  return fail;
319  }
320 
321  for(auto &op : expr.operands())
322  {
323  tmp=pointerize_subexprs_of(op, to_pointerize, linker_values, ns);
324  fail=tmp?tmp:fail;
325  }
326  return fail;
327 }
328 
330  const linker_valuest &linker_values,
331  const exprt &expr,
332  std::list<symbol_exprt> &to_pointerize) const
333 {
334  for(const auto &op : expr.operands())
335  {
336  if(op.id()!=ID_symbol)
337  continue;
338  const symbol_exprt &sym_exp=to_symbol_expr(op);
339  const auto &pair=linker_values.find(sym_exp.get_identifier());
340  if(pair!=linker_values.end())
341  to_pointerize.push_back(sym_exp);
342  }
343  for(const auto &op : expr.operands())
344  symbols_to_pointerize(linker_values, op, to_pointerize);
345 }
346 
347 #if 0
348 The current implementation of this function is less precise than the
349  commented-out version below. To understand the difference between these
350  implementations, consider the following example:
351 
352 Suppose we have a section in the linker script, 100 bytes long, where the
353 address of the symbol sec_start is the start of the section (value 4096) and the
354 address of sec_end is the end of that section (value 4196).
355 
356 The current implementation synthesizes the goto-version of the following C:
357 
358  char __sec_array[100];
359  char *sec_start=(&__sec_array[0]);
360  char *sec_end=((&__sec_array[0])+100);
361  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
362  // by __sec_array, not the last element of __sec_array.
363 
364 This is imprecise for the following reason: the actual address of the array and
365 the pointers shall be some random CBMC-internal address, instead of being 4096
366 and 4196. The linker script, on the other hand, would have specified the exact
367 position of the section, and we even know what the actual values of sec_start
368 and sec_end are from the object file (these values are in the `addresses` list
369 of the `data` argument to this function). If the correctness of the code depends
370 on these actual values, then CBMCs model of the code is too imprecise to verify
371 this.
372 
373 The commented-out version of this function below synthesizes the following:
374 
375  char *sec_start=4096;
376  char *sec_end=4196;
377  __CPROVER_allocated_memory(4096, 100);
378 
379 This code has both the actual addresses of the start and end of the section and
380 tells CBMC that the intermediate region is valid. However, the allocated_memory
381 macro does not currently allocate an actual object at the address 4096, so
382 symbolic execution can fail. In particular, the 'allocated memory' is part of
383 __CPROVER_memory, which does not have a bounded size; this means that (for
384 example) calls to memcpy or memset fail, because the first and third arguments
385 do not have know n size. The commented-out implementation should be reinstated
386 once this limitation of __CPROVER_allocated_memory has been fixed.
387 
388 In either case, no other changes to the rest of the code (outside this function)
389 should be necessary. The rest of this file converts expressions containing the
390 linker-defined symbol into pointer types if they were not already, and this is
391 the right behaviour for both implementations.
392 #endif
394  jsont &data,
395  const std::string &linker_script,
396  goto_programt &gp,
397  symbol_tablet &symbol_table,
398  linker_valuest &linker_values)
399 #if 1
400 {
401  goto_programt::instructionst initialize_instructions=gp.instructions;
402  std::map<irep_idt, std::size_t> truncated_symbols;
403  for(auto &d : data["regions"].array)
404  {
405  bool has_end=d["has-end-symbol"].is_true();
406 
407  std::ostringstream array_name;
408  array_name << CPROVER_PREFIX << "linkerscript-array_"
409  << d["start-symbol"].value;
410  if(has_end)
411  array_name << "-" << d["end-symbol"].value;
412 
413 
414  // Array symbol_exprt
415  mp_integer array_size = string2integer(d["size"].value);
416  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
417  {
418  warning() << "Object section '" << d["section"].value << "' of size "
419  << array_size << " is too large to model. Truncating to "
420  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << eom;
421  array_size=MAX_FLATTENED_ARRAY_SIZE;
422  if(!has_end)
423  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
424  }
425 
426  constant_exprt array_size_expr=from_integer(array_size, size_type());
427  array_typet array_type(char_type(), array_size_expr);
428  symbol_exprt array_expr(array_name.str(), array_type);
429  source_locationt array_loc;
430 
431  array_loc.set_file(linker_script);
432  std::ostringstream array_comment;
433  array_comment << "Object section '" << d["section"].value << "' of size "
434  << array_size << " bytes";
435  array_loc.set_comment(array_comment.str());
436  array_expr.add_source_location()=array_loc;
437 
438  // Array start address
439  index_exprt zero_idx(array_expr, from_integer(0, size_type()));
440  address_of_exprt array_start(zero_idx);
441 
442  // Linker-defined symbol_exprt pointing to start address
443  symbol_exprt start_sym(d["start-symbol"].value, pointer_type(char_type()));
444  linker_values[d["start-symbol"].value]=std::make_pair(start_sym,
445  array_start);
446 
447  // Since the value of the pointer will be a random CBMC address, write a
448  // note about the real address in the object file
449  auto it=std::find_if(data["addresses"].array.begin(),
450  data["addresses"].array.end(),
451  [&d](const jsont &add)
452  { return add["sym"].value==d["start-symbol"].value; });
453  if(it==data["addresses"].array.end())
454  {
455  error() << "Start: Could not find address corresponding to symbol '"
456  << d["start-symbol"].value << "' (start of section)" << eom;
457  return 1;
458  }
459  source_locationt start_loc;
460  start_loc.set_file(linker_script);
461  std::ostringstream start_comment;
462  start_comment << "Pointer to beginning of object section '"
463  << d["section"].value << "'. Original address in object file"
464  << " is " << (*it)["val"].value;
465  start_loc.set_comment(start_comment.str());
466  start_sym.add_source_location()=start_loc;
467 
468  // Instruction for start-address pointer in __CPROVER_initialize
469  code_assignt start_assign(start_sym, array_start);
470  start_assign.add_source_location()=start_loc;
471  goto_programt::instructiont start_assign_i;
472  start_assign_i.make_assignment(start_assign);
473  start_assign_i.source_location=start_loc;
474  initialize_instructions.push_front(start_assign_i);
475 
476  if(has_end) // Same for pointer to end of array
477  {
478  plus_exprt array_end(array_start, array_size_expr);
479 
480  symbol_exprt end_sym(d["end-symbol"].value, pointer_type(char_type()));
481  linker_values[d["end-symbol"].value]=std::make_pair(end_sym, array_end);
482 
483  auto entry = std::find_if(
484  data["addresses"].array.begin(),
485  data["addresses"].array.end(),
486  [&d](const jsont &add) {
487  return add["sym"].value == d["end-symbol"].value;
488  });
489  if(entry == data["addresses"].array.end())
490  {
491  error() << "Could not find address corresponding to symbol '"
492  << d["end-symbol"].value << "' (end of section)" << eom;
493  return 1;
494  }
495  source_locationt end_loc;
496  end_loc.set_file(linker_script);
497  std::ostringstream end_comment;
498  end_comment << "Pointer to end of object section '" << d["section"].value
499  << "'. Original address in object file"
500  << " is " << (*entry)["val"].value;
501  end_loc.set_comment(end_comment.str());
502  end_sym.add_source_location()=end_loc;
503 
504  code_assignt end_assign(end_sym, array_end);
505  end_assign.add_source_location()=end_loc;
506  goto_programt::instructiont end_assign_i;
507  end_assign_i.make_assignment(end_assign);
508  end_assign_i.source_location=end_loc;
509  initialize_instructions.push_front(end_assign_i);
510  }
511 
512  // Add the array to the symbol table. We don't add the pointer(s) to the
513  // symbol table because they were already there, but declared as extern and
514  // probably with a different type. We change the externness and type in
515  // pointerize_linker_defined_symbols.
516  symbolt array_sym;
517  array_sym.name=array_name.str();
518  array_sym.pretty_name=array_name.str();
519  array_sym.is_lvalue=array_sym.is_static_lifetime=true;
520  array_sym.type=array_type;
521  array_sym.location=array_loc;
522  symbol_table.add(array_sym);
523 
524  // Push the array initialization to the front now, so that it happens before
525  // the initialization of the symbols that point to it.
526  namespacet ns(symbol_table);
527  const auto zi = zero_initializer(array_type, array_loc, ns);
528  CHECK_RETURN(zi.has_value());
529  code_assignt array_assign(array_expr, *zi);
530  array_assign.add_source_location()=array_loc;
531  goto_programt::instructiont array_assign_i;
532  array_assign_i.make_assignment(array_assign);
533  array_assign_i.source_location=array_loc;
534  initialize_instructions.push_front(array_assign_i);
535  }
536 
537  // We've added every linker-defined symbol that marks the start or end of a
538  // region. But there might be other linker-defined symbols with some random
539  // address. These will have been declared extern too, so we need to give them
540  // a value also. Here, we give them the actual value that they have in the
541  // object file, since we're not assigning any object to them.
542  for(const auto &d : data["addresses"].array)
543  {
544  auto it=linker_values.find(irep_idt(d["sym"].value));
545  if(it!=linker_values.end())
546  // sym marks the start or end of a region; already dealt with.
547  continue;
548 
549  // Perhaps this is a size symbol for a section whose size we truncated
550  // earlier.
551  irep_idt symbol_value;
552  const auto &pair=truncated_symbols.find(d["sym"].value);
553  if(pair==truncated_symbols.end())
554  symbol_value=d["val"].value;
555  else
556  {
557  debug() << "Truncating the value of symbol " << d["sym"].value << " from "
558  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
559  << " because it corresponds to the size of a too-large section."
560  << eom;
562  }
563 
565  loc.set_file(linker_script);
566  loc.set_comment("linker script-defined symbol: char *"+
567  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
568 
569  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
570 
571  constant_exprt rhs(
573  string2integer(id2string(symbol_value)),
574  unsigned_int_type().get_width()),
576 
577  exprt rhs_tc(rhs);
579 
580  linker_values[irep_idt(d["sym"].value)]=std::make_pair(lhs, rhs_tc);
581 
582  code_assignt assign(lhs, rhs_tc);
583  assign.add_source_location()=loc;
585  assign_i.make_assignment(assign);
586  assign_i.source_location=loc;
587  initialize_instructions.push_front(assign_i);
588  }
589  return 0;
590 }
591 #else
592 {
593  goto_programt::instructionst initialize_instructions=gp.instructions;
594  for(const auto &d : data["regions"].array)
595  {
596  unsigned start=safe_string2unsigned(d["start"].value);
597  unsigned size=safe_string2unsigned(d["size"].value);
598  constant_exprt first=from_integer(start, size_type());
599  constant_exprt second=from_integer(size, size_type());
600  const code_typet void_t({}, empty_typet());
602  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
603 
605  loc.set_file(linker_script);
606  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
607  d["annot"].value);
608  f.add_source_location()=loc;
609 
611  i.make_function_call(f);
612  initialize_instructions.push_front(i);
613  }
614 
615  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
616  {
617  symbolt sym;
618  sym.name=CPROVER_PREFIX "allocated_memory";
619  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
620  sym.is_lvalue=sym.is_static_lifetime=true;
621  const code_typet void_t({}, empty_typet());
622  sym.type=void_t;
623  symbol_table.add(sym);
624  }
625 
626  for(const auto &d : data["addresses"].array)
627  {
629  loc.set_file(linker_script);
630  loc.set_comment("linker script-defined symbol: char *"+
631  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
632 
633  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
634 
635  constant_exprt rhs;
637  string2integer(d["val"].value), unsigned_int_type().get_width()));
638  rhs.type()=unsigned_int_type();
639 
640  exprt rhs_tc(rhs);
641  rhs_tc.make_typecast(pointer_type(char_type()));
642 
643  linker_values[irep_idt(d["sym"].value)]=std::make_pair(lhs, rhs_tc);
644 
645  code_assignt assign(lhs, rhs_tc);
646  assign.add_source_location()=loc;
648  assign_i.make_assignment(assign);
649  initialize_instructions.push_front(assign_i);
650  }
651  return 0;
652 }
653 #endif
654 
656  std::list<irep_idt> &linker_defined_symbols,
657  const symbol_tablet &symbol_table,
658  const std::string &out_file,
659  const std::string &def_out_file)
660 {
661  for(auto const &pair : symbol_table.symbols)
662  {
663  if(
664  pair.second.is_extern && pair.second.value.is_nil() &&
665  pair.second.name != CPROVER_PREFIX "memory")
666  {
667  linker_defined_symbols.push_back(pair.second.name);
668  }
669  }
670 
671  std::ostringstream linker_def_str;
672  std::copy(
673  linker_defined_symbols.begin(),
674  linker_defined_symbols.end(),
675  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
676  debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
677  << eom;
678 
679  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
680  std::ofstream linker_def_file(linker_def_infile());
681  linker_def_file << linker_def_str.str();
682  linker_def_file.close();
683 
684  std::vector<std::string> argv=
685  {
686  "ls_parse.py",
687  "--script", cmdline.get_value('T'),
688  "--object", out_file,
689  "--sym-file", linker_def_infile(),
690  "--out-file", def_out_file
691  };
692 
693  if(get_message_handler().get_verbosity() >= messaget::M_DEBUG)
694  argv.push_back("--very-verbose");
695  else if(get_message_handler().get_verbosity() > messaget::M_RESULT)
696  argv.push_back("--verbose");
697 
698  debug() << "RUN:";
699  for(std::size_t i=0; i<argv.size(); i++)
700  debug() << " " << argv[i];
701  debug() << eom;
702 
703  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
704  if(rc!=0)
705  warning() << "Problem parsing linker script" << eom;
706 
707  return rc;
708 }
709 
711  const std::list<irep_idt> &linker_defined_symbols,
712  const linker_valuest &linker_values)
713 {
714  int fail=0;
715  for(const auto &sym : linker_defined_symbols)
716  if(linker_values.find(sym)==linker_values.end())
717  {
718  fail=1;
719  error() << "Variable '" << sym << "' was declared extern but never given "
720  << "a value, even in a linker script" << eom;
721  }
722 
723  for(const auto &pair : linker_values)
724  {
725  auto it=std::find(linker_defined_symbols.begin(),
726  linker_defined_symbols.end(), pair.first);
727  if(it==linker_defined_symbols.end())
728  {
729  fail=1;
730  error() << "Linker script-defined symbol '" << pair.first << "' was "
731  << "either defined in the C source code, not declared extern in "
732  << "the C source code, or does not appear in the C source code"
733  << eom;
734  }
735  }
736  return fail;
737 }
738 
740 {
741  return (
742  !(data.is_object() && data.object.find("regions") != data.object.end() &&
743  data.object.find("addresses") != data.object.end() &&
744  data["regions"].is_array() && data["addresses"].is_array() &&
745  std::all_of(
746  data["addresses"].array.begin(),
747  data["addresses"].array.end(),
748  [](const jsont &j) {
749  return j.is_object() && j.object.find("val") != j.object.end() &&
750  j.object.find("sym") != j.object.end() &&
751  j["val"].is_number() && j["sym"].is_string();
752  }) &&
753  std::all_of(
754  data["regions"].array.begin(),
755  data["regions"].array.end(),
756  [](const jsont &j) {
757  return j.is_object() && j.object.find("start") != j.object.end() &&
758  j.object.find("size") != j.object.end() &&
759  j.object.find("annot") != j.object.end() &&
760  j.object.find("commt") != j.object.end() &&
761  j.object.find("start-symbol") != j.object.end() &&
762  j.object.find("has-end-symbol") != j.object.end() &&
763  j.object.find("section") != j.object.end() &&
764  j["start"].is_number() && j["size"].is_number() &&
765  j["annot"].is_string() && j["start-symbol"].is_string() &&
766  j["section"].is_string() && j["commt"].is_string() &&
767  ((j["has-end-symbol"].is_true() &&
768  j.object.find("end-symbol") != j.object.end() &&
769  j["end-symbol"].is_string()) ||
770  (j["has-end-symbol"].is_false() &&
771  j.object.find("size-symbol") != j.object.end() &&
772  j.object.find("end-symbol") == j.object.end() &&
773  j["size-symbol"].is_string()));
774  })));
775 }
messaget
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:144
Forall_goto_program_instructions
#define Forall_goto_program_instructions(it, program)
Definition: goto_program.h:809
symbol_table_baset::has_symbol
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Definition: symbol_table_base.h:78
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:35
array_name
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:20
messaget::M_RESULT
Definition: message.h:160
tempfile.h
symbol_tablet
The symbol table.
Definition: symbol_table.h:19
goto_programt::instructiont::source_location
source_locationt source_location
The location of the instruction in the source file.
Definition: goto_program.h:187
source_locationt::set_comment
void set_comment(const irep_idt &comment)
Definition: source_location.h:137
linker_script_merget::linker_valuest
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
Definition: linker_script_merge.h:82
linker_script_merge.h
Merge linker script-defined symbols into a goto-program.
arith_tools.h
cmdlinet::isset
virtual bool isset(char option) const
Definition: cmdline.cpp:27
linker_script_merget::get_linker_script_data
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
Definition: linker_script_merge.cpp:655
__CPROVER_allocated_memory
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
read_goto_binary
bool read_goto_binary(const std::string &filename, goto_modelt &dest, message_handlert &message_handler)
Read a goto binary from a file, but do not update config.
Definition: read_goto_binary.cpp:36
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
goto_programt::instructionst
std::list< instructiont > instructionst
Definition: goto_program.h:412
data
Definition: kdev_t.h:24
linker_script_merget::add_linker_script_definitions
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
Definition: linker_script_merge.cpp:28
mp_integer
BigInt mp_integer
Definition: mp_arith.h:22
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
MAX_FLATTENED_ARRAY_SIZE
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
irept::find
const irept & find(const irep_namet &name) const
Definition: irep.cpp:284
file
Definition: kdev_t.h:19
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:1042
linker_script_merget::ls_data2instructions
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
Definition: linker_script_merge.cpp:393
run
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:47
exprt
Base class for all expressions.
Definition: expr.h:54
linker_script_merget::replace_expr
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
Definition: linker_script_merge.cpp:263
goto_modelt
Definition: goto_model.h:24
exprt::op0
exprt & op0()
Definition: expr.h:84
irep_idt
dstringt irep_idt
Definition: irep.h:32
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:53
messaget::eom
static eomt eom
Definition: message.h:284
goto_programt::instructiont::make_function_call
void make_function_call(const codet &_code)
Definition: goto_program.h:299
run.h
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:143
jsont
Definition: json.h:23
magic.h
Magic numbers used throughout the codebase.
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:322
symbolt::pretty_name
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
linker_script_merget::goto_binary
const std::string & goto_binary
Definition: linker_script_merge.h:94
json_parser.h
is_false
bool is_false(const literalt &l)
Definition: literal.h:196
replacement_predicatet
Patterns of expressions that should be replaced.
Definition: linker_script_merge.h:23
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:93
linker_script_merget::linker_data_is_malformed
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
Definition: linker_script_merge.cpp:739
linker_script_merget::pointerize_subexprs_of
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values, const namespacet &ns)
Definition: linker_script_merge.cpp:285
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:68
code_function_callt
codet representation of a function call statement.
Definition: std_code.h:1036
cmdlinet
Definition: cmdline.h:19
expr_initializer.h
messaget::result
mstreamt & result() const
Definition: message.h:396
messaget::error
mstreamt & error() const
Definition: message.h:386
empty_typet
The empty type.
Definition: std_types.h:48
integer2bvrep
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
Definition: arith_tools.cpp:403
linker_script_merget::linker_script_merget
linker_script_merget(compilet &, const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
Definition: linker_script_merge.cpp:116
symbol_table_baset::get_writeable_ref
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
Definition: symbol_table_base.h:110
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
messaget::mstreamt::source_location
source_locationt source_location
Definition: message.h:236
symbol_exprt::get_identifier
const irep_idt & get_identifier() const
Definition: std_expr.h:176
source_locationt::set_file
void set_file(const irep_idt &file)
Definition: source_location.h:92
INITIALIZE_FUNCTION
#define INITIALIZE_FUNCTION
Definition: static_lifetime_init.h:23
cmdlinet::get_value
std::string get_value(char option) const
Definition: cmdline.cpp:45
exprt::op1
exprt & op1()
Definition: expr.h:87
linker_script_merget::cmdline
const cmdlinet & cmdline
Definition: linker_script_merge.h:95
linker_script_merget::symbols_to_pointerize
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
Definition: linker_script_merge.cpp:329
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:251
code_typet
Base type of functions.
Definition: std_types.h:751
linker_script_merget::elf_binary
const std::string & elf_binary
Definition: linker_script_merge.h:93
irept::id
const irep_idt & id() const
Definition: irep.h:259
message_handlert
Definition: message.h:24
read_goto_binary.h
safe_string2unsigned
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:54
linker_script_merget::goto_and_object_mismatch
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
Definition: linker_script_merge.cpp:710
goto_programt::instructiont::make_assignment
void make_assignment()
Definition: goto_program.h:254
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
source_locationt
Definition: source_location.h:20
symbol_table_baset::add
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Definition: symbol_table_base.cpp:18
symbol_tablet::get_writeable
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
Definition: symbol_table.h:93
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:420
symbolt::value
exprt value
Initial value of symbol.
Definition: symbol.h:34
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:174
compilet::write_object_file
bool write_object_file(const std::string &, const goto_modelt &)
writes the goto functions in the function table to a binary format object file.
Definition: compile.cpp:587
array_typet
Arrays with given size.
Definition: std_types.h:1000
symbolt::is_extern
bool is_extern
Definition: symbol.h:66
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:32
symbolt::location
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
symbolt
Symbol table entry.
Definition: symbol.h:27
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:123
symbol_table_baset::symbols
const symbolst & symbols
Definition: symbol_table_base.h:27
CPROVER_PREFIX
#define CPROVER_PREFIX
Definition: cprover_prefix.h:14
symbolt::is_static_lifetime
bool is_static_lifetime
Definition: symbol.h:65
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:72
loc
#define loc()
Definition: ansi_c_lex.yy.cpp:4256
linker_script_merget::replacement_predicates
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
Definition: linker_script_merge.h:104
exprt::operands
operandst & operands()
Definition: expr.h:78
symbolt::is_lvalue
bool is_lvalue
Definition: symbol.h:66
messaget::debug
mstreamt & debug() const
Definition: message.h:416
messaget::M_DEBUG
Definition: message.h:161
linker_script_merget::compiler
compilet & compiler
Definition: linker_script_merge.h:92
index_exprt
Array index operator.
Definition: std_expr.h:1595
static_lifetime_init.h
address_of_exprt
Operator to return the address of an object.
Definition: std_expr.h:3255
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:233
compilet::goto_model
goto_modelt goto_model
Definition: compile.h:30
exprt::make_typecast
void make_typecast(const typet &_type)
Create a typecast_exprt to the given type.
Definition: expr.cpp:74
compilet
Definition: compile.h:25
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:58
code_assignt
A codet representing an assignment in the program.
Definition: std_code.h:256
constant_exprt
A constant literal expression.
Definition: std_expr.h:4384
messaget::warning
mstreamt & warning() const
Definition: message.h:391
linker_script_merget::pointerize_linker_defined_symbols
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
Definition: linker_script_merge.cpp:185
parse_json
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:178
constant_exprt::set_value
void set_value(const irep_idt &value)
Definition: std_expr.h:4408
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:228
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:29
c_types.h
symbolt::name
irep_idt name
The unique identifier.
Definition: symbol.h:40
temporary_filet
Definition: tempfile.h:23
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:470