34 std::list<irep_idt> linker_defined_symbols;
36 linker_defined_symbols,
39 linker_def_outfile());
48 error() <<
"Problem parsing linker script JSON data" <<
eom;
55 error() <<
"Malformed linker-script JSON document" <<
eom;
67 error() <<
"Unable to read goto binary for linker script merging" <<
eom;
104 error() <<
"Could not pointerize all linker-defined expressions" <<
eom;
111 error() <<
"Could not write linkerscript-augmented binary" <<
eom;
118 const std::string &elf_binary,
119 const std::string &goto_binary,
122 messaget(message_handler), compiler(compiler),
123 elf_binary(elf_binary), goto_binary(goto_binary),
125 replacement_predicates(
132 return expr.
id()==ID_address_of &&
133 expr.
type().
id()==ID_pointer &&
135 expr.
op0().
id()==ID_index &&
141 expr.
op0().
op1().
id()==ID_constant &&
149 return expr.
id()==ID_address_of &&
150 expr.
type().
id()==ID_pointer &&
152 expr.
op0().
id()==ID_symbol &&
160 return expr.
id()==ID_address_of &&
161 expr.
type().
id()==ID_pointer &&
163 expr.
op0().
id()==ID_symbol &&
164 ns.follow(expr.
op0().
type()).
id()==ID_struct;
171 return expr.
id()==ID_symbol &&
172 expr.
type().
id()==ID_array;
179 return expr.
id()==ID_symbol &&
180 expr.
type().
id()==ID_pointer;
193 for(
const auto &pair : linker_values)
201 entry.
value=pair.second.second;
208 std::list<symbol_exprt> to_pointerize;
211 if(to_pointerize.empty())
213 debug() <<
"Pointerizing the symbol-table value of symbol " << pair.first
220 if(to_pointerize.empty() && fail==0)
223 for(
const auto &sym : to_pointerize)
224 error() <<
" Could not pointerize '" << sym.get_identifier()
225 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
226 << sym.pretty() <<
"\n";
237 for(
exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
239 std::list<symbol_exprt> to_pointerize;
241 if(to_pointerize.empty())
243 debug() <<
"Pointerizing a program expression..." <<
eom;
245 *insts, to_pointerize, linker_values, ns);
246 if(to_pointerize.empty() && fail==0)
249 for(
const auto &sym : to_pointerize)
251 error() <<
" Could not pointerize '" << sym.get_identifier()
252 <<
"' in function " << gf.first <<
". Pretty:\n"
253 << sym.pretty() <<
"\n";
268 const std::string &shape)
270 auto it=linker_values.find(ident);
271 if(it==linker_values.end())
273 error() <<
"Could not find a new expression for linker script-defined "
274 <<
"symbol '" << ident <<
"'" <<
eom;
279 debug() <<
"Pointerizing linker-defined symbol '" << ident <<
"' of shape <"
280 << shape <<
">." <<
eom;
287 std::list<symbol_exprt> &to_pointerize,
292 for(
auto const &pair : linker_values)
295 if(!pattern.match(expr, ns))
298 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
301 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
302 pattern.description());
304 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
306 if(
result==to_pointerize.end())
313 to_pointerize.erase(
result);
332 std::list<symbol_exprt> &to_pointerize)
const
334 for(
const auto &op : expr.
operands())
336 if(op.id()!=ID_symbol)
340 if(pair!=linker_values.end())
341 to_pointerize.push_back(sym_exp);
343 for(
const auto &op : expr.
operands())
348 The current implementation of
this function is less precise than the
349 commented-out version below. To understand the difference between these
350 implementations, consider the following example:
352 Suppose we have a section in the linker script, 100 bytes long, where the
353 address of the symbol sec_start is the start of the section (value 4096) and the
354 address of sec_end is the end of that section (value 4196).
356 The current implementation synthesizes the
goto-version of the following C:
358 char __sec_array[100];
359 char *sec_start=(&__sec_array[0]);
360 char *sec_end=((&__sec_array[0])+100);
364 This is imprecise
for the following reason: the actual address of the array and
365 the pointers shall be some random CBMC-
internal address, instead of being 4096
366 and 4196. The linker script, on the other hand, would have specified the exact
367 position of the section, and we even know what the actual values of sec_start
368 and sec_end are from the
object file (these values are in the `addresses` list
369 of the `
data` argument to
this function). If the correctness of the code depends
370 on these actual values, then CBMCs model of the code is too imprecise to verify
373 The commented-out version of
this function below synthesizes the following:
375 char *sec_start=4096;
379 This code has both the actual addresses of the start and end of the section and
380 tells CBMC that the intermediate region is valid. However, the allocated_memory
381 macro does not currently allocate an actual
object at the address 4096, so
382 symbolic execution can fail. In particular, the
'allocated memory' is part of
383 __CPROVER_memory, which does not have a bounded size;
this means that (
for
384 example) calls to memcpy or memset fail, because the first and third arguments
385 do not have know n size. The commented-out implementation should be reinstated
388 In either
case, no other changes to the rest of the code (outside
this function)
389 should be necessary. The rest of
this file converts expressions containing the
390 linker-defined symbol into pointer types
if they were not already, and
this is
391 the right behaviour
for both implementations.
395 const std::string &linker_script,
402 std::map<irep_idt, std::size_t> truncated_symbols;
403 for(
auto &d :
data[
"regions"].array)
405 bool has_end=d[
"has-end-symbol"].is_true();
409 << d[
"start-symbol"].value;
418 warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
419 << array_size <<
" is too large to model. Truncating to "
432 std::ostringstream array_comment;
433 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
434 << array_size <<
" bytes";
435 array_loc.set_comment(array_comment.str());
436 array_expr.add_source_location()=array_loc;
444 linker_values[d[
"start-symbol"].value]=std::make_pair(start_sym,
449 auto it=std::find_if(
data[
"addresses"].array.begin(),
450 data[
"addresses"].array.end(),
451 [&d](
const jsont &add)
452 {
return add[
"sym"].value==d[
"start-symbol"].value; });
453 if(it==
data[
"addresses"].array.end())
455 error() <<
"Start: Could not find address corresponding to symbol '"
456 << d[
"start-symbol"].value <<
"' (start of section)" << eom;
461 std::ostringstream start_comment;
462 start_comment <<
"Pointer to beginning of object section '"
463 << d[
"section"].value <<
"'. Original address in object file"
464 <<
" is " << (*it)[
"val"].value;
474 initialize_instructions.push_front(start_assign_i);
478 plus_exprt array_end(array_start, array_size_expr);
481 linker_values[d[
"end-symbol"].value]=std::make_pair(end_sym, array_end);
483 auto entry = std::find_if(
484 data[
"addresses"].array.begin(),
485 data[
"addresses"].array.end(),
486 [&d](
const jsont &add) {
487 return add[
"sym"].value == d[
"end-symbol"].value;
489 if(entry ==
data[
"addresses"].array.end())
491 error() <<
"Could not find address corresponding to symbol '"
492 << d[
"end-symbol"].value <<
"' (end of section)" << eom;
497 std::ostringstream end_comment;
498 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
499 <<
"'. Original address in object file"
500 <<
" is " << (*entry)[
"val"].value;
509 initialize_instructions.push_front(end_assign_i);
520 array_sym.
type=array_type;
522 symbol_table.
add(array_sym);
530 array_assign.add_source_location()=array_loc;
534 initialize_instructions.push_front(array_assign_i);
542 for(
const auto &d :
data[
"addresses"].array)
545 if(it!=linker_values.end())
552 const auto &pair=truncated_symbols.find(d[
"sym"].value);
553 if(pair==truncated_symbols.end())
554 symbol_value=d[
"val"].value;
557 debug() <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
559 <<
" because it corresponds to the size of a too-large section."
565 loc.set_file(linker_script);
566 loc.set_comment(
"linker script-defined symbol: char *"+
567 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
580 linker_values[
irep_idt(d[
"sym"].value)]=std::make_pair(lhs, rhs_tc);
587 initialize_instructions.push_front(assign_i);
594 for(
const auto &d :
data[
"regions"].array)
605 loc.set_file(linker_script);
606 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
608 f.add_source_location()=
loc;
612 initialize_instructions.push_front(i);
623 symbol_table.
add(sym);
626 for(
const auto &d :
data[
"addresses"].array)
629 loc.set_file(linker_script);
630 loc.set_comment(
"linker script-defined symbol: char *"+
631 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
643 linker_values[
irep_idt(d[
"sym"].value)]=std::make_pair(lhs, rhs_tc);
646 assign.add_source_location()=
loc;
649 initialize_instructions.push_front(assign_i);
656 std::list<irep_idt> &linker_defined_symbols,
658 const std::string &out_file,
659 const std::string &def_out_file)
661 for(
auto const &pair : symbol_table.
symbols)
664 pair.second.is_extern && pair.second.value.is_nil() &&
667 linker_defined_symbols.push_back(pair.second.name);
671 std::ostringstream linker_def_str;
673 linker_defined_symbols.begin(),
674 linker_defined_symbols.end(),
675 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
676 debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
680 std::ofstream linker_def_file(linker_def_infile());
681 linker_def_file << linker_def_str.str();
682 linker_def_file.close();
684 std::vector<std::string> argv=
688 "--object", out_file,
689 "--sym-file", linker_def_infile(),
690 "--out-file", def_out_file
694 argv.push_back(
"--very-verbose");
696 argv.push_back(
"--verbose");
699 for(std::size_t i=0; i<argv.size(); i++)
700 debug() <<
" " << argv[i];
703 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
705 warning() <<
"Problem parsing linker script" <<
eom;
711 const std::list<irep_idt> &linker_defined_symbols,
715 for(
const auto &sym : linker_defined_symbols)
716 if(linker_values.find(sym)==linker_values.end())
719 error() <<
"Variable '" << sym <<
"' was declared extern but never given "
720 <<
"a value, even in a linker script" <<
eom;
723 for(
const auto &pair : linker_values)
725 auto it=std::find(linker_defined_symbols.begin(),
726 linker_defined_symbols.end(), pair.first);
727 if(it==linker_defined_symbols.end())
730 error() <<
"Linker script-defined symbol '" << pair.first <<
"' was "
731 <<
"either defined in the C source code, not declared extern in "
732 <<
"the C source code, or does not appear in the C source code"
742 !(
data.is_object() &&
data.object.find(
"regions") !=
data.object.end() &&
743 data.object.find(
"addresses") !=
data.object.end() &&
744 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
746 data[
"addresses"].array.begin(),
747 data[
"addresses"].array.end(),
749 return j.is_object() && j.object.find(
"val") != j.object.end() &&
750 j.object.find(
"sym") != j.object.end() &&
751 j[
"val"].is_number() && j[
"sym"].is_string();
754 data[
"regions"].array.begin(),
755 data[
"regions"].array.end(),
757 return j.is_object() && j.object.find(
"start") != j.object.end() &&
758 j.object.find(
"size") != j.object.end() &&
759 j.object.find(
"annot") != j.object.end() &&
760 j.object.find(
"commt") != j.object.end() &&
761 j.object.find(
"start-symbol") != j.object.end() &&
762 j.object.find(
"has-end-symbol") != j.object.end() &&
763 j.object.find(
"section") != j.object.end() &&
764 j[
"start"].is_number() && j[
"size"].is_number() &&
765 j[
"annot"].is_string() && j[
"start-symbol"].is_string() &&
766 j[
"section"].is_string() && j[
"commt"].is_string() &&
767 ((j[
"has-end-symbol"].is_true() &&
768 j.object.find(
"end-symbol") != j.object.end() &&
769 j[
"end-symbol"].is_string()) ||
771 j.object.find(
"size-symbol") != j.object.end() &&
772 j.object.find(
"end-symbol") == j.object.end() &&
773 j[
"size-symbol"].is_string()));