cprover
string_constraint_generator_comparison.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for function comparing strings,
4  such as: equals, equalsIgnoreCase, compareTo, hashCode, intern
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
13 
15 #include <util/deprecate.h>
16 
31 std::pair<exprt, string_constraintst> add_axioms_for_equals(
32  symbol_generatort &fresh_symbol,
34  array_poolt &pool)
35 {
36  PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
37  PRECONDITION(f.arguments().size() == 2);
38 
39  string_constraintst constraints;
42  symbol_exprt eq = fresh_symbol("equal");
43  typecast_exprt tc_eq(eq, f.type());
44 
45  typet index_type=s1.length().type();
46 
47  // Axiom 1.
48  constraints.existential.push_back(
49  implies_exprt(eq, equal_exprt(s1.length(), s2.length())));
50 
51  // Axiom 2.
52  constraints.universal.push_back([&] {
53  const symbol_exprt qvar = fresh_symbol("QA_equal", index_type);
54  return string_constraintt(
55  qvar,
56  zero_if_negative(s1.length()),
57  implies_exprt(eq, equal_exprt(s1[qvar], s2[qvar])));
58  }());
59 
60  // Axiom 3.
61  constraints.existential.push_back([&] {
62  const symbol_exprt witness = fresh_symbol("witness_unequal", index_type);
63  const exprt zero = from_integer(0, index_type);
64  const and_exprt bound_witness(
65  binary_relation_exprt(witness, ID_lt, s1.length()),
66  binary_relation_exprt(witness, ID_ge, zero));
67  const and_exprt witnessing(
68  bound_witness, notequal_exprt(s1[witness], s2[witness]));
69  const and_exprt diff_length(
70  notequal_exprt(s1.length(), s2.length()),
71  equal_exprt(witness, from_integer(-1, index_type)));
72  return implies_exprt(not_exprt(eq), or_exprt(diff_length, witnessing));
73  }());
74 
75  return {tc_eq, std::move(constraints)};
76 }
77 
88  const exprt &char1,
89  const exprt &char2,
90  const exprt &char_a,
91  const exprt &char_A,
92  const exprt &char_Z)
93 {
94  const and_exprt is_upper_case_1(
95  binary_relation_exprt(char_A, ID_le, char1),
96  binary_relation_exprt(char1, ID_le, char_Z));
97  const and_exprt is_upper_case_2(
98  binary_relation_exprt(char_A, ID_le, char2),
99  binary_relation_exprt(char2, ID_le, char_Z));
100 
101  // Three possibilities:
102  // p1 : char1=char2
103  // p2 : (is_up1&&'a'-'A'+char1=char2)
104  // p3 : (is_up2&&'a'-'A'+char2=char1)
105  const equal_exprt p1(char1, char2);
106  const minus_exprt diff(char_a, char_A);
107 
108  // Overflow is not a problem here because is_upper_case conditions
109  // ensure that we are within a safe range.
110  const exprt p2 =
111  and_exprt(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2));
112  const exprt p3 =
113  and_exprt(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1));
114  return or_exprt(p1, p2, p3);
115 }
116 
132 std::pair<exprt, string_constraintst> add_axioms_for_equals_ignore_case(
133  symbol_generatort &fresh_symbol,
135  array_poolt &pool)
136 {
137  PRECONDITION(f.type()==bool_typet() || f.type().id()==ID_c_bool);
138  PRECONDITION(f.arguments().size() == 2);
139  string_constraintst constraints;
140  const symbol_exprt eq = fresh_symbol("equal_ignore_case");
141  const array_string_exprt s1 = get_string_expr(pool, f.arguments()[0]);
142  const array_string_exprt s2 = get_string_expr(pool, f.arguments()[1]);
143  const typet char_type = s1.content().type().subtype();
144  const exprt char_a = from_integer('a', char_type);
145  const exprt char_A = from_integer('A', char_type);
146  const exprt char_Z = from_integer('Z', char_type);
147  const typet index_type = s1.length().type();
148 
149  const implies_exprt a1(eq, equal_exprt(s1.length(), s2.length()));
150  constraints.existential.push_back(a1);
151 
152  const symbol_exprt qvar = fresh_symbol("QA_equal_ignore_case", index_type);
153  const exprt constr2 =
154  character_equals_ignore_case(s1[qvar], s2[qvar], char_a, char_A, char_Z);
155  const string_constraintt a2(
156  qvar, zero_if_negative(s1.length()), implies_exprt(eq, constr2));
157  constraints.universal.push_back(a2);
158 
159  const symbol_exprt witness =
160  fresh_symbol("witness_unequal_ignore_case", index_type);
161  const exprt zero = from_integer(0, witness.type());
162  const and_exprt bound_witness(
163  binary_relation_exprt(witness, ID_lt, s1.length()),
164  binary_relation_exprt(witness, ID_ge, zero));
165  const exprt witness_eq = character_equals_ignore_case(
166  s1[witness], s2[witness], char_a, char_A, char_Z);
167  const not_exprt witness_diff(witness_eq);
168  const implies_exprt a3(
169  not_exprt(eq),
170  or_exprt(
171  notequal_exprt(s1.length(), s2.length()),
172  and_exprt(bound_witness, witness_diff)));
173  constraints.existential.push_back(a3);
174 
175  return {typecast_exprt(eq, f.type()), std::move(constraints)};
176 }
177 
189 std::pair<exprt, string_constraintst>
191  symbol_generatort &fresh_symbol,
193  array_poolt &pool)
194 {
195  PRECONDITION(f.arguments().size() == 1);
197  const array_string_exprt str = get_string_expr(pool, f.arguments()[0]);
198  const typet &return_type = f.type();
199  const typet &index_type = str.length().type();
200 
201  auto pair=hash_code_of_string.insert(
202  std::make_pair(str, fresh_symbol("hash", return_type)));
203  const exprt hash = pair.first->second;
204 
205  for(auto it : hash_code_of_string)
206  {
207  const symbol_exprt i = fresh_symbol("index_hash", index_type);
208  const equal_exprt c1(it.second, hash);
209  const notequal_exprt c2(it.first.length(), str.length());
210  const and_exprt c3(
211  equal_exprt(it.first.length(), str.length()),
212  and_exprt(
213  notequal_exprt(str[i], it.first[i]),
214  and_exprt(length_gt(str, i), is_positive(i))));
215  constraints.existential.push_back(or_exprt(c1, or_exprt(c2, c3)));
216  }
217  return {hash, std::move(constraints)};
218 }
219 
241 std::pair<exprt, string_constraintst> add_axioms_for_compare_to(
242  symbol_generatort &fresh_symbol,
244  array_poolt &pool)
245 {
246  PRECONDITION(f.arguments().size() == 2);
247  const typet &return_type=f.type();
248  PRECONDITION(return_type.id() == ID_signedbv);
249  string_constraintst constraints;
250  const array_string_exprt &s1 = get_string_expr(pool, f.arguments()[0]);
251  const array_string_exprt &s2 = get_string_expr(pool, f.arguments()[1]);
252  const symbol_exprt res = fresh_symbol("compare_to", return_type);
253  const typet &index_type = s1.length().type();
254 
255  const equal_exprt res_null(res, from_integer(0, return_type));
256  const implies_exprt a1(res_null, equal_exprt(s1.length(), s2.length()));
257  constraints.existential.push_back(a1);
258 
259  const symbol_exprt i = fresh_symbol("QA_compare_to", index_type);
260  const string_constraintt a2(
261  i,
262  zero_if_negative(s1.length()),
263  implies_exprt(res_null, equal_exprt(s1[i], s2[i])));
264  constraints.universal.push_back(a2);
265 
266  const symbol_exprt x = fresh_symbol("index_compare_to", index_type);
267  const equal_exprt ret_char_diff(
268  res,
269  minus_exprt(
270  typecast_exprt(s1[x], return_type), typecast_exprt(s2[x], return_type)));
271  const equal_exprt ret_length_diff(
272  res,
273  minus_exprt(
274  typecast_exprt(s1.length(), return_type),
275  typecast_exprt(s2.length(), return_type)));
276  const or_exprt guard1(
277  and_exprt(length_le(s1, s2.length()), length_gt(s1, x)),
278  and_exprt(length_ge(s1, s2.length()), length_gt(s2, x)));
279  const and_exprt cond1(ret_char_diff, guard1);
280  const or_exprt guard2(
281  and_exprt(length_gt(s2, s1.length()), length_eq(s1, x)),
282  and_exprt(length_gt(s1, s2.length()), length_eq(s2, x)));
283  const and_exprt cond2(ret_length_diff, guard2);
284 
285  const implies_exprt a3(
286  not_exprt(res_null),
287  and_exprt(
288  binary_relation_exprt(x, ID_ge, from_integer(0, return_type)),
289  or_exprt(cond1, cond2)));
290  constraints.existential.push_back(a3);
291 
292  const symbol_exprt i2 = fresh_symbol("QA_compare_to", index_type);
293  const string_constraintt a4(
294  i2,
295  zero_if_negative(x),
296  implies_exprt(not_exprt(res_null), equal_exprt(s1[i2], s2[i2])));
297  constraints.universal.push_back(a4);
298 
299  return {res, std::move(constraints)};
300 }
301 
308 DEPRECATED("never tested")
310 string_constraint_generatort::add_axioms_for_intern(
311  symbol_generatort &fresh_symbol,
313 {
314  PRECONDITION(f.arguments().size() == 1);
315  string_constraintst constraints;
316  const array_string_exprt str = get_string_expr(array_pool, f.arguments()[0]);
317  // For now we only enforce content equality and not pointer equality
318  const typet &return_type=f.type();
319  const typet index_type = str.length().type();
320 
321  auto pair=intern_of_string.insert(
322  std::make_pair(str, fresh_symbol("pool", return_type)));
323  const symbol_exprt intern = pair.first->second;
324 
325  // intern(str)=s_0 || s_1 || ...
326  // for each string s.
327  // intern(str)=intern(s) || |str|!=|s|
328  // || (|str|==|s| &&exists i<|s|. s[i]!=str[i])
329 
330  exprt::operandst disj;
331  for(auto it : intern_of_string)
332  disj.push_back(equal_exprt(intern, it.second));
333  constraints.existential.push_back(disjunction(disj));
334 
335  // WARNING: the specification may be incomplete or incorrect
336  for(auto it : intern_of_string)
337  if(it.second!=str)
338  {
339  symbol_exprt i = fresh_symbol("index_intern", index_type);
340  constraints.existential.push_back(or_exprt(
341  equal_exprt(it.second, intern),
342  or_exprt(
343  notequal_exprt(str.length(), it.first.length()),
344  and_exprt(
345  equal_exprt(str.length(), it.first.length()),
346  and_exprt(
347  notequal_exprt(str[i], it.first[i]),
348  and_exprt(length_gt(str, i), is_positive(i)))))));
349  }
350 
351  return {intern, std::move(constraints)};
352 }
function_application_exprt::arguments
argumentst & arguments()
Definition: std_expr.h:4506
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:438
typet::subtype
const typet & subtype() const
Definition: type.h:38
length_ge
binary_relation_exprt length_ge(const T &lhs, const exprt &rhs)
Definition: string_expr.h:21
length_eq
equal_exprt length_eq(const T &lhs, const exprt &rhs)
Definition: string_expr.h:54
typet
The type of an expression, extends irept.
Definition: type.h:27
array_poolt
Correspondance between arrays and pointers string representations.
Definition: string_constraint_generator.h:49
string_constraintt
Definition: string_constraint.h:58
length_gt
binary_relation_exprt length_gt(const T &lhs, const exprt &rhs)
Definition: string_expr.h:28
string_constraint_generatort::fresh_symbol
symbol_generatort fresh_symbol
Definition: string_constraint_generator.h:133
and_exprt
Boolean AND.
Definition: std_expr.h:2409
s1
int8_t s1
Definition: bytecode_info.h:59
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:1042
exprt
Base class for all expressions.
Definition: expr.h:54
string_constraintst
Collection of constraints of different types: existential formulas, universal formulas,...
Definition: string_constraint_generator.h:108
bool_typet
The Boolean type.
Definition: std_types.h:28
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:143
index_type
bitvector_typet index_type()
Definition: c_types.cpp:16
equal_exprt
Equality.
Definition: std_expr.h:1484
string_constraint_generatort::hash_code_of_string
std::map< array_string_exprt, exprt > hash_code_of_string
Definition: string_constraint_generator.h:173
string_constraint_generatort
Definition: string_constraint_generator.h:120
notequal_exprt
Disequality.
Definition: std_expr.h:1545
get_string_expr
array_string_exprt get_string_expr(array_poolt &pool, const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
Definition: string_constraint_generator_main.cpp:210
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:68
symbol_generatort
Generation of fresh symbols of a given type.
Definition: string_constraint_generator.h:32
disjunction
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:26
add_axioms_for_equals
std::pair< exprt, string_constraintst > add_axioms_for_equals(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &pool)
Equality of the content of two strings.
Definition: string_constraint_generator_comparison.cpp:31
or_exprt
Boolean OR.
Definition: std_expr.h:2531
deprecate.h
add_axioms_for_compare_to
std::pair< exprt, string_constraintst > add_axioms_for_compare_to(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &pool)
Lexicographic comparison of two strings.
Definition: string_constraint_generator_comparison.cpp:241
string_constraint_generatort::constraints
string_constraintst constraints
Definition: string_constraint_generator.h:137
function_application_exprt
Application of (mathematical) function.
Definition: std_expr.h:4481
irept::id
const irep_idt & id() const
Definition: irep.h:259
exprt::operandst
std::vector< exprt > operandst
Definition: expr.h:57
minus_exprt
Binary minus.
Definition: std_expr.h:1106
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
length_le
binary_relation_exprt length_le(const T &lhs, const exprt &rhs)
Definition: string_expr.h:41
DEPRECATED
#define DEPRECATED(msg)
Definition: deprecate.h:23
string_constraint_generatort::add_axioms_for_hash_code
std::pair< exprt, string_constraintst > add_axioms_for_hash_code(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &pool)
Value that is identical for strings with the same content.
Definition: string_constraint_generator_comparison.cpp:190
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:123
zero_if_negative
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
Definition: string_constraint_generator_main.cpp:591
add_axioms_for_equals_ignore_case
std::pair< exprt, string_constraintst > add_axioms_for_equals_ignore_case(symbol_generatort &fresh_symbol, const function_application_exprt &f, array_poolt &pool)
Equality of the content ignoring case of characters.
Definition: string_constraint_generator_comparison.cpp:132
is_positive
exprt is_positive(const exprt &x)
Definition: string_constraint_generator_main.cpp:524
array_string_exprt::length
exprt & length()
Definition: string_expr.h:70
binary_relation_exprt
A base class for relations, i.e., binary predicates.
Definition: std_expr.h:878
implies_exprt
Boolean implication.
Definition: std_expr.h:2485
s2
int16_t s2
Definition: bytecode_info.h:60
character_equals_ignore_case
static exprt character_equals_ignore_case(const exprt &char1, const exprt &char2, const exprt &char_a, const exprt &char_A, const exprt &char_Z)
Returns an expression which is true when the two given characters are equal when ignoring case for AS...
Definition: string_constraint_generator_comparison.cpp:87
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2277
string_constraint_generator.h
array_string_exprt
Definition: string_expr.h:67
string_constraintst::existential
std::vector< exprt > existential
Definition: string_constraint_generator.h:110
not_exprt
Boolean negation.
Definition: std_expr.h:3308