GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_operation.cpp Lines: 723 1000 72.3 %
Date: 2021-05-22 Branches: 1525 4218 36.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Tianyi Liang, Andrew Reynolds, Mathias Preiner
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Symbolic Regular Expresion Operations
14
 */
15
16
#include "theory/strings/regexp_operation.h"
17
18
#include <sstream>
19
20
#include "expr/node_algorithm.h"
21
#include "options/strings_options.h"
22
#include "theory/rewriter.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/theory_strings_utils.h"
25
#include "theory/strings/word.h"
26
27
using namespace cvc5::kind;
28
29
namespace cvc5 {
30
namespace theory {
31
namespace strings {
32
33
9464
RegExpOpr::RegExpOpr(SkolemCache* sc)
34
    : d_true(NodeManager::currentNM()->mkConst(true)),
35
      d_false(NodeManager::currentNM()->mkConst(false)),
36
      d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
37
18928
                                                     std::vector<Node>{})),
38
18928
      d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))),
39
18928
      d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))),
40
      d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
41
18928
                                               std::vector<Node>{})),
42
      d_sigma_star(
43
          NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)),
44
85176
      d_sc(sc)
45
{
46
9464
  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
47
48
9464
  d_emptySingleton =
49
18928
      NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
50
9464
  d_lastchar = utils::getAlphabetCardinality() - 1;
51
9464
}
52
53
9464
RegExpOpr::~RegExpOpr() {}
54
55
696
bool RegExpOpr::checkConstRegExp( Node r ) {
56
696
  Assert(r.getType().isRegExp());
57
1392
  Trace("strings-regexp-cstre")
58
696
      << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
59
696
  RegExpConstType rct = getRegExpConstType(r);
60
696
  return rct != RE_C_VARIABLE;
61
}
62
63
1284
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
64
{
65
1284
  Assert(r.getType().isRegExp());
66
1284
  std::unordered_map<Node, RegExpConstType>::iterator it;
67
2568
  std::vector<TNode> visit;
68
2568
  TNode cur;
69
1284
  visit.push_back(r);
70
1523
  do
71
  {
72
2807
    cur = visit.back();
73
2807
    visit.pop_back();
74
2807
    it = d_constCache.find(cur);
75
76
2807
    Kind ck = cur.getKind();
77
2807
    if (it == d_constCache.end())
78
    {
79
884
      if (ck == STRING_TO_REGEXP)
80
      {
81
554
        Node tmp = Rewriter::rewrite(cur[0]);
82
277
        d_constCache[cur] =
83
277
            tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE;
84
      }
85
607
      else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE)
86
      {
87
89
        d_constCache[cur] = RE_C_CONSTANT;
88
      }
89
518
      else if (!utils::isRegExpKind(ck))
90
      {
91
        // non-regular expression applications, e.g. function applications
92
        // with regular expression return type are treated as variables.
93
2
        d_constCache[cur] = RE_C_VARIABLE;
94
      }
95
      else
96
      {
97
516
        d_constCache[cur] = RE_C_UNKNOWN;
98
516
        visit.push_back(cur);
99
516
        visit.insert(visit.end(), cur.begin(), cur.end());
100
      }
101
    }
102
1923
    else if (it->second == RE_C_UNKNOWN)
103
    {
104
516
      RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
105
1523
      for (const Node& cn : cur)
106
      {
107
1007
        it = d_constCache.find(cn);
108
1007
        Assert(it != d_constCache.end());
109
1007
        if (it->second > ret)
110
        {
111
257
          ret = it->second;
112
        }
113
      }
114
516
      d_constCache[cur] = ret;
115
    }
116
2807
  } while (!visit.empty());
117
1284
  Assert(d_constCache.find(r) != d_constCache.end());
118
2568
  return d_constCache[r];
119
}
120
121
// 0-unknown, 1-yes, 2-no
122
1311
int RegExpOpr::delta( Node r, Node &exp ) {
123
  std::map<Node, std::pair<int, Node> >::const_iterator itd =
124
1311
      d_delta_cache.find(r);
125
1311
  if (itd != d_delta_cache.end())
126
  {
127
    // already computed
128
803
    exp = itd->second.second;
129
803
    return itd->second.first;
130
  }
131
508
  Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl;
132
508
  int ret = 0;
133
508
  NodeManager* nm = NodeManager::currentNM();
134
508
  Kind k = r.getKind();
135
508
  switch (k)
136
  {
137
    case REGEXP_EMPTY:
138
    case REGEXP_SIGMA:
139
    case REGEXP_RANGE:
140
    {
141
      // does not contain empty string
142
      ret = 2;
143
      break;
144
    }
145
178
    case STRING_TO_REGEXP:
146
    {
147
356
      Node tmp = Rewriter::rewrite(r[0]);
148
178
      if (tmp.isConst())
149
      {
150
174
        if (tmp == d_emptyString)
151
        {
152
10
          ret = 1;
153
        } else {
154
164
          ret = 2;
155
        }
156
      }
157
      else
158
      {
159
4
        ret = 0;
160
4
        if (tmp.getKind() == STRING_CONCAT)
161
        {
162
          for (const Node& tmpc : tmp)
163
          {
164
            if (tmpc.isConst())
165
            {
166
              ret = 2;
167
              break;
168
            }
169
          }
170
        }
171
4
        if (ret == 0)
172
        {
173
4
          exp = r[0].eqNode(d_emptyString);
174
        }
175
      }
176
178
      break;
177
    }
178
253
    case REGEXP_CONCAT:
179
    case REGEXP_UNION:
180
    case REGEXP_INTER:
181
    {
182
      // has there been an unknown child?
183
253
      bool hasUnknownChild = false;
184
506
      std::vector<Node> vec;
185
253
      int checkTmp = k == REGEXP_UNION ? 1 : 2;
186
253
      int retTmp = k == REGEXP_UNION ? 2 : 1;
187
333
      for (const Node& rc : r)
188
      {
189
397
        Node exp2;
190
317
        int tmp = delta(rc, exp2);
191
317
        if (tmp == checkTmp)
192
        {
193
          // return is implied by the child's return value
194
237
          ret = checkTmp;
195
237
          break;
196
        }
197
80
        else if (tmp == 0)
198
        {
199
          // unknown if child contains empty string
200
4
          Assert(!exp2.isNull());
201
4
          vec.push_back(exp2);
202
4
          hasUnknownChild = true;
203
        }
204
      }
205
253
      if (ret != checkTmp)
206
      {
207
16
        if (!hasUnknownChild)
208
        {
209
16
          ret = retTmp;
210
        } else {
211
          Kind kr = k == REGEXP_UNION ? OR : AND;
212
          exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec);
213
        }
214
      }
215
253
      break;
216
    }
217
73
    case REGEXP_STAR:
218
    case REGEXP_OPT:
219
    {
220
      // contains empty string
221
73
      ret = 1;
222
73
      break;
223
    }
224
    case REGEXP_PLUS:
225
    {
226
      ret = delta(r[0], exp);
227
      break;
228
    }
229
    case REGEXP_LOOP:
230
    {
231
      uint32_t lo = utils::getLoopMinOccurrences(r);
232
      if (lo == 0)
233
      {
234
        ret = 1;
235
      }
236
      else
237
      {
238
        ret = delta(r[0], exp);
239
      }
240
      break;
241
    }
242
4
    case REGEXP_COMPLEMENT:
243
    {
244
4
      int tmp = delta(r[0], exp);
245
      // flip the result if known
246
4
      ret = tmp == 0 ? 0 : (3 - tmp);
247
4
      exp = exp.isNull() ? exp : exp.negate();
248
4
      break;
249
    }
250
    default:
251
    {
252
      Assert(!utils::isRegExpKind(k));
253
      break;
254
    }
255
  }
256
508
  if (!exp.isNull())
257
  {
258
4
    exp = Rewriter::rewrite(exp);
259
  }
260
1016
  std::pair<int, Node> p(ret, exp);
261
508
  d_delta_cache[r] = p;
262
1016
  Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r
263
508
                        << ", expr = " << exp << std::endl;
264
508
  return ret;
265
}
266
267
// 0-unknown, 1-yes, 2-no
268
24
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode)
269
{
270
24
  Assert(c.size() < 2);
271
24
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
272
273
24
  int ret = 1;
274
24
  retNode = d_emptyRegexp;
275
24
  NodeManager* nm = NodeManager::currentNM();
276
24
  SkolemManager* sm = nm->getSkolemManager();
277
278
48
  PairNodeStr dv = std::make_pair( r, c );
279
24
  if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
280
4
    retNode = d_deriv_cache[dv].first;
281
4
    ret = d_deriv_cache[dv].second;
282
  }
283
20
  else if (c.empty())
284
  {
285
    Node expNode;
286
    ret = delta( r, expNode );
287
    if(ret == 0) {
288
      retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
289
    } else if(ret == 1) {
290
      retNode = r;
291
    }
292
    std::pair< Node, int > p(retNode, ret);
293
    d_deriv_cache[dv] = p;
294
  } else {
295
20
    switch( r.getKind() ) {
296
      case kind::REGEXP_EMPTY: {
297
        ret = 2;
298
        break;
299
      }
300
      case kind::REGEXP_SIGMA: {
301
        retNode = d_emptySingleton;
302
        break;
303
      }
304
      case kind::REGEXP_RANGE: {
305
        cvc5::String a = r[0].getConst<String>();
306
        cvc5::String b = r[1].getConst<String>();
307
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
308
        break;
309
      }
310
8
      case kind::STRING_TO_REGEXP: {
311
16
        Node tmp = Rewriter::rewrite(r[0]);
312
8
        if(tmp.isConst()) {
313
8
          if(tmp == d_emptyString) {
314
4
            ret = 2;
315
          } else {
316
4
            if (tmp.getConst<String>().front() == c.front())
317
            {
318
              retNode =
319
                  nm->mkNode(STRING_TO_REGEXP,
320
                             Word::getLength(tmp) == 1 ? d_emptyString
321
                                                       : Word::substr(tmp, 1));
322
            } else {
323
4
              ret = 2;
324
            }
325
          }
326
        } else {
327
          ret = 0;
328
          Node rest;
329
          if(tmp.getKind() == kind::STRING_CONCAT) {
330
            Node t2 = tmp[0];
331
            if(t2.isConst()) {
332
              if (t2.getConst<String>().front() == c.front())
333
              {
334
                Node n = nm->mkNode(STRING_TO_REGEXP,
335
                                    Word::getLength(tmp) == 1
336
                                        ? d_emptyString
337
                                        : Word::substr(tmp, 1));
338
                std::vector< Node > vec_nodes;
339
                vec_nodes.push_back(n);
340
                for(unsigned i=1; i<tmp.getNumChildren(); i++) {
341
                  vec_nodes.push_back(tmp[i]);
342
                }
343
                retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
344
                ret = 1;
345
              } else {
346
                ret = 2;
347
              }
348
            } else {
349
              tmp = tmp[0];
350
              std::vector< Node > vec_nodes;
351
              for(unsigned i=1; i<tmp.getNumChildren(); i++) {
352
                vec_nodes.push_back(tmp[i]);
353
              }
354
              rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
355
            }
356
          }
357
          if(ret == 0) {
358
            Node sk =
359
                sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp");
360
            retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
361
            if(!rest.isNull()) {
362
              retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
363
            }
364
            Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
365
                        NodeManager::currentNM()->mkConst(c), sk));
366
            retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
367
          }
368
        }
369
8
        break;
370
      }
371
4
      case kind::REGEXP_CONCAT: {
372
8
        std::vector< Node > vec_nodes;
373
8
        std::vector< Node > delta_nodes;
374
8
        Node dnode = d_true;
375
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
376
16
          Node dc;
377
16
          Node exp2;
378
8
          int rt = derivativeS(r[i], c, dc);
379
8
          if(rt != 2) {
380
            if(rt == 0) {
381
              ret = 0;
382
            }
383
            std::vector< Node > vec_nodes2;
384
            if(dc != d_emptySingleton) {
385
              vec_nodes2.push_back( dc );
386
            }
387
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
388
              if(r[j] != d_emptySingleton) {
389
                vec_nodes2.push_back( r[j] );
390
              }
391
            }
392
            Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
393
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
394
            if(dnode != d_true) {
395
              tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
396
              ret = 0;
397
            }
398
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
399
              vec_nodes.push_back( tmp );
400
            }
401
          }
402
16
          Node exp3;
403
8
          int rt2 = delta( r[i], exp3 );
404
8
          if( rt2 == 0 ) {
405
            dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
406
8
          } else if( rt2 == 2 ) {
407
            break;
408
          }
409
        }
410
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
411
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
412
4
        if(retNode == d_emptyRegexp) {
413
4
          ret = 2;
414
        }
415
4
        break;
416
      }
417
4
      case kind::REGEXP_UNION: {
418
8
        std::vector< Node > vec_nodes;
419
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
420
16
          Node dc;
421
8
          int rt = derivativeS(r[i], c, dc);
422
8
          if(rt == 0) {
423
            ret = 0;
424
          }
425
8
          if(rt != 2) {
426
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
427
              vec_nodes.push_back( dc );
428
            }
429
          }
430
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
431
        }
432
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
433
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
434
4
        if(retNode == d_emptyRegexp) {
435
4
          ret = 2;
436
        }
437
4
        break;
438
      }
439
      case kind::REGEXP_INTER: {
440
        bool flag = true;
441
        bool flag_sg = false;
442
        std::vector< Node > vec_nodes;
443
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
444
          Node dc;
445
          int rt = derivativeS(r[i], c, dc);
446
          if(rt == 0) {
447
            ret = 0;
448
          } else if(rt == 2) {
449
            flag = false;
450
            break;
451
          }
452
          if(dc == d_sigma_star) {
453
            flag_sg = true;
454
          } else {
455
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
456
              vec_nodes.push_back( dc );
457
            }
458
          }
459
        }
460
        if(flag) {
461
          if(vec_nodes.size() == 0 && flag_sg) {
462
            retNode = d_sigma_star;
463
          } else {
464
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
465
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
466
            if(retNode == d_emptyRegexp) {
467
              ret = 2;
468
            }
469
          }
470
        } else {
471
          retNode = d_emptyRegexp;
472
          ret = 2;
473
        }
474
        break;
475
      }
476
4
      case kind::REGEXP_STAR: {
477
8
        Node dc;
478
4
        ret = derivativeS(r[0], c, dc);
479
4
        retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
480
4
        break;
481
      }
482
      case kind::REGEXP_LOOP: {
483
        uint32_t l = utils::getLoopMinOccurrences(r);
484
        uint32_t u = utils::getLoopMaxOccurrences(r);
485
        if (l == u && l == 0)
486
        {
487
          ret = 2;
488
          //retNode = d_emptyRegexp;
489
        } else {
490
          Node dc;
491
          ret = derivativeS(r[0], c, dc);
492
          if(dc==d_emptyRegexp) {
493
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
494
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
495
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
496
          } else {
497
            retNode = d_emptyRegexp;
498
          }
499
        }
500
        break;
501
      }
502
      case kind::REGEXP_COMPLEMENT:
503
      {
504
        // don't know result
505
        return 0;
506
        break;
507
      }
508
      default: {
509
        Assert(!utils::isRegExpKind(r.getKind()));
510
        return 0;
511
        break;
512
      }
513
    }
514
20
    if(retNode != d_emptyRegexp) {
515
      retNode = Rewriter::rewrite( retNode );
516
    }
517
40
    std::pair< Node, int > p(retNode, ret);
518
20
    d_deriv_cache[dv] = p;
519
  }
520
521
24
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
522
24
  return ret;
523
}
524
525
970
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c)
526
{
527
970
  Assert(c.size() < 2);
528
970
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
529
970
  Node retNode = d_emptyRegexp;
530
1940
  PairNodeStr dv = std::make_pair( r, c );
531
970
  NodeManager* nm = NodeManager::currentNM();
532
970
  if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
533
290
    retNode = d_dv_cache[dv];
534
  }
535
680
  else if (c.empty())
536
  {
537
    Node exp;
538
    int tmp = delta( r, exp );
539
    if(tmp == 0) {
540
      // TODO variable
541
      retNode = d_emptyRegexp;
542
    } else if(tmp == 1) {
543
      retNode = r;
544
    } else {
545
      retNode = d_emptyRegexp;
546
    }
547
  } else {
548
680
    Kind k = r.getKind();
549
680
    switch( k ) {
550
      case kind::REGEXP_EMPTY: {
551
        retNode = d_emptyRegexp;
552
        break;
553
      }
554
      case kind::REGEXP_SIGMA: {
555
        retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
556
        break;
557
      }
558
      case kind::REGEXP_RANGE: {
559
        cvc5::String a = r[0].getConst<String>();
560
        cvc5::String b = r[1].getConst<String>();
561
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
562
        break;
563
      }
564
252
      case kind::STRING_TO_REGEXP: {
565
252
        if(r[0].isConst()) {
566
252
          if(r[0] == d_emptyString) {
567
8
            retNode = d_emptyRegexp;
568
          } else {
569
244
            if (r[0].getConst<String>().front() == c.front())
570
            {
571
188
              retNode = nm->mkNode(STRING_TO_REGEXP,
572
376
                                   Word::getLength(r[0]) == 1
573
376
                                       ? d_emptyString
574
                                       : Word::substr(r[0], 1));
575
            } else {
576
56
              retNode = d_emptyRegexp;
577
            }
578
          }
579
        } else {
580
          // TODO variable
581
          retNode = d_emptyRegexp;
582
        }
583
252
        break;
584
      }
585
269
      case kind::REGEXP_CONCAT: {
586
538
        Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
587
538
        std::vector< Node > vec_nodes;
588
357
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
589
435
          Node dc = derivativeSingle(r[i], c);
590
347
          if(dc != d_emptyRegexp) {
591
550
            std::vector< Node > vec_nodes2;
592
275
            if(dc != rees) {
593
196
              vec_nodes2.push_back( dc );
594
            }
595
554
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
596
279
              if(r[j] != rees) {
597
279
                vec_nodes2.push_back( r[j] );
598
              }
599
            }
600
275
            Node tmp = vec_nodes2.size()==0 ? rees :
601
550
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
602
275
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
603
275
              vec_nodes.push_back( tmp );
604
            }
605
          }
606
435
          Node exp;
607
347
          if( delta( r[i], exp ) != 1 ) {
608
259
            break;
609
          }
610
        }
611
771
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
612
502
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
613
269
        break;
614
      }
615
50
      case kind::REGEXP_UNION: {
616
100
        std::vector< Node > vec_nodes;
617
168
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
618
236
          Node dc = derivativeSingle(r[i], c);
619
118
          if(dc != d_emptyRegexp) {
620
52
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
621
50
              vec_nodes.push_back( dc );
622
            }
623
          }
624
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
625
        }
626
150
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
627
100
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
628
50
        break;
629
      }
630
      case kind::REGEXP_INTER: {
631
        bool flag = true;
632
        bool flag_sg = false;
633
        std::vector< Node > vec_nodes;
634
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
635
          Node dc = derivativeSingle(r[i], c);
636
          if(dc != d_emptyRegexp) {
637
            if(dc == d_sigma_star) {
638
              flag_sg = true;
639
            } else {
640
              if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
641
                vec_nodes.push_back( dc );
642
              }
643
            }
644
          } else {
645
            flag = false;
646
            break;
647
          }
648
        }
649
        if(flag) {
650
          if(vec_nodes.size() == 0 && flag_sg) {
651
            retNode = d_sigma_star;
652
          } else {
653
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
654
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
655
          }
656
        } else {
657
          retNode = d_emptyRegexp;
658
        }
659
        break;
660
      }
661
109
      case kind::REGEXP_STAR: {
662
218
        Node dc = derivativeSingle(r[0], c);
663
109
        if(dc != d_emptyRegexp) {
664
87
          retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
665
        } else {
666
22
          retNode = d_emptyRegexp;
667
        }
668
109
        break;
669
      }
670
      case kind::REGEXP_LOOP: {
671
        uint32_t l = utils::getLoopMinOccurrences(r);
672
        uint32_t u = utils::getLoopMaxOccurrences(r);
673
        if (l == u || l == 0)
674
        {
675
          retNode = d_emptyRegexp;
676
        } else {
677
          Node dc = derivativeSingle(r[0], c);
678
          if(dc != d_emptyRegexp) {
679
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
680
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
681
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
682
          } else {
683
            retNode = d_emptyRegexp;
684
          }
685
        }
686
        //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
687
        break;
688
      }
689
      case kind::REGEXP_COMPLEMENT:
690
      default: {
691
        Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
692
        Unreachable();
693
        break;
694
      }
695
    }
696
680
    if(retNode != d_emptyRegexp) {
697
584
      retNode = Rewriter::rewrite( retNode );
698
    }
699
680
    d_dv_cache[dv] = retNode;
700
  }
701
970
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
702
1940
  return retNode;
703
}
704
705
746
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
706
{
707
746
  Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
708
  std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
709
746
      d_fset_cache.find(r);
710
746
  if(itr != d_fset_cache.end()) {
711
226
    pcset.insert((itr->second).first.begin(), (itr->second).first.end());
712
226
    pvset.insert((itr->second).second.begin(), (itr->second).second.end());
713
  } else {
714
    // cset is code points
715
1040
    std::set<unsigned> cset;
716
1040
    SetNodes vset;
717
520
    Kind k = r.getKind();
718
520
    switch( k ) {
719
      case kind::REGEXP_EMPTY: {
720
        break;
721
      }
722
      case kind::REGEXP_RANGE: {
723
        unsigned a = r[0].getConst<String>().front();
724
        unsigned b = r[1].getConst<String>().front();
725
        Assert(a < b);
726
        Assert(b < std::numeric_limits<unsigned>::max());
727
        for (unsigned c = a; c <= b; c++)
728
        {
729
          cset.insert(c);
730
        }
731
        break;
732
      }
733
202
      case kind::STRING_TO_REGEXP: {
734
404
        Node st = Rewriter::rewrite(r[0]);
735
202
        if(st.isConst()) {
736
404
          String s = st.getConst<String>();
737
202
          if(s.size() != 0) {
738
196
            unsigned sc = s.front();
739
196
            cset.insert(sc);
740
          }
741
        }
742
        else if (st.getKind() == kind::STRING_CONCAT)
743
        {
744
          if(st[0].isConst()) {
745
            String s = st[0].getConst<String>();
746
            unsigned sc = s.front();
747
            cset.insert(sc);
748
          } else {
749
            vset.insert( st[0] );
750
          }
751
        }
752
        else
753
        {
754
          vset.insert(st);
755
        }
756
202
        break;
757
      }
758
221
      case kind::REGEXP_CONCAT: {
759
267
        for(unsigned i=0; i<r.getNumChildren(); i++) {
760
261
          firstChars(r[i], cset, vset);
761
307
          Node n = r[i];
762
307
          Node exp;
763
261
          if (delta(n, exp) != 1)
764
          {
765
215
            break;
766
          }
767
        }
768
221
        break;
769
      }
770
28
      case kind::REGEXP_UNION: {
771
92
        for(unsigned i=0; i<r.getNumChildren(); i++) {
772
64
          firstChars(r[i], cset, vset);
773
        }
774
28
        break;
775
      }
776
      case kind::REGEXP_INTER: {
777
        //TODO: Overapproximation for now
778
        //for(unsigned i=0; i<r.getNumChildren(); i++) {
779
        // firstChars(r[i], cset, vset);
780
        //}
781
        firstChars(r[0], cset, vset);
782
        break;
783
      }
784
69
      case kind::REGEXP_STAR: {
785
69
        firstChars(r[0], cset, vset);
786
69
        break;
787
      }
788
      case kind::REGEXP_LOOP: {
789
        firstChars(r[0], cset, vset);
790
        break;
791
      }
792
      case kind::REGEXP_SIGMA:
793
      case kind::REGEXP_COMPLEMENT:
794
      default: {
795
        // we do not expect to call this function on regular expressions that
796
        // aren't a standard regular expression kind. However, if we do, then
797
        // the following code is conservative and says that the current
798
        // regular expression can begin with any character.
799
        Assert(utils::isRegExpKind(k));
800
        // can start with any character
801
        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
802
        for (unsigned i = 0; i <= d_lastchar; i++)
803
        {
804
          cset.insert(i);
805
        }
806
        break;
807
      }
808
    }
809
520
    pcset.insert(cset.begin(), cset.end());
810
520
    pvset.insert(vset.begin(), vset.end());
811
1040
    std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
812
520
    d_fset_cache[r] = p;
813
  }
814
815
746
  if(Trace.isOn("regexp-fset")) {
816
    Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
817
    for (std::set<unsigned>::const_iterator it = pcset.begin();
818
         it != pcset.end();
819
         ++it)
820
    {
821
      if (it != pcset.begin())
822
      {
823
        Trace("regexp-fset") << ",";
824
      }
825
      Trace("regexp-fset") << (*it);
826
      }
827
    Trace("regexp-fset") << "}" << std::endl;
828
  }
829
746
}
830
831
374
Node RegExpOpr::simplify(Node t, bool polarity)
832
{
833
748
  Trace("strings-regexp-simpl")
834
374
      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
835
374
  Assert(t.getKind() == kind::STRING_IN_REGEXP);
836
748
  Node tlit = polarity ? t : t.notNode();
837
748
  Node conc;
838
374
  std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit);
839
374
  if (itr != d_simpCache.end())
840
  {
841
4
    return itr->second;
842
  }
843
370
  if (polarity)
844
  {
845
558
    std::vector<Node> newSkolems;
846
279
    conc = reduceRegExpPos(tlit, d_sc, newSkolems);
847
  }
848
  else
849
  {
850
    // see if we can use an optimized version of the reduction for re.++.
851
182
    Node r = t[1];
852
91
    if (r.getKind() == REGEXP_CONCAT)
853
    {
854
      // the index we are removing from the RE concatenation
855
70
      size_t index = 0;
856
      // As an optimization to the reduction, if we can determine that
857
      // all strings in the language of R1 have the same length, say n,
858
      // then the conclusion of the reduction is quantifier-free:
859
      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
860
140
      Node reLen = getRegExpConcatFixed(r, index);
861
70
      if (!reLen.isNull())
862
      {
863
52
        conc = reduceRegExpNegConcatFixed(tlit, reLen, index);
864
      }
865
    }
866
91
    if (conc.isNull())
867
    {
868
39
      conc = reduceRegExpNeg(tlit);
869
    }
870
  }
871
370
  d_simpCache[tlit] = conc;
872
740
  Trace("strings-regexp-simpl")
873
370
      << "RegExpOpr::simplify: returns " << conc << std::endl;
874
370
  return conc;
875
}
876
877
84
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index)
878
{
879
84
  Assert(r.getKind() == REGEXP_CONCAT);
880
84
  index = 0;
881
168
  Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
882
84
  if (!reLen.isNull())
883
  {
884
56
    return reLen;
885
  }
886
  // try from the opposite end
887
28
  size_t indexE = r.getNumChildren() - 1;
888
28
  reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
889
28
  if (!reLen.isNull())
890
  {
891
8
    index = indexE;
892
8
    return reLen;
893
  }
894
20
  return Node::null();
895
}
896
897
45
Node RegExpOpr::reduceRegExpNeg(Node mem)
898
{
899
45
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
900
90
  Node s = mem[0][0];
901
90
  Node r = mem[0][1];
902
45
  NodeManager* nm = NodeManager::currentNM();
903
45
  Kind k = r.getKind();
904
90
  Node zero = nm->mkConst(Rational(0));
905
45
  Node conc;
906
45
  if (k == REGEXP_CONCAT)
907
  {
908
    // do not use length entailment, call regular expression concat
909
44
    Node reLen;
910
22
    size_t i = 0;
911
22
    conc = reduceRegExpNegConcatFixed(mem, reLen, i);
912
  }
913
23
  else if (k == REGEXP_STAR)
914
  {
915
44
    Node emp = Word::mkEmptyWord(s.getType());
916
44
    Node lens = nm->mkNode(STRING_LENGTH, s);
917
44
    Node sne = s.eqNode(emp).negate();
918
44
    Node b1 = nm->mkBoundVar(nm->integerType());
919
44
    Node b1v = nm->mkNode(BOUND_VAR_LIST, b1);
920
    Node g1 =
921
44
        nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1));
922
    // internal
923
44
    Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
924
44
    Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
925
44
    Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate();
926
44
    Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate();
927
928
22
    conc = nm->mkNode(OR, s1r1, s2r2);
929
22
    conc = nm->mkNode(IMPLIES, g1, conc);
930
22
    conc = nm->mkNode(FORALL, b1v, conc);
931
22
    conc = nm->mkNode(AND, sne, conc);
932
  }
933
  else
934
  {
935
1
    Assert(!utils::isRegExpKind(k));
936
  }
937
90
  return conc;
938
}
939
940
82
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index)
941
{
942
82
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
943
164
  Node s = mem[0][0];
944
164
  Node r = mem[0][1];
945
82
  NodeManager* nm = NodeManager::currentNM();
946
82
  Assert(r.getKind() == REGEXP_CONCAT);
947
164
  Node zero = nm->mkConst(Rational(0));
948
  // The following simplification states that
949
  //    ~( s in R1 ++ R2 ++... ++ Rn )
950
  // is equivalent to
951
  //    forall x.
952
  //      0 <= x <= len(s) =>
953
  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
954
  // Index is the child index of r that we are stripping off, which is either
955
  // from the beginning or the end.
956
82
  Assert(index == 0 || index == r.getNumChildren() - 1);
957
164
  Node lens = nm->mkNode(STRING_LENGTH, s);
958
164
  Node b1;
959
164
  Node b1v;
960
164
  Node guard;
961
82
  if (reLen.isNull())
962
  {
963
22
    b1 = SkolemCache::mkIndexVar(mem);
964
22
    b1v = nm->mkNode(BOUND_VAR_LIST, b1);
965
66
    guard = nm->mkNode(AND,
966
44
                       nm->mkNode(GEQ, b1, zero),
967
44
                       nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
968
  }
969
  else
970
  {
971
60
    b1 = reLen;
972
  }
973
164
  Node s1;
974
164
  Node s2;
975
82
  if (index == 0)
976
  {
977
74
    s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
978
74
    s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
979
  }
980
  else
981
  {
982
8
    s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
983
8
    s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1));
984
  }
985
164
  Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate();
986
164
  std::vector<Node> nvec;
987
348
  for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
988
  {
989
266
    if (i != index)
990
    {
991
184
      nvec.push_back(r[i]);
992
    }
993
  }
994
164
  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
995
82
  r2 = Rewriter::rewrite(r2);
996
164
  Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
997
82
  Node conc = nm->mkNode(OR, s1r1, s2r2);
998
82
  if (!b1v.isNull())
999
  {
1000
22
    conc = nm->mkNode(OR, guard.negate(), conc);
1001
22
    conc = nm->mkNode(FORALL, b1v, conc);
1002
  }
1003
164
  return conc;
1004
}
1005
1006
493
Node RegExpOpr::reduceRegExpPos(Node mem,
1007
                                SkolemCache* sc,
1008
                                std::vector<Node>& newSkolems)
1009
{
1010
493
  Assert(mem.getKind() == STRING_IN_REGEXP);
1011
986
  Node s = mem[0];
1012
986
  Node r = mem[1];
1013
493
  NodeManager* nm = NodeManager::currentNM();
1014
493
  Kind k = r.getKind();
1015
493
  Node conc;
1016
493
  if (k == REGEXP_CONCAT)
1017
  {
1018
714
    std::vector<Node> nvec;
1019
714
    std::vector<Node> cc;
1020
    // get the (valid) existential for this membership
1021
714
    Node eform = getExistsForRegExpConcatMem(mem);
1022
357
    SkolemManager* sm = nm->getSkolemManager();
1023
    // Notice that this rule does not introduce witness terms, instead it
1024
    // uses skolems in the conclusion of the proof rule directly. Thus,
1025
    // the existential eform does not need to be explicitly justified by a
1026
    // proof here, since it is only being used as an intermediate formula in
1027
    // this inference. Hence we do not pass a proof generator to mkSkolemize.
1028
357
    sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem");
1029
357
    Assert(newSkolems.size() == r.getNumChildren());
1030
    // Look up skolems for each of the components. If sc has optimizations
1031
    // enabled, this will return arguments of str.to_re.
1032
1383
    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
1033
    {
1034
1026
      if (r[i].getKind() == STRING_TO_REGEXP)
1035
      {
1036
        // optimization, just take the body
1037
366
        newSkolems[i] = r[i][0];
1038
      }
1039
      else
1040
      {
1041
660
        nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
1042
      }
1043
    }
1044
    // (str.in_re x (re.++ R1 .... Rn)) =>
1045
    // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn)))
1046
714
    Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
1047
357
    nvec.push_back(lem);
1048
357
    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
1049
  }
1050
136
  else if (k == REGEXP_STAR)
1051
  {
1052
268
    Node emp = Word::mkEmptyWord(s.getType());
1053
268
    Node se = s.eqNode(emp);
1054
268
    Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
1055
268
    Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]);
1056
268
    Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand);
1057
    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
1058
    // is matched by `R`, or `x` is matched by two or more `R`s. For the
1059
    // last case, `x` will break into three pieces, making the beginning
1060
    // and the end each match `R` and the middle match `R*`. Matching the
1061
    // beginning and the end with `R` allows us to reason about the
1062
    // beginning and the end of `x` simultaneously.
1063
    //
1064
    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))
1065
1066
    // We also immediately unfold the last disjunct for re.*. The advantage
1067
    // of doing this is that we use the same scheme for skolems above.
1068
268
    std::vector<Node> newSkolemsC;
1069
134
    sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC);
1070
134
    Assert(newSkolemsC.size() == 3);
1071
    // make the return lemma
1072
    // can also assume the component match the first and last R are non-empty.
1073
    // This means that the overall conclusion is:
1074
    //   (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^
1075
    //                          k1 in R ^ k2 in (re.* R) ^ k3 in R ^
1076
    //                          k1 != ""  ^ k3 != "")
1077
134
    conc = nm->mkNode(OR,
1078
                      se,
1079
                      sinr,
1080
536
                      nm->mkNode(AND,
1081
                                 sinRExp,
1082
268
                                 newSkolemsC[0].eqNode(emp).negate(),
1083
268
                                 newSkolemsC[2].eqNode(emp).negate()));
1084
  }
1085
  else
1086
  {
1087
2
    Assert(!utils::isRegExpKind(k));
1088
  }
1089
986
  return conc;
1090
}
1091
1092
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1093
  for(std::set< PairNodes >::const_iterator itr = s.begin();
1094
      itr != s.end(); ++itr) {
1095
    if((itr->first == n1 && itr->second == n2) ||
1096
       (itr->first == n2 && itr->second == n1)) {
1097
      return true;
1098
    }
1099
  }
1100
  return false;
1101
}
1102
1103
346
bool RegExpOpr::containC2(unsigned cnt, Node n) {
1104
346
  if(n.getKind() == kind::REGEXP_RV) {
1105
132
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1106
        << "Exceeded UINT32_MAX in RegExpOpr::containC2";
1107
132
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1108
132
    return cnt == y;
1109
214
  } else if(n.getKind() == kind::REGEXP_CONCAT) {
1110
16
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1111
12
      if(containC2(cnt, n[i])) {
1112
2
        return true;
1113
      }
1114
    }
1115
208
  } else if(n.getKind() == kind::REGEXP_STAR) {
1116
16
    return containC2(cnt, n[0]);
1117
192
  } else if(n.getKind() == kind::REGEXP_LOOP) {
1118
    return containC2(cnt, n[0]);
1119
192
  } else if(n.getKind() == kind::REGEXP_UNION) {
1120
18
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1121
14
      if(containC2(cnt, n[i])) {
1122
2
        return true;
1123
      }
1124
    }
1125
  }
1126
194
  return false;
1127
}
1128
176
Node RegExpOpr::convert1(unsigned cnt, Node n) {
1129
176
  Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1130
352
  Node r1, r2;
1131
176
  convert2(cnt, n, r1, r2);
1132
176
  Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1133
176
  Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1134
176
     NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1135
176
  ret = Rewriter::rewrite( ret );
1136
176
  Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1137
352
  return ret;
1138
}
1139
285
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1140
285
  if(n == d_emptyRegexp) {
1141
24
    r1 = d_emptyRegexp;
1142
24
    r2 = d_emptyRegexp;
1143
24
    return;
1144
261
  } else if(n == d_emptySingleton) {
1145
21
    r1 = d_emptySingleton;
1146
21
    r2 = d_emptySingleton;
1147
  }
1148
261
  Kind nk = n.getKind();
1149
261
  if (nk == REGEXP_RV)
1150
  {
1151
33
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1152
        << "Exceeded UINT32_MAX in RegExpOpr::convert2";
1153
33
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1154
33
    r1 = d_emptySingleton;
1155
33
    if(cnt == y) {
1156
33
      r2 = d_emptyRegexp;
1157
    } else {
1158
      r2 = n;
1159
    }
1160
  }
1161
228
  else if (nk == REGEXP_CONCAT)
1162
  {
1163
148
    bool flag = true;
1164
296
    std::vector<Node> vr1, vr2;
1165
417
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1166
304
      if(containC2(cnt, n[i])) {
1167
70
        Node t1, t2;
1168
35
        convert2(cnt, n[i], t1, t2);
1169
35
        vr1.push_back(t1);
1170
35
        r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1171
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1172
35
        vr2.push_back(t2);
1173
35
        for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1174
          vr2.push_back(n[j]);
1175
        }
1176
35
        r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1177
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1178
35
        flag = false;
1179
35
        break;
1180
      } else {
1181
269
        vr1.push_back(n[i]);
1182
      }
1183
    }
1184
148
    if(flag) {
1185
113
      r1 = d_emptySingleton;
1186
113
      r2 = n;
1187
    }
1188
  }
1189
80
  else if (nk == REGEXP_UNION)
1190
  {
1191
66
    std::vector<Node> vr1, vr2;
1192
107
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1193
148
      Node t1, t2;
1194
74
      convert2(cnt, n[i], t1, t2);
1195
74
      vr1.push_back(t1);
1196
74
      vr2.push_back(t2);
1197
    }
1198
33
    r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1199
33
    r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1200
  }
1201
47
  else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
1202
           || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
1203
  {
1204
    // this leaves n unchanged
1205
47
    r1 = d_emptySingleton;
1206
47
    r2 = n;
1207
  }
1208
  else
1209
  {
1210
    //is it possible?
1211
    Unreachable();
1212
  }
1213
}
1214
1215
251
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
1216
  //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1217
251
  if(r1 > r2) {
1218
52
    TNode tmpNode = r1;
1219
26
    r1 = r2;
1220
26
    r2 = tmpNode;
1221
  }
1222
251
  Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
1223
502
  std::pair < Node, Node > p(r1, r2);
1224
251
  std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
1225
251
  Node rNode;
1226
251
  if(itr != d_inter_cache.end()) {
1227
26
    rNode = itr->second;
1228
  } else {
1229
225
    Trace("regexp-int-debug") << " ... not in cache" << std::endl;
1230
225
    if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1231
      Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
1232
      rNode = d_emptyRegexp;
1233
225
    } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1234
18
      Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
1235
36
      Node exp;
1236
18
      int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1237
18
      if(r == 0) {
1238
        //TODO: variable
1239
        Unreachable();
1240
18
      } else if(r == 1) {
1241
14
        rNode = d_emptySingleton;
1242
      } else {
1243
4
        rNode = d_emptyRegexp;
1244
      }
1245
207
    } else if(r1 == r2) {
1246
2
      Trace("regexp-int-debug") << " ... equal" << std::endl;
1247
2
      rNode = r1; //convert1(cnt, r1);
1248
    } else {
1249
205
      Trace("regexp-int-debug") << " ... normal checking" << std::endl;
1250
205
      std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1251
205
      if(itrcache != cache.end()) {
1252
29
        rNode = itrcache->second;
1253
      } else {
1254
176
        Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
1255
352
        std::vector<unsigned> cset;
1256
352
        std::set<unsigned> cset1, cset2;
1257
352
        std::set< Node > vset1, vset2;
1258
176
        firstChars(r1, cset1, vset1);
1259
176
        firstChars(r2, cset2, vset2);
1260
176
        Trace("regexp-int-debug") << " ... got fset" << std::endl;
1261
176
        std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
1262
176
             std::inserter(cset, cset.begin()));
1263
352
        std::vector< Node > vec_nodes;
1264
352
        Node delta_exp;
1265
176
        Trace("regexp-int-debug") << " ... try delta" << std::endl;
1266
176
        int flag = delta(r1, delta_exp);
1267
176
        int flag2 = delta(r2, delta_exp);
1268
176
        Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
1269
176
        if(flag != 2 && flag2 != 2) {
1270
19
          if(flag == 1 && flag2 == 1) {
1271
19
            vec_nodes.push_back(d_emptySingleton);
1272
          } else {
1273
            //TODO: variable
1274
            Unreachable();
1275
          }
1276
        }
1277
176
        if(Trace.isOn("regexp-int-debug")) {
1278
          Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
1279
          for (std::vector<unsigned>::const_iterator it = cset.begin();
1280
               it != cset.end();
1281
               ++it)
1282
          {
1283
            if (it != cset.begin())
1284
            {
1285
              Trace("regexp-int-debug") << ", ";
1286
            }
1287
            Trace("regexp-int-debug") << (*it);
1288
          }
1289
          Trace("regexp-int-debug") << std::endl;
1290
        }
1291
352
        std::map< PairNodes, Node > cacheX;
1292
374
        for (std::vector<unsigned>::const_iterator it = cset.begin();
1293
374
             it != cset.end();
1294
             ++it)
1295
        {
1296
396
          std::vector<unsigned> cvec;
1297
198
          cvec.push_back(*it);
1298
396
          String c(cvec);
1299
198
          Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
1300
396
          Node r1l = derivativeSingle(r1, c);
1301
396
          Node r2l = derivativeSingle(r2, c);
1302
198
          Trace("regexp-int-debug") << "  ... got partial(r1,c) = " << mkString(r1l) << std::endl;
1303
198
          Trace("regexp-int-debug") << "  ... got partial(r2,c) = " << mkString(r2l) << std::endl;
1304
396
          Node rt;
1305
1306
198
          if(r1l > r2l) {
1307
32
            Node tnode = r1l;
1308
16
            r1l = r2l; r2l = tnode;
1309
          }
1310
396
          PairNodes pp(r1l, r2l);
1311
198
          std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp);
1312
198
          if(itr2 != cacheX.end()) {
1313
4
            rt = itr2->second;
1314
          } else {
1315
388
            std::map< PairNodes, Node > cache2(cache);
1316
388
            cache2[p] = NodeManager::currentNM()->mkNode(
1317
                kind::REGEXP_RV,
1318
388
                NodeManager::currentNM()->mkConst(cvc5::Rational(cnt)));
1319
194
            rt = intersectInternal(r1l, r2l, cache2, cnt+1);
1320
194
            cacheX[ pp ] = rt;
1321
          }
1322
1323
594
          rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1324
396
            NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1325
1326
198
          Trace("regexp-int-debug") << "  ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
1327
198
          vec_nodes.push_back(rt);
1328
        }
1329
176
        rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1330
            NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) );
1331
176
        rNode = convert1(cnt, rNode);
1332
176
        rNode = Rewriter::rewrite( rNode );
1333
      }
1334
    }
1335
225
    Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
1336
225
    if (!expr::hasSubtermKind(REGEXP_RV, rNode))
1337
    {
1338
101
      d_inter_cache[p] = rNode;
1339
    }
1340
  }
1341
251
  Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1342
502
  return rNode;
1343
}
1344
1345
114
Node RegExpOpr::removeIntersection(Node r) {
1346
114
  Assert(checkConstRegExp(r));
1347
114
  NodeManager* nm = NodeManager::currentNM();
1348
228
  std::unordered_map<TNode, Node> visited;
1349
114
  std::unordered_map<TNode, Node>::iterator it;
1350
228
  std::vector<TNode> visit;
1351
228
  TNode cur;
1352
114
  visit.push_back(r);
1353
1302
  do
1354
  {
1355
1416
    cur = visit.back();
1356
1416
    visit.pop_back();
1357
1416
    it = visited.find(cur);
1358
1359
1416
    if (it == visited.end())
1360
    {
1361
694
      visited[cur] = Node::null();
1362
694
      visit.push_back(cur);
1363
1302
      for (const Node& cn : cur)
1364
      {
1365
608
        visit.push_back(cn);
1366
      }
1367
    }
1368
722
    else if (it->second.isNull())
1369
    {
1370
694
      Kind ck = cur.getKind();
1371
1388
      Node ret;
1372
694
      bool childChanged = false;
1373
1388
      std::vector<Node> children;
1374
694
      if (cur.getMetaKind() == metakind::PARAMETERIZED)
1375
      {
1376
        children.push_back(cur.getOperator());
1377
      }
1378
1302
      for (const Node& cn : cur)
1379
      {
1380
608
        it = visited.find(cn);
1381
608
        Assert(it != visited.end());
1382
608
        Assert(!it->second.isNull());
1383
608
        if (ck == REGEXP_INTER)
1384
        {
1385
4
          if (ret.isNull())
1386
          {
1387
2
            ret = it->second;
1388
          }
1389
          else
1390
          {
1391
2
            ret = intersect(ret, it->second);
1392
          }
1393
        }
1394
        else
1395
        {
1396
          // will construct below
1397
604
          childChanged = childChanged || cn != it->second;
1398
604
          children.push_back(it->second);
1399
        }
1400
      }
1401
694
      if (ck != REGEXP_INTER)
1402
      {
1403
692
        if (childChanged)
1404
        {
1405
2
          ret = nm->mkNode(cur.getKind(), children);
1406
        }
1407
        else
1408
        {
1409
690
          ret = cur;
1410
        }
1411
      }
1412
694
      visited[cur] = ret;
1413
    }
1414
1416
  } while (!visit.empty());
1415
114
  Assert(visited.find(r) != visited.end());
1416
114
  Assert(!visited.find(r)->second.isNull());
1417
114
  if (Trace.isOn("regexp-intersect"))
1418
  {
1419
    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
1420
                              << " ) = " << mkString(visited[r]) << std::endl;
1421
  }
1422
228
  return visited[r];
1423
}
1424
1425
57
Node RegExpOpr::intersect(Node r1, Node r2)
1426
{
1427
57
  if (!checkConstRegExp(r1) || !checkConstRegExp(r2))
1428
  {
1429
    return Node::null();
1430
  }
1431
114
  Node rr1 = removeIntersection(r1);
1432
114
  Node rr2 = removeIntersection(r2);
1433
114
  std::map<PairNodes, Node> cache;
1434
57
  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
1435
57
  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
1436
114
  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
1437
57
                            << ",\n\t" << mkString(r2) << ")" << std::endl;
1438
114
  Node retNode = intersectInternal(rr1, rr2, cache, 1);
1439
114
  Trace("regexp-intersect")
1440
114
      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
1441
57
      << ") =\n\t" << mkString(retNode) << std::endl;
1442
57
  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
1443
57
  return retNode;
1444
}
1445
1446
//printing
1447
11129
std::string RegExpOpr::niceChar(Node r) {
1448
11129
  if(r.isConst()) {
1449
22070
    std::string s = r.getConst<String>().toString();
1450
11035
    return s == "." ? "\\." : s;
1451
  } else {
1452
188
    std::string ss = "$" + r.toString();
1453
94
    return ss;
1454
  }
1455
}
1456
21308
std::string RegExpOpr::mkString( Node r ) {
1457
21308
  std::string retStr;
1458
21308
  if(r.isNull()) {
1459
    retStr = "\\E";
1460
  } else {
1461
21308
    int k = r.getKind();
1462
21308
    switch( k ) {
1463
300
      case kind::REGEXP_EMPTY: {
1464
300
        retStr += "\\E";
1465
300
        break;
1466
      }
1467
283
      case kind::REGEXP_SIGMA: {
1468
283
        retStr += ".";
1469
283
        break;
1470
      }
1471
10595
      case kind::STRING_TO_REGEXP: {
1472
21190
        std::string tmp( niceChar( r[0] ) );
1473
10595
        retStr += tmp.size()==1? tmp : "(" + tmp + ")";
1474
10595
        break;
1475
      }
1476
3664
      case kind::REGEXP_CONCAT: {
1477
3664
        retStr += "(";
1478
11681
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1479
          //if(i != 0) retStr += ".";
1480
8017
          retStr += mkString( r[i] );
1481
        }
1482
3664
        retStr += ")";
1483
3664
        break;
1484
      }
1485
1236
      case kind::REGEXP_UNION: {
1486
1236
        retStr += "(";
1487
4100
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1488
2864
          if(i != 0) retStr += "|";
1489
2864
          retStr += mkString( r[i] );
1490
        }
1491
1236
        retStr += ")";
1492
1236
        break;
1493
      }
1494
60
      case kind::REGEXP_INTER: {
1495
60
        retStr += "(";
1496
180
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1497
120
          if(i != 0) retStr += "&";
1498
120
          retStr += mkString( r[i] );
1499
        }
1500
60
        retStr += ")";
1501
60
        break;
1502
      }
1503
4456
      case kind::REGEXP_STAR: {
1504
4456
        retStr += mkString( r[0] );
1505
4456
        retStr += "*";
1506
4456
        break;
1507
      }
1508
      case kind::REGEXP_PLUS: {
1509
        retStr += mkString( r[0] );
1510
        retStr += "+";
1511
        break;
1512
      }
1513
      case kind::REGEXP_OPT: {
1514
        retStr += mkString( r[0] );
1515
        retStr += "?";
1516
        break;
1517
      }
1518
267
      case kind::REGEXP_RANGE: {
1519
267
        retStr += "[";
1520
267
        retStr += niceChar( r[0] );
1521
267
        retStr += "-";
1522
267
        retStr += niceChar( r[1] );
1523
267
        retStr += "]";
1524
267
        break;
1525
      }
1526
      case kind::REGEXP_LOOP: {
1527
        uint32_t l = utils::getLoopMinOccurrences(r);
1528
        std::stringstream ss;
1529
        ss << "(" << mkString(r[0]) << "){" << l << ",";
1530
        if(r.getNumChildren() == 3) {
1531
          uint32_t u = utils::getLoopMaxOccurrences(r);
1532
          ss << u;
1533
        }
1534
        ss << "}";
1535
        retStr += ss.str();
1536
        break;
1537
      }
1538
382
      case kind::REGEXP_RV: {
1539
382
        retStr += "<";
1540
382
        retStr += r[0].getConst<Rational>().getNumerator().toString();
1541
382
        retStr += ">";
1542
382
        break;
1543
      }
1544
62
      case REGEXP_COMPLEMENT:
1545
      {
1546
62
        retStr += "^(";
1547
62
        retStr += mkString(r[0]);
1548
62
        retStr += ")";
1549
62
        break;
1550
      }
1551
3
      default:
1552
      {
1553
6
        std::stringstream ss;
1554
3
        ss << r;
1555
3
        retStr = ss.str();
1556
3
        Assert(!utils::isRegExpKind(r.getKind()));
1557
3
        break;
1558
      }
1559
    }
1560
  }
1561
1562
21308
  return retStr;
1563
}
1564
1565
777
bool RegExpOpr::regExpIncludes(Node r1, Node r2)
1566
{
1567
777
  const auto& it = d_inclusionCache.find(std::make_pair(r1, r2));
1568
777
  if (it != d_inclusionCache.end())
1569
  {
1570
418
    return (*it).second;
1571
  }
1572
359
  bool result = RegExpEntail::regExpIncludes(r1, r2);
1573
359
  d_inclusionCache[std::make_pair(r1, r2)] = result;
1574
359
  return result;
1575
}
1576
1577
/**
1578
 * Associating formulas with their "exists form", or an existentially
1579
 * quantified formula that is equivalent to it. This is currently used
1580
 * for regular expression memberships in the method below.
1581
 */
1582
struct ExistsFormAttributeId
1583
{
1584
};
1585
typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute;
1586
1587
357
Node RegExpOpr::getExistsForRegExpConcatMem(Node mem)
1588
{
1589
  // get or make the exists form of the membership
1590
  ExistsFormAttribute efa;
1591
357
  if (mem.hasAttribute(efa))
1592
  {
1593
    // already computed
1594
96
    return mem.getAttribute(efa);
1595
  }
1596
261
  Assert(mem.getKind() == STRING_IN_REGEXP);
1597
522
  Node x = mem[0];
1598
522
  Node r = mem[1];
1599
261
  Assert(r.getKind() == REGEXP_CONCAT);
1600
261
  NodeManager* nm = NodeManager::currentNM();
1601
522
  TypeNode xtn = x.getType();
1602
522
  std::vector<Node> vars;
1603
522
  std::vector<Node> mems;
1604
1036
  for (const Node& rc : r)
1605
  {
1606
1550
    Node v = nm->mkBoundVar(xtn);
1607
775
    vars.push_back(v);
1608
775
    mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc));
1609
  }
1610
522
  Node sconcat = nm->mkNode(STRING_CONCAT, vars);
1611
522
  Node eq = x.eqNode(sconcat);
1612
261
  mems.insert(mems.begin(), eq);
1613
522
  Node bvl = nm->mkNode(BOUND_VAR_LIST, vars);
1614
522
  Node ebody = nm->mkNode(AND, mems);
1615
522
  Node eform = nm->mkNode(EXISTS, bvl, ebody);
1616
261
  mem.setAttribute(efa, eform);
1617
261
  Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl;
1618
261
  return eform;
1619
}
1620
1621
}  // namespace strings
1622
}  // namespace theory
1623
28194
}  // namespace cvc5