GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_operation.cpp Lines: 698 977 71.4 %
Date: 2021-08-06 Branches: 1485 4106 36.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Tianyi Liang, Andrew Reynolds, Mathias Preiner
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Symbolic Regular Expresion Operations
14
 */
15
16
#include "theory/strings/regexp_operation.h"
17
18
#include <sstream>
19
20
#include "expr/node_algorithm.h"
21
#include "options/strings_options.h"
22
#include "theory/rewriter.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/theory_strings_utils.h"
25
#include "theory/strings/word.h"
26
#include "util/regexp.h"
27
28
using namespace cvc5::kind;
29
30
namespace cvc5 {
31
namespace theory {
32
namespace strings {
33
34
9857
RegExpOpr::RegExpOpr(SkolemCache* sc)
35
    : d_true(NodeManager::currentNM()->mkConst(true)),
36
      d_false(NodeManager::currentNM()->mkConst(false)),
37
      d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
38
19714
                                                     std::vector<Node>{})),
39
19714
      d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))),
40
19714
      d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))),
41
      d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
42
19714
                                               std::vector<Node>{})),
43
      d_sigma_star(
44
          NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)),
45
88713
      d_sc(sc)
46
{
47
9857
  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
48
49
9857
  d_emptySingleton =
50
19714
      NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
51
9857
  d_lastchar = utils::getAlphabetCardinality() - 1;
52
9857
}
53
54
9857
RegExpOpr::~RegExpOpr() {}
55
56
882
bool RegExpOpr::checkConstRegExp( Node r ) {
57
882
  Assert(r.getType().isRegExp());
58
1764
  Trace("strings-regexp-cstre")
59
882
      << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
60
882
  RegExpConstType rct = getRegExpConstType(r);
61
882
  return rct != RE_C_VARIABLE;
62
}
63
64
1466
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
65
{
66
1466
  Assert(r.getType().isRegExp());
67
1466
  std::unordered_map<Node, RegExpConstType>::iterator it;
68
2932
  std::vector<TNode> visit;
69
2932
  TNode cur;
70
1466
  visit.push_back(r);
71
1953
  do
72
  {
73
3419
    cur = visit.back();
74
3419
    visit.pop_back();
75
3419
    it = d_constCache.find(cur);
76
77
3419
    Kind ck = cur.getKind();
78
3419
    if (it == d_constCache.end())
79
    {
80
1098
      if (ck == STRING_TO_REGEXP)
81
      {
82
656
        Node tmp = Rewriter::rewrite(cur[0]);
83
328
        d_constCache[cur] =
84
328
            tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE;
85
      }
86
770
      else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE)
87
      {
88
105
        d_constCache[cur] = RE_C_CONSTANT;
89
      }
90
665
      else if (!utils::isRegExpKind(ck))
91
      {
92
        // non-regular expression applications, e.g. function applications
93
        // with regular expression return type are treated as variables.
94
2
        d_constCache[cur] = RE_C_VARIABLE;
95
      }
96
      else
97
      {
98
663
        d_constCache[cur] = RE_C_UNKNOWN;
99
663
        visit.push_back(cur);
100
663
        visit.insert(visit.end(), cur.begin(), cur.end());
101
      }
102
    }
103
2321
    else if (it->second == RE_C_UNKNOWN)
104
    {
105
663
      RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
106
1953
      for (const Node& cn : cur)
107
      {
108
1290
        it = d_constCache.find(cn);
109
1290
        Assert(it != d_constCache.end());
110
1290
        if (it->second > ret)
111
        {
112
308
          ret = it->second;
113
        }
114
      }
115
663
      d_constCache[cur] = ret;
116
    }
117
3419
  } while (!visit.empty());
118
1466
  Assert(d_constCache.find(r) != d_constCache.end());
119
2932
  return d_constCache[r];
120
}
121
122
// 0-unknown, 1-yes, 2-no
123
1655
int RegExpOpr::delta( Node r, Node &exp ) {
124
  std::map<Node, std::pair<int, Node> >::const_iterator itd =
125
1655
      d_delta_cache.find(r);
126
1655
  if (itd != d_delta_cache.end())
127
  {
128
    // already computed
129
1091
    exp = itd->second.second;
130
1091
    return itd->second.first;
131
  }
132
564
  Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl;
133
564
  int ret = 0;
134
564
  NodeManager* nm = NodeManager::currentNM();
135
564
  Kind k = r.getKind();
136
564
  switch (k)
137
  {
138
    case REGEXP_EMPTY:
139
    case REGEXP_SIGMA:
140
    case REGEXP_RANGE:
141
    {
142
      // does not contain empty string
143
      ret = 2;
144
      break;
145
    }
146
182
    case STRING_TO_REGEXP:
147
    {
148
364
      Node tmp = Rewriter::rewrite(r[0]);
149
182
      if (tmp.isConst())
150
      {
151
178
        if (tmp == d_emptyString)
152
        {
153
12
          ret = 1;
154
        } else {
155
166
          ret = 2;
156
        }
157
      }
158
      else
159
      {
160
4
        ret = 0;
161
4
        if (tmp.getKind() == STRING_CONCAT)
162
        {
163
          for (const Node& tmpc : tmp)
164
          {
165
            if (tmpc.isConst())
166
            {
167
              ret = 2;
168
              break;
169
            }
170
          }
171
        }
172
4
        if (ret == 0)
173
        {
174
4
          exp = r[0].eqNode(d_emptyString);
175
        }
176
      }
177
182
      break;
178
    }
179
291
    case REGEXP_CONCAT:
180
    case REGEXP_UNION:
181
    case REGEXP_INTER:
182
    {
183
      // has there been an unknown child?
184
291
      bool hasUnknownChild = false;
185
582
      std::vector<Node> vec;
186
291
      int checkTmp = k == REGEXP_UNION ? 1 : 2;
187
291
      int retTmp = k == REGEXP_UNION ? 2 : 1;
188
415
      for (const Node& rc : r)
189
      {
190
515
        Node exp2;
191
391
        int tmp = delta(rc, exp2);
192
391
        if (tmp == checkTmp)
193
        {
194
          // return is implied by the child's return value
195
267
          ret = checkTmp;
196
267
          break;
197
        }
198
124
        else if (tmp == 0)
199
        {
200
          // unknown if child contains empty string
201
4
          Assert(!exp2.isNull());
202
4
          vec.push_back(exp2);
203
4
          hasUnknownChild = true;
204
        }
205
      }
206
291
      if (ret != checkTmp)
207
      {
208
24
        if (!hasUnknownChild)
209
        {
210
24
          ret = retTmp;
211
        } else {
212
          Kind kr = k == REGEXP_UNION ? OR : AND;
213
          exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec);
214
        }
215
      }
216
291
      break;
217
    }
218
87
    case REGEXP_STAR:
219
    case REGEXP_OPT:
220
    {
221
      // contains empty string
222
87
      ret = 1;
223
87
      break;
224
    }
225
    case REGEXP_PLUS:
226
    {
227
      ret = delta(r[0], exp);
228
      break;
229
    }
230
    case REGEXP_LOOP:
231
    {
232
      uint32_t lo = utils::getLoopMinOccurrences(r);
233
      if (lo == 0)
234
      {
235
        ret = 1;
236
      }
237
      else
238
      {
239
        ret = delta(r[0], exp);
240
      }
241
      break;
242
    }
243
4
    case REGEXP_COMPLEMENT:
244
    {
245
4
      int tmp = delta(r[0], exp);
246
      // flip the result if known
247
4
      ret = tmp == 0 ? 0 : (3 - tmp);
248
4
      exp = exp.isNull() ? exp : exp.negate();
249
4
      break;
250
    }
251
    default:
252
    {
253
      Assert(!utils::isRegExpKind(k));
254
      break;
255
    }
256
  }
257
564
  if (!exp.isNull())
258
  {
259
4
    exp = Rewriter::rewrite(exp);
260
  }
261
1128
  std::pair<int, Node> p(ret, exp);
262
564
  d_delta_cache[r] = p;
263
1128
  Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r
264
564
                        << ", expr = " << exp << std::endl;
265
564
  return ret;
266
}
267
268
// 0-unknown, 1-yes, 2-no
269
24
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode)
270
{
271
24
  Assert(c.size() < 2);
272
24
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
273
274
24
  int ret = 1;
275
24
  retNode = d_emptyRegexp;
276
24
  NodeManager* nm = NodeManager::currentNM();
277
24
  SkolemManager* sm = nm->getSkolemManager();
278
279
48
  PairNodeStr dv = std::make_pair( r, c );
280
24
  if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
281
4
    retNode = d_deriv_cache[dv].first;
282
4
    ret = d_deriv_cache[dv].second;
283
  }
284
20
  else if (c.empty())
285
  {
286
    Node expNode;
287
    ret = delta( r, expNode );
288
    if(ret == 0) {
289
      retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
290
    } else if(ret == 1) {
291
      retNode = r;
292
    }
293
    std::pair< Node, int > p(retNode, ret);
294
    d_deriv_cache[dv] = p;
295
  } else {
296
20
    switch( r.getKind() ) {
297
      case kind::REGEXP_EMPTY: {
298
        ret = 2;
299
        break;
300
      }
301
      case kind::REGEXP_SIGMA: {
302
        retNode = d_emptySingleton;
303
        break;
304
      }
305
      case kind::REGEXP_RANGE: {
306
        cvc5::String a = r[0].getConst<String>();
307
        cvc5::String b = r[1].getConst<String>();
308
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
309
        break;
310
      }
311
8
      case kind::STRING_TO_REGEXP: {
312
16
        Node tmp = Rewriter::rewrite(r[0]);
313
8
        if(tmp.isConst()) {
314
8
          if(tmp == d_emptyString) {
315
4
            ret = 2;
316
          } else {
317
4
            if (tmp.getConst<String>().front() == c.front())
318
            {
319
              retNode =
320
                  nm->mkNode(STRING_TO_REGEXP,
321
                             Word::getLength(tmp) == 1 ? d_emptyString
322
                                                       : Word::substr(tmp, 1));
323
            } else {
324
4
              ret = 2;
325
            }
326
          }
327
        } else {
328
          ret = 0;
329
          Node rest;
330
          if(tmp.getKind() == kind::STRING_CONCAT) {
331
            Node t2 = tmp[0];
332
            if(t2.isConst()) {
333
              if (t2.getConst<String>().front() == c.front())
334
              {
335
                Node n = nm->mkNode(STRING_TO_REGEXP,
336
                                    Word::getLength(tmp) == 1
337
                                        ? d_emptyString
338
                                        : Word::substr(tmp, 1));
339
                std::vector< Node > vec_nodes;
340
                vec_nodes.push_back(n);
341
                for(unsigned i=1; i<tmp.getNumChildren(); i++) {
342
                  vec_nodes.push_back(tmp[i]);
343
                }
344
                retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
345
                ret = 1;
346
              } else {
347
                ret = 2;
348
              }
349
            } else {
350
              tmp = tmp[0];
351
              std::vector< Node > vec_nodes;
352
              for(unsigned i=1; i<tmp.getNumChildren(); i++) {
353
                vec_nodes.push_back(tmp[i]);
354
              }
355
              rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
356
            }
357
          }
358
          if(ret == 0) {
359
            Node sk =
360
                sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp");
361
            retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
362
            if(!rest.isNull()) {
363
              retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
364
            }
365
            Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
366
                        NodeManager::currentNM()->mkConst(c), sk));
367
            retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
368
          }
369
        }
370
8
        break;
371
      }
372
4
      case kind::REGEXP_CONCAT: {
373
8
        std::vector< Node > vec_nodes;
374
8
        std::vector< Node > delta_nodes;
375
8
        Node dnode = d_true;
376
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
377
16
          Node dc;
378
16
          Node exp2;
379
8
          int rt = derivativeS(r[i], c, dc);
380
8
          if(rt != 2) {
381
            if(rt == 0) {
382
              ret = 0;
383
            }
384
            std::vector< Node > vec_nodes2;
385
            if(dc != d_emptySingleton) {
386
              vec_nodes2.push_back( dc );
387
            }
388
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
389
              if(r[j] != d_emptySingleton) {
390
                vec_nodes2.push_back( r[j] );
391
              }
392
            }
393
            Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
394
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
395
            if(dnode != d_true) {
396
              tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
397
              ret = 0;
398
            }
399
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
400
              vec_nodes.push_back( tmp );
401
            }
402
          }
403
16
          Node exp3;
404
8
          int rt2 = delta( r[i], exp3 );
405
8
          if( rt2 == 0 ) {
406
            dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
407
8
          } else if( rt2 == 2 ) {
408
            break;
409
          }
410
        }
411
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
412
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
413
4
        if(retNode == d_emptyRegexp) {
414
4
          ret = 2;
415
        }
416
4
        break;
417
      }
418
4
      case kind::REGEXP_UNION: {
419
8
        std::vector< Node > vec_nodes;
420
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
421
16
          Node dc;
422
8
          int rt = derivativeS(r[i], c, dc);
423
8
          if(rt == 0) {
424
            ret = 0;
425
          }
426
8
          if(rt != 2) {
427
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
428
              vec_nodes.push_back( dc );
429
            }
430
          }
431
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
432
        }
433
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
434
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
435
4
        if(retNode == d_emptyRegexp) {
436
4
          ret = 2;
437
        }
438
4
        break;
439
      }
440
      case kind::REGEXP_INTER: {
441
        bool flag = true;
442
        bool flag_sg = false;
443
        std::vector< Node > vec_nodes;
444
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
445
          Node dc;
446
          int rt = derivativeS(r[i], c, dc);
447
          if(rt == 0) {
448
            ret = 0;
449
          } else if(rt == 2) {
450
            flag = false;
451
            break;
452
          }
453
          if(dc == d_sigma_star) {
454
            flag_sg = true;
455
          } else {
456
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
457
              vec_nodes.push_back( dc );
458
            }
459
          }
460
        }
461
        if(flag) {
462
          if(vec_nodes.size() == 0 && flag_sg) {
463
            retNode = d_sigma_star;
464
          } else {
465
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
466
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
467
            if(retNode == d_emptyRegexp) {
468
              ret = 2;
469
            }
470
          }
471
        } else {
472
          retNode = d_emptyRegexp;
473
          ret = 2;
474
        }
475
        break;
476
      }
477
4
      case kind::REGEXP_STAR: {
478
8
        Node dc;
479
4
        ret = derivativeS(r[0], c, dc);
480
4
        retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
481
4
        break;
482
      }
483
      case kind::REGEXP_LOOP: {
484
        uint32_t l = utils::getLoopMinOccurrences(r);
485
        uint32_t u = utils::getLoopMaxOccurrences(r);
486
        if (l == u && l == 0)
487
        {
488
          ret = 2;
489
          //retNode = d_emptyRegexp;
490
        } else {
491
          Node dc;
492
          ret = derivativeS(r[0], c, dc);
493
          if(dc==d_emptyRegexp) {
494
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
495
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
496
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
497
          } else {
498
            retNode = d_emptyRegexp;
499
          }
500
        }
501
        break;
502
      }
503
      case kind::REGEXP_COMPLEMENT:
504
      {
505
        // don't know result
506
        return 0;
507
        break;
508
      }
509
      default: {
510
        Assert(!utils::isRegExpKind(r.getKind()));
511
        return 0;
512
        break;
513
      }
514
    }
515
20
    if(retNode != d_emptyRegexp) {
516
      retNode = Rewriter::rewrite( retNode );
517
    }
518
40
    std::pair< Node, int > p(retNode, ret);
519
20
    d_deriv_cache[dv] = p;
520
  }
521
522
24
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
523
24
  return ret;
524
}
525
526
1410
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c)
527
{
528
1410
  Assert(c.size() < 2);
529
1410
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
530
1410
  Node retNode = d_emptyRegexp;
531
2820
  PairNodeStr dv = std::make_pair( r, c );
532
1410
  NodeManager* nm = NodeManager::currentNM();
533
1410
  if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
534
526
    retNode = d_dv_cache[dv];
535
  }
536
884
  else if (c.empty())
537
  {
538
    Node exp;
539
    int tmp = delta( r, exp );
540
    if(tmp == 0) {
541
      // TODO variable
542
      retNode = d_emptyRegexp;
543
    } else if(tmp == 1) {
544
      retNode = r;
545
    } else {
546
      retNode = d_emptyRegexp;
547
    }
548
  } else {
549
884
    Kind k = r.getKind();
550
884
    switch( k ) {
551
      case kind::REGEXP_EMPTY: {
552
        retNode = d_emptyRegexp;
553
        break;
554
      }
555
      case kind::REGEXP_SIGMA: {
556
        retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
557
        break;
558
      }
559
      case kind::REGEXP_RANGE: {
560
        cvc5::String a = r[0].getConst<String>();
561
        cvc5::String b = r[1].getConst<String>();
562
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
563
        break;
564
      }
565
270
      case kind::STRING_TO_REGEXP: {
566
270
        if(r[0].isConst()) {
567
270
          if(r[0] == d_emptyString) {
568
14
            retNode = d_emptyRegexp;
569
          } else {
570
256
            if (r[0].getConst<String>().front() == c.front())
571
            {
572
192
              retNode = nm->mkNode(STRING_TO_REGEXP,
573
384
                                   Word::getLength(r[0]) == 1
574
384
                                       ? d_emptyString
575
                                       : Word::substr(r[0], 1));
576
            } else {
577
64
              retNode = d_emptyRegexp;
578
            }
579
          }
580
        } else {
581
          // TODO variable
582
          retNode = d_emptyRegexp;
583
        }
584
270
        break;
585
      }
586
355
      case kind::REGEXP_CONCAT: {
587
710
        Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
588
710
        std::vector< Node > vec_nodes;
589
531
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
590
685
          Node dc = derivativeSingle(r[i], c);
591
509
          if(dc != d_emptyRegexp) {
592
714
            std::vector< Node > vec_nodes2;
593
357
            if(dc != rees) {
594
236
              vec_nodes2.push_back( dc );
595
            }
596
768
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
597
411
              if(r[j] != rees) {
598
411
                vec_nodes2.push_back( r[j] );
599
              }
600
            }
601
357
            Node tmp = vec_nodes2.size()==0 ? rees :
602
714
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
603
357
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
604
355
              vec_nodes.push_back( tmp );
605
            }
606
          }
607
685
          Node exp;
608
509
          if( delta( r[i], exp ) != 1 ) {
609
333
            break;
610
          }
611
        }
612
999
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
613
644
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
614
355
        break;
615
      }
616
108
      case kind::REGEXP_UNION: {
617
216
        std::vector< Node > vec_nodes;
618
346
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
619
476
          Node dc = derivativeSingle(r[i], c);
620
238
          if(dc != d_emptyRegexp) {
621
100
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
622
98
              vec_nodes.push_back( dc );
623
            }
624
          }
625
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
626
        }
627
286
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
628
178
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
629
108
        break;
630
      }
631
      case kind::REGEXP_INTER: {
632
        bool flag = true;
633
        bool flag_sg = false;
634
        std::vector< Node > vec_nodes;
635
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
636
          Node dc = derivativeSingle(r[i], c);
637
          if(dc != d_emptyRegexp) {
638
            if(dc == d_sigma_star) {
639
              flag_sg = true;
640
            } else {
641
              if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
642
                vec_nodes.push_back( dc );
643
              }
644
            }
645
          } else {
646
            flag = false;
647
            break;
648
          }
649
        }
650
        if(flag) {
651
          if(vec_nodes.size() == 0 && flag_sg) {
652
            retNode = d_sigma_star;
653
          } else {
654
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
655
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
656
          }
657
        } else {
658
          retNode = d_emptyRegexp;
659
        }
660
        break;
661
      }
662
151
      case kind::REGEXP_STAR: {
663
302
        Node dc = derivativeSingle(r[0], c);
664
151
        if(dc != d_emptyRegexp) {
665
115
          retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
666
        } else {
667
36
          retNode = d_emptyRegexp;
668
        }
669
151
        break;
670
      }
671
      case kind::REGEXP_LOOP: {
672
        uint32_t l = utils::getLoopMinOccurrences(r);
673
        uint32_t u = utils::getLoopMaxOccurrences(r);
674
        if (l == u || l == 0)
675
        {
676
          retNode = d_emptyRegexp;
677
        } else {
678
          Node dc = derivativeSingle(r[0], c);
679
          if(dc != d_emptyRegexp) {
680
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
681
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
682
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
683
          } else {
684
            retNode = d_emptyRegexp;
685
          }
686
        }
687
        //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
688
        break;
689
      }
690
      case kind::REGEXP_COMPLEMENT:
691
      default: {
692
        Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
693
        Unreachable();
694
        break;
695
      }
696
    }
697
884
    if(retNode != d_emptyRegexp) {
698
732
      retNode = Rewriter::rewrite( retNode );
699
    }
700
884
    d_dv_cache[dv] = retNode;
701
  }
702
1410
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
703
2820
  return retNode;
704
}
705
706
898
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
707
{
708
898
  Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
709
  std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
710
898
      d_fset_cache.find(r);
711
898
  if(itr != d_fset_cache.end()) {
712
312
    pcset.insert((itr->second).first.begin(), (itr->second).first.end());
713
312
    pvset.insert((itr->second).second.begin(), (itr->second).second.end());
714
  } else {
715
    // cset is code points
716
1172
    std::set<unsigned> cset;
717
1172
    SetNodes vset;
718
586
    Kind k = r.getKind();
719
586
    switch( k ) {
720
      case kind::REGEXP_EMPTY: {
721
        break;
722
      }
723
      case kind::REGEXP_RANGE: {
724
        unsigned a = r[0].getConst<String>().front();
725
        unsigned b = r[1].getConst<String>().front();
726
        Assert(a < b);
727
        Assert(b < std::numeric_limits<unsigned>::max());
728
        for (unsigned c = a; c <= b; c++)
729
        {
730
          cset.insert(c);
731
        }
732
        break;
733
      }
734
204
      case kind::STRING_TO_REGEXP: {
735
408
        Node st = Rewriter::rewrite(r[0]);
736
204
        if(st.isConst()) {
737
408
          String s = st.getConst<String>();
738
204
          if(s.size() != 0) {
739
196
            unsigned sc = s.front();
740
196
            cset.insert(sc);
741
          }
742
        }
743
        else if (st.getKind() == kind::STRING_CONCAT)
744
        {
745
          if(st[0].isConst()) {
746
            String s = st[0].getConst<String>();
747
            unsigned sc = s.front();
748
            cset.insert(sc);
749
          } else {
750
            vset.insert( st[0] );
751
          }
752
        }
753
        else
754
        {
755
          vset.insert(st);
756
        }
757
204
        break;
758
      }
759
253
      case kind::REGEXP_CONCAT: {
760
329
        for(unsigned i=0; i<r.getNumChildren(); i++) {
761
319
          firstChars(r[i], cset, vset);
762
395
          Node n = r[i];
763
395
          Node exp;
764
319
          if (delta(n, exp) != 1)
765
          {
766
243
            break;
767
          }
768
        }
769
253
        break;
770
      }
771
46
      case kind::REGEXP_UNION: {
772
146
        for(unsigned i=0; i<r.getNumChildren(); i++) {
773
100
          firstChars(r[i], cset, vset);
774
        }
775
46
        break;
776
      }
777
      case kind::REGEXP_INTER: {
778
        //TODO: Overapproximation for now
779
        //for(unsigned i=0; i<r.getNumChildren(); i++) {
780
        // firstChars(r[i], cset, vset);
781
        //}
782
        firstChars(r[0], cset, vset);
783
        break;
784
      }
785
83
      case kind::REGEXP_STAR: {
786
83
        firstChars(r[0], cset, vset);
787
83
        break;
788
      }
789
      case kind::REGEXP_LOOP: {
790
        firstChars(r[0], cset, vset);
791
        break;
792
      }
793
      case kind::REGEXP_SIGMA:
794
      case kind::REGEXP_COMPLEMENT:
795
      default: {
796
        // we do not expect to call this function on regular expressions that
797
        // aren't a standard regular expression kind. However, if we do, then
798
        // the following code is conservative and says that the current
799
        // regular expression can begin with any character.
800
        Assert(utils::isRegExpKind(k));
801
        // can start with any character
802
        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
803
        for (unsigned i = 0; i <= d_lastchar; i++)
804
        {
805
          cset.insert(i);
806
        }
807
        break;
808
      }
809
    }
810
586
    pcset.insert(cset.begin(), cset.end());
811
586
    pvset.insert(vset.begin(), vset.end());
812
1172
    std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
813
586
    d_fset_cache[r] = p;
814
  }
815
816
898
  if(Trace.isOn("regexp-fset")) {
817
    Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
818
    for (std::set<unsigned>::const_iterator it = pcset.begin();
819
         it != pcset.end();
820
         ++it)
821
    {
822
      if (it != pcset.begin())
823
      {
824
        Trace("regexp-fset") << ",";
825
      }
826
      Trace("regexp-fset") << (*it);
827
      }
828
    Trace("regexp-fset") << "}" << std::endl;
829
  }
830
898
}
831
832
470
Node RegExpOpr::simplify(Node t, bool polarity)
833
{
834
940
  Trace("strings-regexp-simpl")
835
470
      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
836
470
  Assert(t.getKind() == kind::STRING_IN_REGEXP);
837
940
  Node tlit = polarity ? t : t.notNode();
838
940
  Node conc;
839
470
  std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit);
840
470
  if (itr != d_simpCache.end())
841
  {
842
8
    return itr->second;
843
  }
844
462
  if (polarity)
845
  {
846
694
    std::vector<Node> newSkolems;
847
347
    conc = reduceRegExpPos(tlit, d_sc, newSkolems);
848
  }
849
  else
850
  {
851
    // see if we can use an optimized version of the reduction for re.++.
852
230
    Node r = t[1];
853
115
    if (r.getKind() == REGEXP_CONCAT)
854
    {
855
      // the index we are removing from the RE concatenation
856
94
      size_t index = 0;
857
      // As an optimization to the reduction, if we can determine that
858
      // all strings in the language of R1 have the same length, say n,
859
      // then the conclusion of the reduction is quantifier-free:
860
      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
861
188
      Node reLen = getRegExpConcatFixed(r, index);
862
94
      if (!reLen.isNull())
863
      {
864
82
        conc = reduceRegExpNegConcatFixed(tlit, reLen, index);
865
      }
866
    }
867
115
    if (conc.isNull())
868
    {
869
33
      conc = reduceRegExpNeg(tlit);
870
    }
871
  }
872
462
  d_simpCache[tlit] = conc;
873
924
  Trace("strings-regexp-simpl")
874
462
      << "RegExpOpr::simplify: returns " << conc << std::endl;
875
462
  return conc;
876
}
877
878
123
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index)
879
{
880
123
  Assert(r.getKind() == REGEXP_CONCAT);
881
123
  index = 0;
882
246
  Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
883
123
  if (!reLen.isNull())
884
  {
885
83
    return reLen;
886
  }
887
  // try from the opposite end
888
40
  size_t indexE = r.getNumChildren() - 1;
889
40
  reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
890
40
  if (!reLen.isNull())
891
  {
892
26
    index = indexE;
893
26
    return reLen;
894
  }
895
14
  return Node::null();
896
}
897
898
39
Node RegExpOpr::reduceRegExpNeg(Node mem)
899
{
900
39
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
901
78
  Node s = mem[0][0];
902
78
  Node r = mem[0][1];
903
39
  NodeManager* nm = NodeManager::currentNM();
904
39
  Kind k = r.getKind();
905
78
  Node zero = nm->mkConst(Rational(0));
906
39
  Node conc;
907
39
  if (k == REGEXP_CONCAT)
908
  {
909
    // do not use length entailment, call regular expression concat
910
32
    Node reLen;
911
16
    size_t i = 0;
912
16
    conc = reduceRegExpNegConcatFixed(mem, reLen, i);
913
  }
914
23
  else if (k == REGEXP_STAR)
915
  {
916
44
    Node emp = Word::mkEmptyWord(s.getType());
917
44
    Node lens = nm->mkNode(STRING_LENGTH, s);
918
44
    Node sne = s.eqNode(emp).negate();
919
44
    Node b1 = nm->mkBoundVar(nm->integerType());
920
44
    Node b1v = nm->mkNode(BOUND_VAR_LIST, b1);
921
    Node g1 =
922
44
        nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1));
923
    // internal
924
44
    Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
925
44
    Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
926
44
    Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate();
927
44
    Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate();
928
929
22
    conc = nm->mkNode(OR, s1r1, s2r2);
930
22
    conc = nm->mkNode(IMPLIES, g1, conc);
931
    // must mark as an internal quantifier
932
22
    conc = utils::mkForallInternal(b1v, conc);
933
22
    conc = nm->mkNode(AND, sne, conc);
934
  }
935
  else
936
  {
937
1
    Assert(!utils::isRegExpKind(k));
938
  }
939
78
  return conc;
940
}
941
942
116
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index)
943
{
944
116
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
945
232
  Node s = mem[0][0];
946
232
  Node r = mem[0][1];
947
116
  NodeManager* nm = NodeManager::currentNM();
948
116
  Assert(r.getKind() == REGEXP_CONCAT);
949
232
  Node zero = nm->mkConst(Rational(0));
950
  // The following simplification states that
951
  //    ~( s in R1 ++ R2 ++... ++ Rn )
952
  // is equivalent to
953
  //    forall x.
954
  //      0 <= x <= len(s) =>
955
  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
956
  // Index is the child index of r that we are stripping off, which is either
957
  // from the beginning or the end.
958
116
  Assert(index == 0 || index == r.getNumChildren() - 1);
959
232
  Node lens = nm->mkNode(STRING_LENGTH, s);
960
232
  Node b1;
961
232
  Node b1v;
962
232
  Node guard;
963
116
  if (reLen.isNull())
964
  {
965
16
    b1 = SkolemCache::mkIndexVar(mem);
966
16
    b1v = nm->mkNode(BOUND_VAR_LIST, b1);
967
48
    guard = nm->mkNode(AND,
968
32
                       nm->mkNode(GEQ, b1, zero),
969
32
                       nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
970
  }
971
  else
972
  {
973
100
    b1 = reLen;
974
  }
975
232
  Node s1;
976
232
  Node s2;
977
116
  if (index == 0)
978
  {
979
92
    s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
980
92
    s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
981
  }
982
  else
983
  {
984
24
    s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
985
24
    s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1));
986
  }
987
232
  Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate();
988
232
  std::vector<Node> nvec;
989
502
  for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
990
  {
991
386
    if (i != index)
992
    {
993
270
      nvec.push_back(r[i]);
994
    }
995
  }
996
232
  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
997
116
  r2 = Rewriter::rewrite(r2);
998
232
  Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
999
116
  Node conc = nm->mkNode(OR, s1r1, s2r2);
1000
116
  if (!b1v.isNull())
1001
  {
1002
16
    conc = nm->mkNode(OR, guard.negate(), conc);
1003
    // must mark as an internal quantifier
1004
16
    conc = utils::mkForallInternal(b1v, conc);
1005
  }
1006
232
  return conc;
1007
}
1008
1009
625
Node RegExpOpr::reduceRegExpPos(Node mem,
1010
                                SkolemCache* sc,
1011
                                std::vector<Node>& newSkolems)
1012
{
1013
625
  Assert(mem.getKind() == STRING_IN_REGEXP);
1014
1250
  Node s = mem[0];
1015
1250
  Node r = mem[1];
1016
625
  NodeManager* nm = NodeManager::currentNM();
1017
625
  Kind k = r.getKind();
1018
625
  Node conc;
1019
625
  if (k == REGEXP_CONCAT)
1020
  {
1021
878
    std::vector<Node> nvec;
1022
878
    std::vector<Node> cc;
1023
439
    SkolemManager* sm = nm->getSkolemManager();
1024
    // Look up skolems for each of the components. If sc has optimizations
1025
    // enabled, this will return arguments of str.to_re.
1026
1696
    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
1027
    {
1028
1257
      if (r[i].getKind() == STRING_TO_REGEXP)
1029
      {
1030
        // optimization, just take the body
1031
439
        newSkolems.push_back(r[i][0]);
1032
      }
1033
      else
1034
      {
1035
1636
        Node ivalue = nm->mkConst(Rational(i));
1036
        Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT,
1037
1636
                                       s.getType(),
1038
3272
                                       {mem[0], mem[1], ivalue});
1039
818
        newSkolems.push_back(sk);
1040
818
        nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
1041
      }
1042
    }
1043
    // (str.in_re x (re.++ R0 .... Rn)) =>
1044
    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
1045
878
    Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
1046
439
    nvec.insert(nvec.begin(), lem);
1047
439
    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
1048
  }
1049
186
  else if (k == REGEXP_STAR)
1050
  {
1051
368
    Node emp = Word::mkEmptyWord(s.getType());
1052
368
    Node se = s.eqNode(emp);
1053
368
    Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
1054
368
    Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]);
1055
368
    Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand);
1056
    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
1057
    // is matched by `R`, or `x` is matched by two or more `R`s. For the
1058
    // last case, `x` will break into three pieces, making the beginning
1059
    // and the end each match `R` and the middle match `R*`. Matching the
1060
    // beginning and the end with `R` allows us to reason about the
1061
    // beginning and the end of `x` simultaneously.
1062
    //
1063
    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))
1064
1065
    // We also immediately unfold the last disjunct for re.*. The advantage
1066
    // of doing this is that we use the same scheme for skolems above.
1067
368
    std::vector<Node> newSkolemsC;
1068
184
    sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC);
1069
184
    Assert(newSkolemsC.size() == 3);
1070
    // make the return lemma
1071
    // can also assume the component match the first and last R are non-empty.
1072
    // This means that the overall conclusion is:
1073
    //   (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^
1074
    //                          k1 in R ^ k2 in (re.* R) ^ k3 in R ^
1075
    //                          k1 != ""  ^ k3 != "")
1076
184
    conc = nm->mkNode(OR,
1077
                      se,
1078
                      sinr,
1079
736
                      nm->mkNode(AND,
1080
                                 sinRExp,
1081
368
                                 newSkolemsC[0].eqNode(emp).negate(),
1082
368
                                 newSkolemsC[2].eqNode(emp).negate()));
1083
  }
1084
  else
1085
  {
1086
2
    Assert(!utils::isRegExpKind(k));
1087
  }
1088
1250
  return conc;
1089
}
1090
1091
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1092
  for(std::set< PairNodes >::const_iterator itr = s.begin();
1093
      itr != s.end(); ++itr) {
1094
    if((itr->first == n1 && itr->second == n2) ||
1095
       (itr->first == n2 && itr->second == n1)) {
1096
      return true;
1097
    }
1098
  }
1099
  return false;
1100
}
1101
1102
654
bool RegExpOpr::containC2(unsigned cnt, Node n) {
1103
654
  if(n.getKind() == kind::REGEXP_RV) {
1104
202
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1105
        << "Exceeded UINT32_MAX in RegExpOpr::containC2";
1106
202
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1107
202
    return cnt == y;
1108
452
  } else if(n.getKind() == kind::REGEXP_CONCAT) {
1109
102
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1110
80
      if(containC2(cnt, n[i])) {
1111
12
        return true;
1112
      }
1113
    }
1114
418
  } else if(n.getKind() == kind::REGEXP_STAR) {
1115
34
    return containC2(cnt, n[0]);
1116
384
  } else if(n.getKind() == kind::REGEXP_LOOP) {
1117
    return containC2(cnt, n[0]);
1118
384
  } else if(n.getKind() == kind::REGEXP_UNION) {
1119
118
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1120
88
      if(containC2(cnt, n[i])) {
1121
12
        return true;
1122
      }
1123
    }
1124
  }
1125
394
  return false;
1126
}
1127
198
Node RegExpOpr::convert1(unsigned cnt, Node n) {
1128
198
  Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1129
396
  Node r1, r2;
1130
198
  convert2(cnt, n, r1, r2);
1131
198
  Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1132
198
  Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1133
198
     NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1134
198
  ret = Rewriter::rewrite( ret );
1135
198
  Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1136
396
  return ret;
1137
}
1138
421
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1139
421
  if(n == d_emptyRegexp) {
1140
24
    r1 = d_emptyRegexp;
1141
24
    r2 = d_emptyRegexp;
1142
24
    return;
1143
397
  } else if(n == d_emptySingleton) {
1144
25
    r1 = d_emptySingleton;
1145
25
    r2 = d_emptySingleton;
1146
  }
1147
397
  Kind nk = n.getKind();
1148
397
  if (nk == REGEXP_RV)
1149
  {
1150
61
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1151
        << "Exceeded UINT32_MAX in RegExpOpr::convert2";
1152
61
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1153
61
    r1 = d_emptySingleton;
1154
61
    if(cnt == y) {
1155
61
      r2 = d_emptyRegexp;
1156
    } else {
1157
      r2 = n;
1158
    }
1159
  }
1160
336
  else if (nk == REGEXP_CONCAT)
1161
  {
1162
212
    bool flag = true;
1163
424
    std::vector<Node> vr1, vr2;
1164
591
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1165
452
      if(containC2(cnt, n[i])) {
1166
146
        Node t1, t2;
1167
73
        convert2(cnt, n[i], t1, t2);
1168
73
        vr1.push_back(t1);
1169
73
        r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1170
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1171
73
        vr2.push_back(t2);
1172
73
        for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1173
          vr2.push_back(n[j]);
1174
        }
1175
73
        r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1176
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1177
73
        flag = false;
1178
73
        break;
1179
      } else {
1180
379
        vr1.push_back(n[i]);
1181
      }
1182
    }
1183
212
    if(flag) {
1184
139
      r1 = d_emptySingleton;
1185
139
      r2 = n;
1186
    }
1187
  }
1188
124
  else if (nk == REGEXP_UNION)
1189
  {
1190
126
    std::vector<Node> vr1, vr2;
1191
213
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1192
300
      Node t1, t2;
1193
150
      convert2(cnt, n[i], t1, t2);
1194
150
      vr1.push_back(t1);
1195
150
      vr2.push_back(t2);
1196
    }
1197
63
    r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1198
63
    r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1199
  }
1200
61
  else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
1201
           || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
1202
  {
1203
    // this leaves n unchanged
1204
61
    r1 = d_emptySingleton;
1205
61
    r2 = n;
1206
  }
1207
  else
1208
  {
1209
    //is it possible?
1210
    Unreachable();
1211
  }
1212
}
1213
1214
317
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
1215
  //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1216
317
  if(r1 > r2) {
1217
60
    TNode tmpNode = r1;
1218
30
    r1 = r2;
1219
30
    r2 = tmpNode;
1220
  }
1221
317
  Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
1222
634
  std::pair < Node, Node > p(r1, r2);
1223
317
  std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
1224
317
  Node rNode;
1225
317
  if(itr != d_inter_cache.end()) {
1226
38
    rNode = itr->second;
1227
  } else {
1228
279
    Trace("regexp-int-debug") << " ... not in cache" << std::endl;
1229
279
    if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1230
      Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
1231
      rNode = d_emptyRegexp;
1232
279
    } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1233
24
      Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
1234
48
      Node exp;
1235
24
      int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1236
24
      if(r == 0) {
1237
        //TODO: variable
1238
        Unreachable();
1239
24
      } else if(r == 1) {
1240
18
        rNode = d_emptySingleton;
1241
      } else {
1242
6
        rNode = d_emptyRegexp;
1243
      }
1244
255
    } else if(r1 == r2) {
1245
      Trace("regexp-int-debug") << " ... equal" << std::endl;
1246
      rNode = r1; //convert1(cnt, r1);
1247
    } else {
1248
255
      Trace("regexp-int-debug") << " ... normal checking" << std::endl;
1249
255
      std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1250
255
      if(itrcache != cache.end()) {
1251
57
        rNode = itrcache->second;
1252
      } else {
1253
198
        Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
1254
396
        std::vector<unsigned> cset;
1255
396
        std::set<unsigned> cset1, cset2;
1256
396
        std::set< Node > vset1, vset2;
1257
198
        firstChars(r1, cset1, vset1);
1258
198
        firstChars(r2, cset2, vset2);
1259
198
        Trace("regexp-int-debug") << " ... got fset" << std::endl;
1260
198
        std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
1261
198
             std::inserter(cset, cset.begin()));
1262
396
        std::vector< Node > vec_nodes;
1263
396
        Node delta_exp;
1264
198
        Trace("regexp-int-debug") << " ... try delta" << std::endl;
1265
198
        int flag = delta(r1, delta_exp);
1266
198
        int flag2 = delta(r2, delta_exp);
1267
198
        Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
1268
198
        if(flag != 2 && flag2 != 2) {
1269
21
          if(flag == 1 && flag2 == 1) {
1270
21
            vec_nodes.push_back(d_emptySingleton);
1271
          } else {
1272
            //TODO: variable
1273
            Unreachable();
1274
          }
1275
        }
1276
198
        if(Trace.isOn("regexp-int-debug")) {
1277
          Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
1278
          for (std::vector<unsigned>::const_iterator it = cset.begin();
1279
               it != cset.end();
1280
               ++it)
1281
          {
1282
            if (it != cset.begin())
1283
            {
1284
              Trace("regexp-int-debug") << ", ";
1285
            }
1286
            Trace("regexp-int-debug") << (*it);
1287
          }
1288
          Trace("regexp-int-debug") << std::endl;
1289
        }
1290
396
        std::map< PairNodes, Node > cacheX;
1291
454
        for (std::vector<unsigned>::const_iterator it = cset.begin();
1292
454
             it != cset.end();
1293
             ++it)
1294
        {
1295
512
          std::vector<unsigned> cvec;
1296
256
          cvec.push_back(*it);
1297
512
          String c(cvec);
1298
256
          Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
1299
512
          Node r1l = derivativeSingle(r1, c);
1300
512
          Node r2l = derivativeSingle(r2, c);
1301
256
          Trace("regexp-int-debug") << "  ... got partial(r1,c) = " << mkString(r1l) << std::endl;
1302
256
          Trace("regexp-int-debug") << "  ... got partial(r2,c) = " << mkString(r2l) << std::endl;
1303
512
          Node rt;
1304
1305
256
          if(r1l > r2l) {
1306
68
            Node tnode = r1l;
1307
34
            r1l = r2l; r2l = tnode;
1308
          }
1309
512
          PairNodes pp(r1l, r2l);
1310
256
          std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp);
1311
256
          if(itr2 != cacheX.end()) {
1312
8
            rt = itr2->second;
1313
          } else {
1314
496
            std::map< PairNodes, Node > cache2(cache);
1315
496
            cache2[p] = NodeManager::currentNM()->mkNode(
1316
                kind::REGEXP_RV,
1317
496
                NodeManager::currentNM()->mkConst(cvc5::Rational(cnt)));
1318
248
            rt = intersectInternal(r1l, r2l, cache2, cnt+1);
1319
248
            cacheX[ pp ] = rt;
1320
          }
1321
1322
768
          rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1323
512
            NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1324
1325
256
          Trace("regexp-int-debug") << "  ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
1326
256
          vec_nodes.push_back(rt);
1327
        }
1328
198
        rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1329
            NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) );
1330
198
        rNode = convert1(cnt, rNode);
1331
198
        rNode = Rewriter::rewrite( rNode );
1332
      }
1333
    }
1334
279
    Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
1335
279
    if (!expr::hasSubtermKind(REGEXP_RV, rNode))
1336
    {
1337
115
      d_inter_cache[p] = rNode;
1338
    }
1339
  }
1340
317
  Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1341
634
  return rNode;
1342
}
1343
1344
138
Node RegExpOpr::removeIntersection(Node r) {
1345
138
  Assert(checkConstRegExp(r));
1346
138
  NodeManager* nm = NodeManager::currentNM();
1347
276
  std::unordered_map<TNode, Node> visited;
1348
138
  std::unordered_map<TNode, Node>::iterator it;
1349
276
  std::vector<TNode> visit;
1350
276
  TNode cur;
1351
138
  visit.push_back(r);
1352
1948
  do
1353
  {
1354
2086
    cur = visit.back();
1355
2086
    visit.pop_back();
1356
2086
    it = visited.find(cur);
1357
1358
2086
    if (it == visited.end())
1359
    {
1360
986
      visited[cur] = Node::null();
1361
986
      visit.push_back(cur);
1362
1948
      for (const Node& cn : cur)
1363
      {
1364
962
        visit.push_back(cn);
1365
      }
1366
    }
1367
1100
    else if (it->second.isNull())
1368
    {
1369
986
      Kind ck = cur.getKind();
1370
1972
      Node ret;
1371
986
      bool childChanged = false;
1372
1972
      std::vector<Node> children;
1373
986
      if (cur.getMetaKind() == metakind::PARAMETERIZED)
1374
      {
1375
        children.push_back(cur.getOperator());
1376
      }
1377
1948
      for (const Node& cn : cur)
1378
      {
1379
962
        it = visited.find(cn);
1380
962
        Assert(it != visited.end());
1381
962
        Assert(!it->second.isNull());
1382
962
        if (ck == REGEXP_INTER)
1383
        {
1384
4
          if (ret.isNull())
1385
          {
1386
2
            ret = it->second;
1387
          }
1388
          else
1389
          {
1390
2
            ret = intersect(ret, it->second);
1391
          }
1392
        }
1393
        else
1394
        {
1395
          // will construct below
1396
958
          childChanged = childChanged || cn != it->second;
1397
958
          children.push_back(it->second);
1398
        }
1399
      }
1400
986
      if (ck != REGEXP_INTER)
1401
      {
1402
984
        if (childChanged)
1403
        {
1404
2
          ret = nm->mkNode(cur.getKind(), children);
1405
        }
1406
        else
1407
        {
1408
982
          ret = cur;
1409
        }
1410
      }
1411
986
      visited[cur] = ret;
1412
    }
1413
2086
  } while (!visit.empty());
1414
138
  Assert(visited.find(r) != visited.end());
1415
138
  Assert(!visited.find(r)->second.isNull());
1416
138
  if (Trace.isOn("regexp-intersect"))
1417
  {
1418
    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
1419
                              << " ) = " << mkString(visited[r]) << std::endl;
1420
  }
1421
276
  return visited[r];
1422
}
1423
1424
69
Node RegExpOpr::intersect(Node r1, Node r2)
1425
{
1426
69
  if (!checkConstRegExp(r1) || !checkConstRegExp(r2))
1427
  {
1428
    return Node::null();
1429
  }
1430
138
  Node rr1 = removeIntersection(r1);
1431
138
  Node rr2 = removeIntersection(r2);
1432
138
  std::map<PairNodes, Node> cache;
1433
69
  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
1434
69
  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
1435
138
  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
1436
69
                            << ",\n\t" << mkString(r2) << ")" << std::endl;
1437
138
  Node retNode = intersectInternal(rr1, rr2, cache, 1);
1438
138
  Trace("regexp-intersect")
1439
138
      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
1440
69
      << ") =\n\t" << mkString(retNode) << std::endl;
1441
69
  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
1442
69
  return retNode;
1443
}
1444
1445
//printing
1446
19003
std::string RegExpOpr::niceChar(Node r) {
1447
19003
  if(r.isConst()) {
1448
37778
    std::string s = r.getConst<String>().toString();
1449
18889
    return s == "." ? "\\." : s;
1450
  } else {
1451
228
    std::string ss = "$" + r.toString();
1452
114
    return ss;
1453
  }
1454
}
1455
36511
std::string RegExpOpr::mkString( Node r ) {
1456
36511
  std::string retStr;
1457
36511
  if(r.isNull()) {
1458
    retStr = "\\E";
1459
  } else {
1460
36511
    int k = r.getKind();
1461
36511
    switch( k ) {
1462
472
      case kind::REGEXP_EMPTY: {
1463
472
        retStr += "\\E";
1464
472
        break;
1465
      }
1466
370
      case kind::REGEXP_SIGMA: {
1467
370
        retStr += ".";
1468
370
        break;
1469
      }
1470
18445
      case kind::STRING_TO_REGEXP: {
1471
36890
        std::string tmp( niceChar( r[0] ) );
1472
18445
        retStr += tmp.size()==1? tmp : "(" + tmp + ")";
1473
18445
        break;
1474
      }
1475
5381
      case kind::REGEXP_CONCAT: {
1476
5381
        retStr += "(";
1477
18596
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1478
          //if(i != 0) retStr += ".";
1479
13215
          retStr += mkString( r[i] );
1480
        }
1481
5381
        retStr += ")";
1482
5381
        break;
1483
      }
1484
3888
      case kind::REGEXP_UNION: {
1485
3888
        retStr += "(";
1486
12366
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1487
8478
          if(i != 0) retStr += "|";
1488
8478
          retStr += mkString( r[i] );
1489
        }
1490
3888
        retStr += ")";
1491
3888
        break;
1492
      }
1493
52
      case kind::REGEXP_INTER: {
1494
52
        retStr += "(";
1495
156
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1496
104
          if(i != 0) retStr += "&";
1497
104
          retStr += mkString( r[i] );
1498
        }
1499
52
        retStr += ")";
1500
52
        break;
1501
      }
1502
7035
      case kind::REGEXP_STAR: {
1503
7035
        retStr += mkString( r[0] );
1504
7035
        retStr += "*";
1505
7035
        break;
1506
      }
1507
      case kind::REGEXP_PLUS: {
1508
        retStr += mkString( r[0] );
1509
        retStr += "+";
1510
        break;
1511
      }
1512
      case kind::REGEXP_OPT: {
1513
        retStr += mkString( r[0] );
1514
        retStr += "?";
1515
        break;
1516
      }
1517
279
      case kind::REGEXP_RANGE: {
1518
279
        retStr += "[";
1519
279
        retStr += niceChar( r[0] );
1520
279
        retStr += "-";
1521
279
        retStr += niceChar( r[1] );
1522
279
        retStr += "]";
1523
279
        break;
1524
      }
1525
      case kind::REGEXP_LOOP: {
1526
        uint32_t l = utils::getLoopMinOccurrences(r);
1527
        std::stringstream ss;
1528
        ss << "(" << mkString(r[0]) << "){" << l << ",";
1529
        if(r.getNumChildren() == 3) {
1530
          uint32_t u = utils::getLoopMaxOccurrences(r);
1531
          ss << u;
1532
        }
1533
        ss << "}";
1534
        retStr += ss.str();
1535
        break;
1536
      }
1537
532
      case kind::REGEXP_RV: {
1538
532
        retStr += "<";
1539
532
        retStr += r[0].getConst<Rational>().getNumerator().toString();
1540
532
        retStr += ">";
1541
532
        break;
1542
      }
1543
54
      case REGEXP_COMPLEMENT:
1544
      {
1545
54
        retStr += "^(";
1546
54
        retStr += mkString(r[0]);
1547
54
        retStr += ")";
1548
54
        break;
1549
      }
1550
3
      default:
1551
      {
1552
6
        std::stringstream ss;
1553
3
        ss << r;
1554
3
        retStr = ss.str();
1555
3
        Assert(!utils::isRegExpKind(r.getKind()));
1556
3
        break;
1557
      }
1558
    }
1559
  }
1560
1561
36511
  return retStr;
1562
}
1563
1564
1150
bool RegExpOpr::regExpIncludes(Node r1, Node r2)
1565
{
1566
1150
  const auto& it = d_inclusionCache.find(std::make_pair(r1, r2));
1567
1150
  if (it != d_inclusionCache.end())
1568
  {
1569
628
    return (*it).second;
1570
  }
1571
522
  bool result = RegExpEntail::regExpIncludes(r1, r2);
1572
522
  d_inclusionCache[std::make_pair(r1, r2)] = result;
1573
522
  return result;
1574
}
1575
1576
}  // namespace strings
1577
}  // namespace theory
1578
29322
}  // namespace cvc5