GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_operation.cpp Lines: 698 977 71.4 %
Date: 2021-09-29 Branches: 1485 4104 36.2 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Tianyi Liang, Andrew Reynolds, Mathias Preiner
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Symbolic Regular Expresion Operations
14
 */
15
16
#include "theory/strings/regexp_operation.h"
17
18
#include <sstream>
19
20
#include "expr/node_algorithm.h"
21
#include "options/strings_options.h"
22
#include "theory/rewriter.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/theory_strings_utils.h"
25
#include "theory/strings/word.h"
26
#include "util/regexp.h"
27
28
using namespace cvc5::kind;
29
30
namespace cvc5 {
31
namespace theory {
32
namespace strings {
33
34
6275
RegExpOpr::RegExpOpr(SkolemCache* sc)
35
    : d_true(NodeManager::currentNM()->mkConst(true)),
36
      d_false(NodeManager::currentNM()->mkConst(false)),
37
      d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
38
12550
                                                     std::vector<Node>{})),
39
12550
      d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))),
40
12550
      d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))),
41
      d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
42
12550
                                               std::vector<Node>{})),
43
      d_sigma_star(
44
          NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)),
45
56475
      d_sc(sc)
46
{
47
6275
  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
48
49
6275
  d_emptySingleton =
50
12550
      NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
51
6275
  d_lastchar = utils::getAlphabetCardinality() - 1;
52
6275
}
53
54
6272
RegExpOpr::~RegExpOpr() {}
55
56
628
bool RegExpOpr::checkConstRegExp( Node r ) {
57
628
  Assert(r.getType().isRegExp());
58
1256
  Trace("strings-regexp-cstre")
59
628
      << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
60
628
  RegExpConstType rct = getRegExpConstType(r);
61
628
  return rct != RE_C_VARIABLE;
62
}
63
64
1133
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
65
{
66
1133
  Assert(r.getType().isRegExp());
67
1133
  std::unordered_map<Node, RegExpConstType>::iterator it;
68
2266
  std::vector<TNode> visit;
69
2266
  TNode cur;
70
1133
  visit.push_back(r);
71
1482
  do
72
  {
73
2615
    cur = visit.back();
74
2615
    visit.pop_back();
75
2615
    it = d_constCache.find(cur);
76
77
2615
    Kind ck = cur.getKind();
78
2615
    if (it == d_constCache.end())
79
    {
80
755
      if (ck == STRING_TO_REGEXP)
81
      {
82
408
        Node tmp = Rewriter::rewrite(cur[0]);
83
204
        d_constCache[cur] =
84
204
            tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE;
85
      }
86
551
      else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE)
87
      {
88
75
        d_constCache[cur] = RE_C_CONSTANT;
89
      }
90
476
      else if (!utils::isRegExpKind(ck))
91
      {
92
        // non-regular expression applications, e.g. function applications
93
        // with regular expression return type are treated as variables.
94
2
        d_constCache[cur] = RE_C_VARIABLE;
95
      }
96
      else
97
      {
98
474
        d_constCache[cur] = RE_C_UNKNOWN;
99
474
        visit.push_back(cur);
100
474
        visit.insert(visit.end(), cur.begin(), cur.end());
101
      }
102
    }
103
1860
    else if (it->second == RE_C_UNKNOWN)
104
    {
105
474
      RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
106
1482
      for (const Node& cn : cur)
107
      {
108
1008
        it = d_constCache.find(cn);
109
1008
        Assert(it != d_constCache.end());
110
1008
        if (it->second > ret)
111
        {
112
220
          ret = it->second;
113
        }
114
      }
115
474
      d_constCache[cur] = ret;
116
    }
117
2615
  } while (!visit.empty());
118
1133
  Assert(d_constCache.find(r) != d_constCache.end());
119
2266
  return d_constCache[r];
120
}
121
122
// 0-unknown, 1-yes, 2-no
123
943
int RegExpOpr::delta( Node r, Node &exp ) {
124
  std::map<Node, std::pair<int, Node> >::const_iterator itd =
125
943
      d_delta_cache.find(r);
126
943
  if (itd != d_delta_cache.end())
127
  {
128
    // already computed
129
682
    exp = itd->second.second;
130
682
    return itd->second.first;
131
  }
132
261
  Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl;
133
261
  int ret = 0;
134
261
  NodeManager* nm = NodeManager::currentNM();
135
261
  Kind k = r.getKind();
136
261
  switch (k)
137
  {
138
    case REGEXP_EMPTY:
139
    case REGEXP_SIGMA:
140
    case REGEXP_RANGE:
141
    {
142
      // does not contain empty string
143
      ret = 2;
144
      break;
145
    }
146
67
    case STRING_TO_REGEXP:
147
    {
148
134
      Node tmp = Rewriter::rewrite(r[0]);
149
67
      if (tmp.isConst())
150
      {
151
66
        if (tmp == d_emptyString)
152
        {
153
6
          ret = 1;
154
        } else {
155
60
          ret = 2;
156
        }
157
      }
158
      else
159
      {
160
1
        ret = 0;
161
1
        if (tmp.getKind() == STRING_CONCAT)
162
        {
163
          for (const Node& tmpc : tmp)
164
          {
165
            if (tmpc.isConst())
166
            {
167
              ret = 2;
168
              break;
169
            }
170
          }
171
        }
172
1
        if (ret == 0)
173
        {
174
1
          exp = r[0].eqNode(d_emptyString);
175
        }
176
      }
177
67
      break;
178
    }
179
141
    case REGEXP_CONCAT:
180
    case REGEXP_UNION:
181
    case REGEXP_INTER:
182
    {
183
      // has there been an unknown child?
184
141
      bool hasUnknownChild = false;
185
282
      std::vector<Node> vec;
186
141
      int checkTmp = k == REGEXP_UNION ? 1 : 2;
187
141
      int retTmp = k == REGEXP_UNION ? 2 : 1;
188
241
      for (const Node& rc : r)
189
      {
190
320
        Node exp2;
191
220
        int tmp = delta(rc, exp2);
192
220
        if (tmp == checkTmp)
193
        {
194
          // return is implied by the child's return value
195
120
          ret = checkTmp;
196
120
          break;
197
        }
198
100
        else if (tmp == 0)
199
        {
200
          // unknown if child contains empty string
201
1
          Assert(!exp2.isNull());
202
1
          vec.push_back(exp2);
203
1
          hasUnknownChild = true;
204
        }
205
      }
206
141
      if (ret != checkTmp)
207
      {
208
21
        if (!hasUnknownChild)
209
        {
210
21
          ret = retTmp;
211
        } else {
212
          Kind kr = k == REGEXP_UNION ? OR : AND;
213
          exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec);
214
        }
215
      }
216
141
      break;
217
    }
218
52
    case REGEXP_STAR:
219
    case REGEXP_OPT:
220
    {
221
      // contains empty string
222
52
      ret = 1;
223
52
      break;
224
    }
225
    case REGEXP_PLUS:
226
    {
227
      ret = delta(r[0], exp);
228
      break;
229
    }
230
    case REGEXP_LOOP:
231
    {
232
      uint32_t lo = utils::getLoopMinOccurrences(r);
233
      if (lo == 0)
234
      {
235
        ret = 1;
236
      }
237
      else
238
      {
239
        ret = delta(r[0], exp);
240
      }
241
      break;
242
    }
243
1
    case REGEXP_COMPLEMENT:
244
    {
245
1
      int tmp = delta(r[0], exp);
246
      // flip the result if known
247
1
      ret = tmp == 0 ? 0 : (3 - tmp);
248
1
      exp = exp.isNull() ? exp : exp.negate();
249
1
      break;
250
    }
251
    default:
252
    {
253
      Assert(!utils::isRegExpKind(k));
254
      break;
255
    }
256
  }
257
261
  if (!exp.isNull())
258
  {
259
1
    exp = Rewriter::rewrite(exp);
260
  }
261
522
  std::pair<int, Node> p(ret, exp);
262
261
  d_delta_cache[r] = p;
263
522
  Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r
264
261
                        << ", expr = " << exp << std::endl;
265
261
  return ret;
266
}
267
268
// 0-unknown, 1-yes, 2-no
269
6
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode)
270
{
271
6
  Assert(c.size() < 2);
272
6
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
273
274
6
  int ret = 1;
275
6
  retNode = d_emptyRegexp;
276
6
  NodeManager* nm = NodeManager::currentNM();
277
6
  SkolemManager* sm = nm->getSkolemManager();
278
279
12
  PairNodeStr dv = std::make_pair( r, c );
280
6
  if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
281
1
    retNode = d_deriv_cache[dv].first;
282
1
    ret = d_deriv_cache[dv].second;
283
  }
284
5
  else if (c.empty())
285
  {
286
    Node expNode;
287
    ret = delta( r, expNode );
288
    if(ret == 0) {
289
      retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
290
    } else if(ret == 1) {
291
      retNode = r;
292
    }
293
    std::pair< Node, int > p(retNode, ret);
294
    d_deriv_cache[dv] = p;
295
  } else {
296
5
    switch( r.getKind() ) {
297
      case kind::REGEXP_EMPTY: {
298
        ret = 2;
299
        break;
300
      }
301
      case kind::REGEXP_SIGMA: {
302
        retNode = d_emptySingleton;
303
        break;
304
      }
305
      case kind::REGEXP_RANGE: {
306
        cvc5::String a = r[0].getConst<String>();
307
        cvc5::String b = r[1].getConst<String>();
308
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
309
        break;
310
      }
311
2
      case kind::STRING_TO_REGEXP: {
312
4
        Node tmp = Rewriter::rewrite(r[0]);
313
2
        if(tmp.isConst()) {
314
2
          if(tmp == d_emptyString) {
315
1
            ret = 2;
316
          } else {
317
1
            if (tmp.getConst<String>().front() == c.front())
318
            {
319
              retNode =
320
                  nm->mkNode(STRING_TO_REGEXP,
321
                             Word::getLength(tmp) == 1 ? d_emptyString
322
                                                       : Word::substr(tmp, 1));
323
            } else {
324
1
              ret = 2;
325
            }
326
          }
327
        } else {
328
          ret = 0;
329
          Node rest;
330
          if(tmp.getKind() == kind::STRING_CONCAT) {
331
            Node t2 = tmp[0];
332
            if(t2.isConst()) {
333
              if (t2.getConst<String>().front() == c.front())
334
              {
335
                Node n = nm->mkNode(STRING_TO_REGEXP,
336
                                    Word::getLength(tmp) == 1
337
                                        ? d_emptyString
338
                                        : Word::substr(tmp, 1));
339
                std::vector< Node > vec_nodes;
340
                vec_nodes.push_back(n);
341
                for(unsigned i=1; i<tmp.getNumChildren(); i++) {
342
                  vec_nodes.push_back(tmp[i]);
343
                }
344
                retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
345
                ret = 1;
346
              } else {
347
                ret = 2;
348
              }
349
            } else {
350
              tmp = tmp[0];
351
              std::vector< Node > vec_nodes;
352
              for(unsigned i=1; i<tmp.getNumChildren(); i++) {
353
                vec_nodes.push_back(tmp[i]);
354
              }
355
              rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
356
            }
357
          }
358
          if(ret == 0) {
359
            Node sk =
360
                sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp");
361
            retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
362
            if(!rest.isNull()) {
363
              retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
364
            }
365
            Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
366
                        NodeManager::currentNM()->mkConst(c), sk));
367
            retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
368
          }
369
        }
370
2
        break;
371
      }
372
1
      case kind::REGEXP_CONCAT: {
373
2
        std::vector< Node > vec_nodes;
374
2
        std::vector< Node > delta_nodes;
375
2
        Node dnode = d_true;
376
3
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
377
4
          Node dc;
378
4
          Node exp2;
379
2
          int rt = derivativeS(r[i], c, dc);
380
2
          if(rt != 2) {
381
            if(rt == 0) {
382
              ret = 0;
383
            }
384
            std::vector< Node > vec_nodes2;
385
            if(dc != d_emptySingleton) {
386
              vec_nodes2.push_back( dc );
387
            }
388
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
389
              if(r[j] != d_emptySingleton) {
390
                vec_nodes2.push_back( r[j] );
391
              }
392
            }
393
            Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
394
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
395
            if(dnode != d_true) {
396
              tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
397
              ret = 0;
398
            }
399
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
400
              vec_nodes.push_back( tmp );
401
            }
402
          }
403
4
          Node exp3;
404
2
          int rt2 = delta( r[i], exp3 );
405
2
          if( rt2 == 0 ) {
406
            dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
407
2
          } else if( rt2 == 2 ) {
408
            break;
409
          }
410
        }
411
1
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
412
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
413
1
        if(retNode == d_emptyRegexp) {
414
1
          ret = 2;
415
        }
416
1
        break;
417
      }
418
1
      case kind::REGEXP_UNION: {
419
2
        std::vector< Node > vec_nodes;
420
3
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
421
4
          Node dc;
422
2
          int rt = derivativeS(r[i], c, dc);
423
2
          if(rt == 0) {
424
            ret = 0;
425
          }
426
2
          if(rt != 2) {
427
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
428
              vec_nodes.push_back( dc );
429
            }
430
          }
431
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
432
        }
433
1
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
434
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
435
1
        if(retNode == d_emptyRegexp) {
436
1
          ret = 2;
437
        }
438
1
        break;
439
      }
440
      case kind::REGEXP_INTER: {
441
        bool flag = true;
442
        bool flag_sg = false;
443
        std::vector< Node > vec_nodes;
444
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
445
          Node dc;
446
          int rt = derivativeS(r[i], c, dc);
447
          if(rt == 0) {
448
            ret = 0;
449
          } else if(rt == 2) {
450
            flag = false;
451
            break;
452
          }
453
          if(dc == d_sigma_star) {
454
            flag_sg = true;
455
          } else {
456
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
457
              vec_nodes.push_back( dc );
458
            }
459
          }
460
        }
461
        if(flag) {
462
          if(vec_nodes.size() == 0 && flag_sg) {
463
            retNode = d_sigma_star;
464
          } else {
465
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
466
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
467
            if(retNode == d_emptyRegexp) {
468
              ret = 2;
469
            }
470
          }
471
        } else {
472
          retNode = d_emptyRegexp;
473
          ret = 2;
474
        }
475
        break;
476
      }
477
1
      case kind::REGEXP_STAR: {
478
2
        Node dc;
479
1
        ret = derivativeS(r[0], c, dc);
480
1
        retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
481
1
        break;
482
      }
483
      case kind::REGEXP_LOOP: {
484
        uint32_t l = utils::getLoopMinOccurrences(r);
485
        uint32_t u = utils::getLoopMaxOccurrences(r);
486
        if (l == u && l == 0)
487
        {
488
          ret = 2;
489
          //retNode = d_emptyRegexp;
490
        } else {
491
          Node dc;
492
          ret = derivativeS(r[0], c, dc);
493
          if(dc==d_emptyRegexp) {
494
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
495
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
496
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
497
          } else {
498
            retNode = d_emptyRegexp;
499
          }
500
        }
501
        break;
502
      }
503
      case kind::REGEXP_COMPLEMENT:
504
      {
505
        // don't know result
506
        return 0;
507
        break;
508
      }
509
      default: {
510
        Assert(!utils::isRegExpKind(r.getKind()));
511
        return 0;
512
        break;
513
      }
514
    }
515
5
    if(retNode != d_emptyRegexp) {
516
      retNode = Rewriter::rewrite( retNode );
517
    }
518
10
    std::pair< Node, int > p(retNode, ret);
519
5
    d_deriv_cache[dv] = p;
520
  }
521
522
6
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
523
6
  return ret;
524
}
525
526
975
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c)
527
{
528
975
  Assert(c.size() < 2);
529
975
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
530
975
  Node retNode = d_emptyRegexp;
531
1950
  PairNodeStr dv = std::make_pair( r, c );
532
975
  NodeManager* nm = NodeManager::currentNM();
533
975
  if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
534
418
    retNode = d_dv_cache[dv];
535
  }
536
557
  else if (c.empty())
537
  {
538
    Node exp;
539
    int tmp = delta( r, exp );
540
    if(tmp == 0) {
541
      // TODO variable
542
      retNode = d_emptyRegexp;
543
    } else if(tmp == 1) {
544
      retNode = r;
545
    } else {
546
      retNode = d_emptyRegexp;
547
    }
548
  } else {
549
557
    Kind k = r.getKind();
550
557
    switch( k ) {
551
      case kind::REGEXP_EMPTY: {
552
        retNode = d_emptyRegexp;
553
        break;
554
      }
555
      case kind::REGEXP_SIGMA: {
556
        retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
557
        break;
558
      }
559
      case kind::REGEXP_RANGE: {
560
        cvc5::String a = r[0].getConst<String>();
561
        cvc5::String b = r[1].getConst<String>();
562
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
563
        break;
564
      }
565
134
      case kind::STRING_TO_REGEXP: {
566
134
        if(r[0].isConst()) {
567
134
          if(r[0] == d_emptyString) {
568
11
            retNode = d_emptyRegexp;
569
          } else {
570
123
            if (r[0].getConst<String>().front() == c.front())
571
            {
572
74
              retNode = nm->mkNode(STRING_TO_REGEXP,
573
148
                                   Word::getLength(r[0]) == 1
574
148
                                       ? d_emptyString
575
                                       : Word::substr(r[0], 1));
576
            } else {
577
49
              retNode = d_emptyRegexp;
578
            }
579
          }
580
        } else {
581
          // TODO variable
582
          retNode = d_emptyRegexp;
583
        }
584
134
        break;
585
      }
586
214
      case kind::REGEXP_CONCAT: {
587
428
        Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
588
428
        std::vector< Node > vec_nodes;
589
369
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
590
502
          Node dc = derivativeSingle(r[i], c);
591
347
          if(dc != d_emptyRegexp) {
592
420
            std::vector< Node > vec_nodes2;
593
210
            if(dc != rees) {
594
118
              vec_nodes2.push_back( dc );
595
            }
596
471
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
597
261
              if(r[j] != rees) {
598
261
                vec_nodes2.push_back( r[j] );
599
              }
600
            }
601
210
            Node tmp = vec_nodes2.size()==0 ? rees :
602
420
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
603
210
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
604
208
              vec_nodes.push_back( tmp );
605
            }
606
          }
607
502
          Node exp;
608
347
          if( delta( r[i], exp ) != 1 ) {
609
192
            break;
610
          }
611
        }
612
582
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
613
368
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
614
214
        break;
615
      }
616
99
      case kind::REGEXP_UNION: {
617
198
        std::vector< Node > vec_nodes;
618
319
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
619
440
          Node dc = derivativeSingle(r[i], c);
620
220
          if(dc != d_emptyRegexp) {
621
91
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
622
89
              vec_nodes.push_back( dc );
623
            }
624
          }
625
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
626
        }
627
259
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
628
160
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
629
99
        break;
630
      }
631
      case kind::REGEXP_INTER: {
632
        bool flag = true;
633
        bool flag_sg = false;
634
        std::vector< Node > vec_nodes;
635
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
636
          Node dc = derivativeSingle(r[i], c);
637
          if(dc != d_emptyRegexp) {
638
            if(dc == d_sigma_star) {
639
              flag_sg = true;
640
            } else {
641
              if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
642
                vec_nodes.push_back( dc );
643
              }
644
            }
645
          } else {
646
            flag = false;
647
            break;
648
          }
649
        }
650
        if(flag) {
651
          if(vec_nodes.size() == 0 && flag_sg) {
652
            retNode = d_sigma_star;
653
          } else {
654
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
655
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
656
          }
657
        } else {
658
          retNode = d_emptyRegexp;
659
        }
660
        break;
661
      }
662
110
      case kind::REGEXP_STAR: {
663
220
        Node dc = derivativeSingle(r[0], c);
664
110
        if(dc != d_emptyRegexp) {
665
80
          retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
666
        } else {
667
30
          retNode = d_emptyRegexp;
668
        }
669
110
        break;
670
      }
671
      case kind::REGEXP_LOOP: {
672
        uint32_t l = utils::getLoopMinOccurrences(r);
673
        uint32_t u = utils::getLoopMaxOccurrences(r);
674
        if (l == u || l == 0)
675
        {
676
          retNode = d_emptyRegexp;
677
        } else {
678
          Node dc = derivativeSingle(r[0], c);
679
          if(dc != d_emptyRegexp) {
680
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
681
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
682
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
683
          } else {
684
            retNode = d_emptyRegexp;
685
          }
686
        }
687
        //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
688
        break;
689
      }
690
      case kind::REGEXP_COMPLEMENT:
691
      default: {
692
        Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
693
        Unreachable();
694
        break;
695
      }
696
    }
697
557
    if(retNode != d_emptyRegexp) {
698
429
      retNode = Rewriter::rewrite( retNode );
699
    }
700
557
    d_dv_cache[dv] = retNode;
701
  }
702
975
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
703
1950
  return retNode;
704
}
705
706
493
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
707
{
708
493
  Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
709
  std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
710
493
      d_fset_cache.find(r);
711
493
  if(itr != d_fset_cache.end()) {
712
204
    pcset.insert((itr->second).first.begin(), (itr->second).first.end());
713
204
    pvset.insert((itr->second).second.begin(), (itr->second).second.end());
714
  } else {
715
    // cset is code points
716
578
    std::set<unsigned> cset;
717
578
    SetNodes vset;
718
289
    Kind k = r.getKind();
719
289
    switch( k ) {
720
      case kind::REGEXP_EMPTY: {
721
        break;
722
      }
723
      case kind::REGEXP_RANGE: {
724
        unsigned a = r[0].getConst<String>().front();
725
        unsigned b = r[1].getConst<String>().front();
726
        Assert(a < b);
727
        Assert(b < std::numeric_limits<unsigned>::max());
728
        for (unsigned c = a; c <= b; c++)
729
        {
730
          cset.insert(c);
731
        }
732
        break;
733
      }
734
80
      case kind::STRING_TO_REGEXP: {
735
160
        Node st = Rewriter::rewrite(r[0]);
736
80
        if(st.isConst()) {
737
160
          String s = st.getConst<String>();
738
80
          if(s.size() != 0) {
739
75
            unsigned sc = s.front();
740
75
            cset.insert(sc);
741
          }
742
        }
743
        else if (st.getKind() == kind::STRING_CONCAT)
744
        {
745
          if(st[0].isConst()) {
746
            String s = st[0].getConst<String>();
747
            unsigned sc = s.front();
748
            cset.insert(sc);
749
          } else {
750
            vset.insert( st[0] );
751
          }
752
        }
753
        else
754
        {
755
          vset.insert(st);
756
        }
757
80
        break;
758
      }
759
118
      case kind::REGEXP_CONCAT: {
760
182
        for(unsigned i=0; i<r.getNumChildren(); i++) {
761
172
          firstChars(r[i], cset, vset);
762
236
          Node n = r[i];
763
236
          Node exp;
764
172
          if (delta(n, exp) != 1)
765
          {
766
108
            break;
767
          }
768
        }
769
118
        break;
770
      }
771
40
      case kind::REGEXP_UNION: {
772
128
        for(unsigned i=0; i<r.getNumChildren(); i++) {
773
88
          firstChars(r[i], cset, vset);
774
        }
775
40
        break;
776
      }
777
      case kind::REGEXP_INTER: {
778
        //TODO: Overapproximation for now
779
        //for(unsigned i=0; i<r.getNumChildren(); i++) {
780
        // firstChars(r[i], cset, vset);
781
        //}
782
        firstChars(r[0], cset, vset);
783
        break;
784
      }
785
51
      case kind::REGEXP_STAR: {
786
51
        firstChars(r[0], cset, vset);
787
51
        break;
788
      }
789
      case kind::REGEXP_LOOP: {
790
        firstChars(r[0], cset, vset);
791
        break;
792
      }
793
      case kind::REGEXP_SIGMA:
794
      case kind::REGEXP_COMPLEMENT:
795
      default: {
796
        // we do not expect to call this function on regular expressions that
797
        // aren't a standard regular expression kind. However, if we do, then
798
        // the following code is conservative and says that the current
799
        // regular expression can begin with any character.
800
        Assert(utils::isRegExpKind(k));
801
        // can start with any character
802
        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
803
        for (unsigned i = 0; i <= d_lastchar; i++)
804
        {
805
          cset.insert(i);
806
        }
807
        break;
808
      }
809
    }
810
289
    pcset.insert(cset.begin(), cset.end());
811
289
    pvset.insert(vset.begin(), vset.end());
812
578
    std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
813
289
    d_fset_cache[r] = p;
814
  }
815
816
493
  if(Trace.isOn("regexp-fset")) {
817
    Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
818
    for (std::set<unsigned>::const_iterator it = pcset.begin();
819
         it != pcset.end();
820
         ++it)
821
    {
822
      if (it != pcset.begin())
823
      {
824
        Trace("regexp-fset") << ",";
825
      }
826
      Trace("regexp-fset") << (*it);
827
      }
828
    Trace("regexp-fset") << "}" << std::endl;
829
  }
830
493
}
831
832
352
Node RegExpOpr::simplify(Node t, bool polarity)
833
{
834
704
  Trace("strings-regexp-simpl")
835
352
      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
836
352
  Assert(t.getKind() == kind::STRING_IN_REGEXP);
837
704
  Node tlit = polarity ? t : t.notNode();
838
704
  Node conc;
839
352
  std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit);
840
352
  if (itr != d_simpCache.end())
841
  {
842
4
    return itr->second;
843
  }
844
348
  if (polarity)
845
  {
846
530
    std::vector<Node> newSkolems;
847
265
    conc = reduceRegExpPos(tlit, d_sc, newSkolems);
848
  }
849
  else
850
  {
851
    // see if we can use an optimized version of the reduction for re.++.
852
166
    Node r = t[1];
853
83
    if (r.getKind() == REGEXP_CONCAT)
854
    {
855
      // the index we are removing from the RE concatenation
856
68
      size_t index = 0;
857
      // As an optimization to the reduction, if we can determine that
858
      // all strings in the language of R1 have the same length, say n,
859
      // then the conclusion of the reduction is quantifier-free:
860
      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
861
136
      Node reLen = getRegExpConcatFixed(r, index);
862
68
      if (!reLen.isNull())
863
      {
864
62
        conc = reduceRegExpNegConcatFixed(tlit, reLen, index);
865
      }
866
    }
867
83
    if (conc.isNull())
868
    {
869
21
      conc = reduceRegExpNeg(tlit);
870
    }
871
  }
872
348
  d_simpCache[tlit] = conc;
873
696
  Trace("strings-regexp-simpl")
874
348
      << "RegExpOpr::simplify: returns " << conc << std::endl;
875
348
  return conc;
876
}
877
878
68
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index)
879
{
880
68
  Assert(r.getKind() == REGEXP_CONCAT);
881
68
  index = 0;
882
136
  Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
883
68
  if (!reLen.isNull())
884
  {
885
47
    return reLen;
886
  }
887
  // try from the opposite end
888
21
  size_t indexE = r.getNumChildren() - 1;
889
21
  reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
890
21
  if (!reLen.isNull())
891
  {
892
15
    index = indexE;
893
15
    return reLen;
894
  }
895
6
  return Node::null();
896
}
897
898
21
Node RegExpOpr::reduceRegExpNeg(Node mem)
899
{
900
21
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
901
42
  Node s = mem[0][0];
902
42
  Node r = mem[0][1];
903
21
  NodeManager* nm = NodeManager::currentNM();
904
21
  Kind k = r.getKind();
905
42
  Node zero = nm->mkConst(Rational(0));
906
21
  Node conc;
907
21
  if (k == REGEXP_CONCAT)
908
  {
909
    // do not use length entailment, call regular expression concat
910
12
    Node reLen;
911
6
    size_t i = 0;
912
6
    conc = reduceRegExpNegConcatFixed(mem, reLen, i);
913
  }
914
15
  else if (k == REGEXP_STAR)
915
  {
916
28
    Node emp = Word::mkEmptyWord(s.getType());
917
28
    Node lens = nm->mkNode(STRING_LENGTH, s);
918
28
    Node sne = s.eqNode(emp).negate();
919
28
    Node b1 = nm->mkBoundVar(nm->integerType());
920
28
    Node b1v = nm->mkNode(BOUND_VAR_LIST, b1);
921
    Node g1 =
922
28
        nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1));
923
    // internal
924
28
    Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
925
28
    Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
926
28
    Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate();
927
28
    Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate();
928
929
14
    conc = nm->mkNode(OR, s1r1, s2r2);
930
14
    conc = nm->mkNode(IMPLIES, g1, conc);
931
    // must mark as an internal quantifier
932
14
    conc = utils::mkForallInternal(b1v, conc);
933
14
    conc = nm->mkNode(AND, sne, conc);
934
  }
935
  else
936
  {
937
1
    Assert(!utils::isRegExpKind(k));
938
  }
939
42
  return conc;
940
}
941
942
68
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index)
943
{
944
68
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
945
136
  Node s = mem[0][0];
946
136
  Node r = mem[0][1];
947
68
  NodeManager* nm = NodeManager::currentNM();
948
68
  Assert(r.getKind() == REGEXP_CONCAT);
949
136
  Node zero = nm->mkConst(Rational(0));
950
  // The following simplification states that
951
  //    ~( s in R1 ++ R2 ++... ++ Rn )
952
  // is equivalent to
953
  //    forall x.
954
  //      0 <= x <= len(s) =>
955
  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
956
  // Index is the child index of r that we are stripping off, which is either
957
  // from the beginning or the end.
958
68
  Assert(index == 0 || index == r.getNumChildren() - 1);
959
136
  Node lens = nm->mkNode(STRING_LENGTH, s);
960
136
  Node b1;
961
136
  Node b1v;
962
136
  Node guard;
963
68
  if (reLen.isNull())
964
  {
965
6
    b1 = SkolemCache::mkIndexVar(mem);
966
6
    b1v = nm->mkNode(BOUND_VAR_LIST, b1);
967
18
    guard = nm->mkNode(AND,
968
12
                       nm->mkNode(GEQ, b1, zero),
969
12
                       nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
970
  }
971
  else
972
  {
973
62
    b1 = reLen;
974
  }
975
136
  Node s1;
976
136
  Node s2;
977
68
  if (index == 0)
978
  {
979
53
    s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
980
53
    s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
981
  }
982
  else
983
  {
984
15
    s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
985
15
    s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1));
986
  }
987
136
  Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate();
988
136
  std::vector<Node> nvec;
989
270
  for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
990
  {
991
202
    if (i != index)
992
    {
993
134
      nvec.push_back(r[i]);
994
    }
995
  }
996
136
  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
997
68
  r2 = Rewriter::rewrite(r2);
998
136
  Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
999
68
  Node conc = nm->mkNode(OR, s1r1, s2r2);
1000
68
  if (!b1v.isNull())
1001
  {
1002
6
    conc = nm->mkNode(OR, guard.negate(), conc);
1003
    // must mark as an internal quantifier
1004
6
    conc = utils::mkForallInternal(b1v, conc);
1005
  }
1006
136
  return conc;
1007
}
1008
1009
366
Node RegExpOpr::reduceRegExpPos(Node mem,
1010
                                SkolemCache* sc,
1011
                                std::vector<Node>& newSkolems)
1012
{
1013
366
  Assert(mem.getKind() == STRING_IN_REGEXP);
1014
732
  Node s = mem[0];
1015
732
  Node r = mem[1];
1016
366
  NodeManager* nm = NodeManager::currentNM();
1017
366
  Kind k = r.getKind();
1018
366
  Node conc;
1019
366
  if (k == REGEXP_CONCAT)
1020
  {
1021
526
    std::vector<Node> nvec;
1022
526
    std::vector<Node> cc;
1023
263
    SkolemManager* sm = nm->getSkolemManager();
1024
    // Look up skolems for each of the components. If sc has optimizations
1025
    // enabled, this will return arguments of str.to_re.
1026
1060
    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
1027
    {
1028
797
      if (r[i].getKind() == STRING_TO_REGEXP)
1029
      {
1030
        // optimization, just take the body
1031
222
        newSkolems.push_back(r[i][0]);
1032
      }
1033
      else
1034
      {
1035
1150
        Node ivalue = nm->mkConst(Rational(i));
1036
        Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT,
1037
1150
                                       s.getType(),
1038
2300
                                       {mem[0], mem[1], ivalue});
1039
575
        newSkolems.push_back(sk);
1040
575
        nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
1041
      }
1042
    }
1043
    // (str.in_re x (re.++ R0 .... Rn)) =>
1044
    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
1045
526
    Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
1046
263
    nvec.insert(nvec.begin(), lem);
1047
263
    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
1048
  }
1049
103
  else if (k == REGEXP_STAR)
1050
  {
1051
202
    Node emp = Word::mkEmptyWord(s.getType());
1052
202
    Node se = s.eqNode(emp);
1053
202
    Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
1054
202
    Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]);
1055
202
    Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand);
1056
    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
1057
    // is matched by `R`, or `x` is matched by two or more `R`s. For the
1058
    // last case, `x` will break into three pieces, making the beginning
1059
    // and the end each match `R` and the middle match `R*`. Matching the
1060
    // beginning and the end with `R` allows us to reason about the
1061
    // beginning and the end of `x` simultaneously.
1062
    //
1063
    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))
1064
1065
    // We also immediately unfold the last disjunct for re.*. The advantage
1066
    // of doing this is that we use the same scheme for skolems above.
1067
202
    std::vector<Node> newSkolemsC;
1068
101
    sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC);
1069
101
    Assert(newSkolemsC.size() == 3);
1070
    // make the return lemma
1071
    // can also assume the component match the first and last R are non-empty.
1072
    // This means that the overall conclusion is:
1073
    //   (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^
1074
    //                          k1 in R ^ k2 in (re.* R) ^ k3 in R ^
1075
    //                          k1 != ""  ^ k3 != "")
1076
101
    conc = nm->mkNode(OR,
1077
                      se,
1078
                      sinr,
1079
404
                      nm->mkNode(AND,
1080
                                 sinRExp,
1081
202
                                 newSkolemsC[0].eqNode(emp).negate(),
1082
202
                                 newSkolemsC[2].eqNode(emp).negate()));
1083
  }
1084
  else
1085
  {
1086
2
    Assert(!utils::isRegExpKind(k));
1087
  }
1088
732
  return conc;
1089
}
1090
1091
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1092
  for(std::set< PairNodes >::const_iterator itr = s.begin();
1093
      itr != s.end(); ++itr) {
1094
    if((itr->first == n1 && itr->second == n2) ||
1095
       (itr->first == n2 && itr->second == n1)) {
1096
      return true;
1097
    }
1098
  }
1099
  return false;
1100
}
1101
1102
494
bool RegExpOpr::containC2(unsigned cnt, Node n) {
1103
494
  if(n.getKind() == kind::REGEXP_RV) {
1104
122
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1105
        << "Exceeded UINT32_MAX in RegExpOpr::containC2";
1106
122
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1107
122
    return cnt == y;
1108
372
  } else if(n.getKind() == kind::REGEXP_CONCAT) {
1109
102
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1110
80
      if(containC2(cnt, n[i])) {
1111
12
        return true;
1112
      }
1113
    }
1114
338
  } else if(n.getKind() == kind::REGEXP_STAR) {
1115
34
    return containC2(cnt, n[0]);
1116
304
  } else if(n.getKind() == kind::REGEXP_LOOP) {
1117
    return containC2(cnt, n[0]);
1118
304
  } else if(n.getKind() == kind::REGEXP_UNION) {
1119
118
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1120
88
      if(containC2(cnt, n[i])) {
1121
12
        return true;
1122
      }
1123
    }
1124
  }
1125
314
  return false;
1126
}
1127
91
Node RegExpOpr::convert1(unsigned cnt, Node n) {
1128
91
  Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1129
182
  Node r1, r2;
1130
91
  convert2(cnt, n, r1, r2);
1131
91
  Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1132
91
  Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1133
91
     NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1134
91
  ret = Rewriter::rewrite( ret );
1135
91
  Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1136
182
  return ret;
1137
}
1138
293
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1139
293
  if(n == d_emptyRegexp) {
1140
6
    r1 = d_emptyRegexp;
1141
6
    r2 = d_emptyRegexp;
1142
6
    return;
1143
287
  } else if(n == d_emptySingleton) {
1144
17
    r1 = d_emptySingleton;
1145
17
    r2 = d_emptySingleton;
1146
  }
1147
287
  Kind nk = n.getKind();
1148
287
  if (nk == REGEXP_RV)
1149
  {
1150
50
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1151
        << "Exceeded UINT32_MAX in RegExpOpr::convert2";
1152
50
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1153
50
    r1 = d_emptySingleton;
1154
50
    if(cnt == y) {
1155
50
      r2 = d_emptyRegexp;
1156
    } else {
1157
      r2 = n;
1158
    }
1159
  }
1160
237
  else if (nk == REGEXP_CONCAT)
1161
  {
1162
132
    bool flag = true;
1163
264
    std::vector<Node> vr1, vr2;
1164
362
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1165
292
      if(containC2(cnt, n[i])) {
1166
124
        Node t1, t2;
1167
62
        convert2(cnt, n[i], t1, t2);
1168
62
        vr1.push_back(t1);
1169
62
        r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1170
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1171
62
        vr2.push_back(t2);
1172
62
        for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1173
          vr2.push_back(n[j]);
1174
        }
1175
62
        r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1176
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1177
62
        flag = false;
1178
62
        break;
1179
      } else {
1180
230
        vr1.push_back(n[i]);
1181
      }
1182
    }
1183
132
    if(flag) {
1184
70
      r1 = d_emptySingleton;
1185
70
      r2 = n;
1186
    }
1187
  }
1188
105
  else if (nk == REGEXP_UNION)
1189
  {
1190
116
    std::vector<Node> vr1, vr2;
1191
198
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1192
280
      Node t1, t2;
1193
140
      convert2(cnt, n[i], t1, t2);
1194
140
      vr1.push_back(t1);
1195
140
      vr2.push_back(t2);
1196
    }
1197
58
    r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1198
58
    r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1199
  }
1200
47
  else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
1201
           || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
1202
  {
1203
    // this leaves n unchanged
1204
47
    r1 = d_emptySingleton;
1205
47
    r2 = n;
1206
  }
1207
  else
1208
  {
1209
    //is it possible?
1210
    Unreachable();
1211
  }
1212
}
1213
1214
190
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
1215
  //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1216
190
  if(r1 > r2) {
1217
60
    TNode tmpNode = r1;
1218
30
    r1 = r2;
1219
30
    r2 = tmpNode;
1220
  }
1221
190
  Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
1222
380
  std::pair < Node, Node > p(r1, r2);
1223
190
  std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
1224
190
  Node rNode;
1225
190
  if(itr != d_inter_cache.end()) {
1226
35
    rNode = itr->second;
1227
  } else {
1228
155
    Trace("regexp-int-debug") << " ... not in cache" << std::endl;
1229
155
    if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1230
      Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
1231
      rNode = d_emptyRegexp;
1232
155
    } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1233
18
      Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
1234
36
      Node exp;
1235
18
      int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1236
18
      if(r == 0) {
1237
        //TODO: variable
1238
        Unreachable();
1239
18
      } else if(r == 1) {
1240
15
        rNode = d_emptySingleton;
1241
      } else {
1242
3
        rNode = d_emptyRegexp;
1243
      }
1244
137
    } else if(r1 == r2) {
1245
      Trace("regexp-int-debug") << " ... equal" << std::endl;
1246
      rNode = r1; //convert1(cnt, r1);
1247
    } else {
1248
137
      Trace("regexp-int-debug") << " ... normal checking" << std::endl;
1249
137
      std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1250
137
      if(itrcache != cache.end()) {
1251
46
        rNode = itrcache->second;
1252
      } else {
1253
91
        Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
1254
182
        std::vector<unsigned> cset;
1255
182
        std::set<unsigned> cset1, cset2;
1256
182
        std::set< Node > vset1, vset2;
1257
91
        firstChars(r1, cset1, vset1);
1258
91
        firstChars(r2, cset2, vset2);
1259
91
        Trace("regexp-int-debug") << " ... got fset" << std::endl;
1260
91
        std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
1261
91
             std::inserter(cset, cset.begin()));
1262
182
        std::vector< Node > vec_nodes;
1263
182
        Node delta_exp;
1264
91
        Trace("regexp-int-debug") << " ... try delta" << std::endl;
1265
91
        int flag = delta(r1, delta_exp);
1266
91
        int flag2 = delta(r2, delta_exp);
1267
91
        Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
1268
91
        if(flag != 2 && flag2 != 2) {
1269
13
          if(flag == 1 && flag2 == 1) {
1270
13
            vec_nodes.push_back(d_emptySingleton);
1271
          } else {
1272
            //TODO: variable
1273
            Unreachable();
1274
          }
1275
        }
1276
91
        if(Trace.isOn("regexp-int-debug")) {
1277
          Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
1278
          for (std::vector<unsigned>::const_iterator it = cset.begin();
1279
               it != cset.end();
1280
               ++it)
1281
          {
1282
            if (it != cset.begin())
1283
            {
1284
              Trace("regexp-int-debug") << ", ";
1285
            }
1286
            Trace("regexp-int-debug") << (*it);
1287
          }
1288
          Trace("regexp-int-debug") << std::endl;
1289
        }
1290
182
        std::map< PairNodes, Node > cacheX;
1291
240
        for (std::vector<unsigned>::const_iterator it = cset.begin();
1292
240
             it != cset.end();
1293
             ++it)
1294
        {
1295
298
          std::vector<unsigned> cvec;
1296
149
          cvec.push_back(*it);
1297
298
          String c(cvec);
1298
149
          Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
1299
298
          Node r1l = derivativeSingle(r1, c);
1300
298
          Node r2l = derivativeSingle(r2, c);
1301
149
          Trace("regexp-int-debug") << "  ... got partial(r1,c) = " << mkString(r1l) << std::endl;
1302
149
          Trace("regexp-int-debug") << "  ... got partial(r2,c) = " << mkString(r2l) << std::endl;
1303
298
          Node rt;
1304
1305
149
          if(r1l > r2l) {
1306
68
            Node tnode = r1l;
1307
34
            r1l = r2l; r2l = tnode;
1308
          }
1309
298
          PairNodes pp(r1l, r2l);
1310
149
          std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp);
1311
149
          if(itr2 != cacheX.end()) {
1312
8
            rt = itr2->second;
1313
          } else {
1314
282
            std::map< PairNodes, Node > cache2(cache);
1315
282
            cache2[p] = NodeManager::currentNM()->mkNode(
1316
                kind::REGEXP_RV,
1317
282
                NodeManager::currentNM()->mkConst(cvc5::Rational(cnt)));
1318
141
            rt = intersectInternal(r1l, r2l, cache2, cnt+1);
1319
141
            cacheX[ pp ] = rt;
1320
          }
1321
1322
447
          rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1323
298
            NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1324
1325
149
          Trace("regexp-int-debug") << "  ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
1326
149
          vec_nodes.push_back(rt);
1327
        }
1328
91
        rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1329
            NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) );
1330
91
        rNode = convert1(cnt, rNode);
1331
91
        rNode = Rewriter::rewrite( rNode );
1332
      }
1333
    }
1334
155
    Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
1335
155
    if (!expr::hasSubtermKind(REGEXP_RV, rNode))
1336
    {
1337
71
      d_inter_cache[p] = rNode;
1338
    }
1339
  }
1340
190
  Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1341
380
  return rNode;
1342
}
1343
1344
98
Node RegExpOpr::removeIntersection(Node r) {
1345
98
  Assert(checkConstRegExp(r));
1346
98
  NodeManager* nm = NodeManager::currentNM();
1347
196
  std::unordered_map<TNode, Node> visited;
1348
98
  std::unordered_map<TNode, Node>::iterator it;
1349
196
  std::vector<TNode> visit;
1350
196
  TNode cur;
1351
98
  visit.push_back(r);
1352
1559
  do
1353
  {
1354
1657
    cur = visit.back();
1355
1657
    visit.pop_back();
1356
1657
    it = visited.find(cur);
1357
1358
1657
    if (it == visited.end())
1359
    {
1360
773
      visited[cur] = Node::null();
1361
773
      visit.push_back(cur);
1362
1559
      for (const Node& cn : cur)
1363
      {
1364
786
        visit.push_back(cn);
1365
      }
1366
    }
1367
884
    else if (it->second.isNull())
1368
    {
1369
773
      Kind ck = cur.getKind();
1370
1546
      Node ret;
1371
773
      bool childChanged = false;
1372
1546
      std::vector<Node> children;
1373
773
      if (cur.getMetaKind() == metakind::PARAMETERIZED)
1374
      {
1375
        children.push_back(cur.getOperator());
1376
      }
1377
1559
      for (const Node& cn : cur)
1378
      {
1379
786
        it = visited.find(cn);
1380
786
        Assert(it != visited.end());
1381
786
        Assert(!it->second.isNull());
1382
786
        if (ck == REGEXP_INTER)
1383
        {
1384
4
          if (ret.isNull())
1385
          {
1386
2
            ret = it->second;
1387
          }
1388
          else
1389
          {
1390
2
            ret = intersect(ret, it->second);
1391
          }
1392
        }
1393
        else
1394
        {
1395
          // will construct below
1396
782
          childChanged = childChanged || cn != it->second;
1397
782
          children.push_back(it->second);
1398
        }
1399
      }
1400
773
      if (ck != REGEXP_INTER)
1401
      {
1402
771
        if (childChanged)
1403
        {
1404
2
          ret = nm->mkNode(cur.getKind(), children);
1405
        }
1406
        else
1407
        {
1408
769
          ret = cur;
1409
        }
1410
      }
1411
773
      visited[cur] = ret;
1412
    }
1413
1657
  } while (!visit.empty());
1414
98
  Assert(visited.find(r) != visited.end());
1415
98
  Assert(!visited.find(r)->second.isNull());
1416
98
  if (Trace.isOn("regexp-intersect"))
1417
  {
1418
    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
1419
                              << " ) = " << mkString(visited[r]) << std::endl;
1420
  }
1421
196
  return visited[r];
1422
}
1423
1424
49
Node RegExpOpr::intersect(Node r1, Node r2)
1425
{
1426
49
  if (!checkConstRegExp(r1) || !checkConstRegExp(r2))
1427
  {
1428
    return Node::null();
1429
  }
1430
98
  Node rr1 = removeIntersection(r1);
1431
98
  Node rr2 = removeIntersection(r2);
1432
98
  std::map<PairNodes, Node> cache;
1433
49
  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
1434
49
  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
1435
98
  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
1436
49
                            << ",\n\t" << mkString(r2) << ")" << std::endl;
1437
98
  Node retNode = intersectInternal(rr1, rr2, cache, 1);
1438
98
  Trace("regexp-intersect")
1439
98
      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
1440
49
      << ") =\n\t" << mkString(retNode) << std::endl;
1441
49
  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
1442
49
  return retNode;
1443
}
1444
1445
//printing
1446
14884
std::string RegExpOpr::niceChar(Node r) {
1447
14884
  if(r.isConst()) {
1448
29654
    std::string s = r.getConst<String>().toString();
1449
14827
    return s == "." ? "\\." : s;
1450
  } else {
1451
114
    std::string ss = "$" + r.toString();
1452
57
    return ss;
1453
  }
1454
}
1455
28042
std::string RegExpOpr::mkString( Node r ) {
1456
28042
  std::string retStr;
1457
28042
  if(r.isNull()) {
1458
    retStr = "\\E";
1459
  } else {
1460
28042
    int k = r.getKind();
1461
28042
    switch( k ) {
1462
337
      case kind::REGEXP_EMPTY: {
1463
337
        retStr += "\\E";
1464
337
        break;
1465
      }
1466
230
      case kind::REGEXP_SIGMA: {
1467
230
        retStr += ".";
1468
230
        break;
1469
      }
1470
14084
      case kind::STRING_TO_REGEXP: {
1471
28168
        std::string tmp( niceChar( r[0] ) );
1472
14084
        retStr += tmp.size()==1? tmp : "(" + tmp + ")";
1473
14084
        break;
1474
      }
1475
3818
      case kind::REGEXP_CONCAT: {
1476
3818
        retStr += "(";
1477
13698
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1478
          //if(i != 0) retStr += ".";
1479
9880
          retStr += mkString( r[i] );
1480
        }
1481
3818
        retStr += ")";
1482
3818
        break;
1483
      }
1484
3669
      case kind::REGEXP_UNION: {
1485
3669
        retStr += "(";
1486
11720
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1487
8051
          if(i != 0) retStr += "|";
1488
8051
          retStr += mkString( r[i] );
1489
        }
1490
3669
        retStr += ")";
1491
3669
        break;
1492
      }
1493
22
      case kind::REGEXP_INTER: {
1494
22
        retStr += "(";
1495
66
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1496
44
          if(i != 0) retStr += "&";
1497
44
          retStr += mkString( r[i] );
1498
        }
1499
22
        retStr += ")";
1500
22
        break;
1501
      }
1502
5169
      case kind::REGEXP_STAR: {
1503
5169
        retStr += mkString( r[0] );
1504
5169
        retStr += "*";
1505
5169
        break;
1506
      }
1507
      case kind::REGEXP_PLUS: {
1508
        retStr += mkString( r[0] );
1509
        retStr += "+";
1510
        break;
1511
      }
1512
      case kind::REGEXP_OPT: {
1513
        retStr += mkString( r[0] );
1514
        retStr += "?";
1515
        break;
1516
      }
1517
400
      case kind::REGEXP_RANGE: {
1518
400
        retStr += "[";
1519
400
        retStr += niceChar( r[0] );
1520
400
        retStr += "-";
1521
400
        retStr += niceChar( r[1] );
1522
400
        retStr += "]";
1523
400
        break;
1524
      }
1525
      case kind::REGEXP_LOOP: {
1526
        uint32_t l = utils::getLoopMinOccurrences(r);
1527
        std::stringstream ss;
1528
        ss << "(" << mkString(r[0]) << "){" << l << ",";
1529
        if(r.getNumChildren() == 3) {
1530
          uint32_t u = utils::getLoopMaxOccurrences(r);
1531
          ss << u;
1532
        }
1533
        ss << "}";
1534
        retStr += ss.str();
1535
        break;
1536
      }
1537
292
      case kind::REGEXP_RV: {
1538
292
        retStr += "<";
1539
292
        retStr += r[0].getConst<Rational>().getNumerator().toString();
1540
292
        retStr += ">";
1541
292
        break;
1542
      }
1543
18
      case REGEXP_COMPLEMENT:
1544
      {
1545
18
        retStr += "^(";
1546
18
        retStr += mkString(r[0]);
1547
18
        retStr += ")";
1548
18
        break;
1549
      }
1550
3
      default:
1551
      {
1552
6
        std::stringstream ss;
1553
3
        ss << r;
1554
3
        retStr = ss.str();
1555
3
        Assert(!utils::isRegExpKind(r.getKind()));
1556
3
        break;
1557
      }
1558
    }
1559
  }
1560
1561
28042
  return retStr;
1562
}
1563
1564
1001
bool RegExpOpr::regExpIncludes(Node r1, Node r2)
1565
{
1566
1001
  const auto& it = d_inclusionCache.find(std::make_pair(r1, r2));
1567
1001
  if (it != d_inclusionCache.end())
1568
  {
1569
571
    return (*it).second;
1570
  }
1571
430
  bool result = RegExpEntail::regExpIncludes(r1, r2);
1572
430
  d_inclusionCache[std::make_pair(r1, r2)] = result;
1573
430
  return result;
1574
}
1575
1576
}  // namespace strings
1577
}  // namespace theory
1578
22746
}  // namespace cvc5