GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_operation.cpp Lines: 702 981 71.6 %
Date: 2021-11-07 Branches: 1480 4076 36.3 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Tianyi Liang, Andrew Reynolds, Mathias Preiner
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Symbolic Regular Expresion Operations
14
 */
15
16
#include "theory/strings/regexp_operation.h"
17
18
#include <sstream>
19
20
#include "expr/node_algorithm.h"
21
#include "options/strings_options.h"
22
#include "theory/rewriter.h"
23
#include "theory/strings/regexp_entail.h"
24
#include "theory/strings/theory_strings_utils.h"
25
#include "theory/strings/word.h"
26
#include "util/regexp.h"
27
28
using namespace cvc5::kind;
29
30
namespace cvc5 {
31
namespace theory {
32
namespace strings {
33
34
15273
RegExpOpr::RegExpOpr(Env& env, SkolemCache* sc)
35
    : EnvObj(env),
36
      d_true(NodeManager::currentNM()->mkConst(true)),
37
      d_false(NodeManager::currentNM()->mkConst(false)),
38
      d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
39
30546
                                                     std::vector<Node>{})),
40
30546
      d_zero(NodeManager::currentNM()->mkConst(::cvc5::Rational(0))),
41
30546
      d_one(NodeManager::currentNM()->mkConst(::cvc5::Rational(1))),
42
      d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
43
30546
                                               std::vector<Node>{})),
44
      d_sigma_star(
45
          NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)),
46
137457
      d_sc(sc)
47
{
48
15273
  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
49
50
15273
  d_emptySingleton =
51
30546
      NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
52
15273
  d_lastchar = options().strings.stringsAlphaCard - 1;
53
15273
}
54
55
15268
RegExpOpr::~RegExpOpr() {}
56
57
988
bool RegExpOpr::checkConstRegExp( Node r ) {
58
988
  Assert(r.getType().isRegExp());
59
1976
  Trace("strings-regexp-cstre")
60
988
      << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
61
988
  RegExpConstType rct = getRegExpConstType(r);
62
988
  return rct != RE_C_VARIABLE;
63
}
64
65
1819
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
66
{
67
1819
  Assert(r.getType().isRegExp());
68
1819
  std::unordered_map<Node, RegExpConstType>::iterator it;
69
3638
  std::vector<TNode> visit;
70
3638
  TNode cur;
71
1819
  visit.push_back(r);
72
2436
  do
73
  {
74
4255
    cur = visit.back();
75
4255
    visit.pop_back();
76
4255
    it = d_constCache.find(cur);
77
78
4255
    Kind ck = cur.getKind();
79
4255
    if (it == d_constCache.end())
80
    {
81
1335
      if (ck == STRING_TO_REGEXP)
82
      {
83
832
        Node tmp = rewrite(cur[0]);
84
416
        d_constCache[cur] =
85
416
            tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE;
86
      }
87
919
      else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE)
88
      {
89
130
        d_constCache[cur] = RE_C_CONSTANT;
90
      }
91
789
      else if (!utils::isRegExpKind(ck))
92
      {
93
        // non-regular expression applications, e.g. function applications
94
        // with regular expression return type are treated as variables.
95
4
        d_constCache[cur] = RE_C_VARIABLE;
96
      }
97
      else
98
      {
99
785
        d_constCache[cur] = RE_C_UNKNOWN;
100
785
        visit.push_back(cur);
101
785
        visit.insert(visit.end(), cur.begin(), cur.end());
102
      }
103
    }
104
2920
    else if (it->second == RE_C_UNKNOWN)
105
    {
106
785
      RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
107
2436
      for (const Node& cn : cur)
108
      {
109
1651
        it = d_constCache.find(cn);
110
1651
        Assert(it != d_constCache.end());
111
1651
        if (it->second > ret)
112
        {
113
430
          ret = it->second;
114
        }
115
      }
116
785
      d_constCache[cur] = ret;
117
    }
118
4255
  } while (!visit.empty());
119
1819
  Assert(d_constCache.find(r) != d_constCache.end());
120
3638
  return d_constCache[r];
121
}
122
123
// 0-unknown, 1-yes, 2-no
124
1703
int RegExpOpr::delta( Node r, Node &exp ) {
125
  std::map<Node, std::pair<int, Node> >::const_iterator itd =
126
1703
      d_delta_cache.find(r);
127
1703
  if (itd != d_delta_cache.end())
128
  {
129
    // already computed
130
1107
    exp = itd->second.second;
131
1107
    return itd->second.first;
132
  }
133
596
  Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl;
134
596
  int ret = 0;
135
596
  NodeManager* nm = NodeManager::currentNM();
136
596
  Kind k = r.getKind();
137
596
  switch (k)
138
  {
139
    case REGEXP_EMPTY:
140
    case REGEXP_SIGMA:
141
    case REGEXP_RANGE:
142
    {
143
      // does not contain empty string
144
      ret = 2;
145
      break;
146
    }
147
186
    case STRING_TO_REGEXP:
148
    {
149
372
      Node tmp = rewrite(r[0]);
150
186
      if (tmp.isConst())
151
      {
152
178
        if (tmp == d_emptyString)
153
        {
154
12
          ret = 1;
155
        } else {
156
166
          ret = 2;
157
        }
158
      }
159
      else
160
      {
161
8
        ret = 0;
162
8
        if (tmp.getKind() == STRING_CONCAT)
163
        {
164
          for (const Node& tmpc : tmp)
165
          {
166
            if (tmpc.isConst())
167
            {
168
              ret = 2;
169
              break;
170
            }
171
          }
172
        }
173
8
        if (ret == 0)
174
        {
175
8
          exp = r[0].eqNode(d_emptyString);
176
        }
177
      }
178
186
      break;
179
    }
180
311
    case REGEXP_CONCAT:
181
    case REGEXP_UNION:
182
    case REGEXP_INTER:
183
    {
184
      // has there been an unknown child?
185
311
      bool hasUnknownChild = false;
186
622
      std::vector<Node> vec;
187
311
      int checkTmp = k == REGEXP_UNION ? 1 : 2;
188
311
      int retTmp = k == REGEXP_UNION ? 2 : 1;
189
447
      for (const Node& rc : r)
190
      {
191
559
        Node exp2;
192
423
        int tmp = delta(rc, exp2);
193
423
        if (tmp == checkTmp)
194
        {
195
          // return is implied by the child's return value
196
287
          ret = checkTmp;
197
287
          break;
198
        }
199
136
        else if (tmp == 0)
200
        {
201
          // unknown if child contains empty string
202
16
          Assert(!exp2.isNull());
203
16
          vec.push_back(exp2);
204
16
          hasUnknownChild = true;
205
        }
206
      }
207
311
      if (ret != checkTmp)
208
      {
209
24
        if (!hasUnknownChild)
210
        {
211
24
          ret = retTmp;
212
        } else {
213
          Kind kr = k == REGEXP_UNION ? OR : AND;
214
          exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec);
215
        }
216
      }
217
311
      break;
218
    }
219
91
    case REGEXP_STAR:
220
    case REGEXP_OPT:
221
    {
222
      // contains empty string
223
91
      ret = 1;
224
91
      break;
225
    }
226
    case REGEXP_PLUS:
227
    {
228
      ret = delta(r[0], exp);
229
      break;
230
    }
231
    case REGEXP_LOOP:
232
    {
233
      uint32_t lo = utils::getLoopMinOccurrences(r);
234
      if (lo == 0)
235
      {
236
        ret = 1;
237
      }
238
      else
239
      {
240
        ret = delta(r[0], exp);
241
      }
242
      break;
243
    }
244
8
    case REGEXP_COMPLEMENT:
245
    {
246
8
      int tmp = delta(r[0], exp);
247
      // flip the result if known
248
8
      ret = tmp == 0 ? 0 : (3 - tmp);
249
8
      exp = exp.isNull() ? exp : exp.negate();
250
8
      break;
251
    }
252
    default:
253
    {
254
      Assert(!utils::isRegExpKind(k));
255
      break;
256
    }
257
  }
258
596
  if (!exp.isNull())
259
  {
260
8
    exp = rewrite(exp);
261
  }
262
1192
  std::pair<int, Node> p(ret, exp);
263
596
  d_delta_cache[r] = p;
264
1192
  Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r
265
596
                        << ", expr = " << exp << std::endl;
266
596
  return ret;
267
}
268
269
// 0-unknown, 1-yes, 2-no
270
24
int RegExpOpr::derivativeS(Node r, cvc5::String c, Node& retNode)
271
{
272
24
  Assert(c.size() < 2);
273
24
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
274
275
24
  int ret = 1;
276
24
  retNode = d_emptyRegexp;
277
24
  NodeManager* nm = NodeManager::currentNM();
278
24
  SkolemManager* sm = nm->getSkolemManager();
279
280
48
  PairNodeStr dv = std::make_pair( r, c );
281
24
  if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
282
4
    retNode = d_deriv_cache[dv].first;
283
4
    ret = d_deriv_cache[dv].second;
284
  }
285
20
  else if (c.empty())
286
  {
287
    Node expNode;
288
    ret = delta( r, expNode );
289
    if(ret == 0) {
290
      retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
291
    } else if(ret == 1) {
292
      retNode = r;
293
    }
294
    std::pair< Node, int > p(retNode, ret);
295
    d_deriv_cache[dv] = p;
296
  } else {
297
20
    switch( r.getKind() ) {
298
      case kind::REGEXP_EMPTY: {
299
        ret = 2;
300
        break;
301
      }
302
      case kind::REGEXP_SIGMA: {
303
        retNode = d_emptySingleton;
304
        break;
305
      }
306
      case kind::REGEXP_RANGE: {
307
        cvc5::String a = r[0].getConst<String>();
308
        cvc5::String b = r[1].getConst<String>();
309
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
310
        break;
311
      }
312
8
      case kind::STRING_TO_REGEXP: {
313
16
        Node tmp = rewrite(r[0]);
314
8
        if(tmp.isConst()) {
315
8
          if(tmp == d_emptyString) {
316
4
            ret = 2;
317
          } else {
318
4
            if (tmp.getConst<String>().front() == c.front())
319
            {
320
              retNode =
321
                  nm->mkNode(STRING_TO_REGEXP,
322
                             Word::getLength(tmp) == 1 ? d_emptyString
323
                                                       : Word::substr(tmp, 1));
324
            } else {
325
4
              ret = 2;
326
            }
327
          }
328
        } else {
329
          ret = 0;
330
          Node rest;
331
          if(tmp.getKind() == kind::STRING_CONCAT) {
332
            Node t2 = tmp[0];
333
            if(t2.isConst()) {
334
              if (t2.getConst<String>().front() == c.front())
335
              {
336
                Node n = nm->mkNode(STRING_TO_REGEXP,
337
                                    Word::getLength(tmp) == 1
338
                                        ? d_emptyString
339
                                        : Word::substr(tmp, 1));
340
                std::vector< Node > vec_nodes;
341
                vec_nodes.push_back(n);
342
                for(unsigned i=1; i<tmp.getNumChildren(); i++) {
343
                  vec_nodes.push_back(tmp[i]);
344
                }
345
                retNode = nm->mkNode(kind::REGEXP_CONCAT, vec_nodes);
346
                ret = 1;
347
              } else {
348
                ret = 2;
349
              }
350
            } else {
351
              tmp = tmp[0];
352
              std::vector< Node > vec_nodes;
353
              for(unsigned i=1; i<tmp.getNumChildren(); i++) {
354
                vec_nodes.push_back(tmp[i]);
355
              }
356
              rest = nm->mkNode(kind::REGEXP_CONCAT, vec_nodes);
357
            }
358
          }
359
          if(ret == 0) {
360
            Node sk =
361
                sm->mkDummySkolem("rsp", nm->stringType(), "Split RegExp");
362
            retNode = nm->mkNode(kind::STRING_TO_REGEXP, sk);
363
            if(!rest.isNull()) {
364
              retNode = rewrite(nm->mkNode(kind::REGEXP_CONCAT, retNode, rest));
365
            }
366
            Node exp =
367
                tmp.eqNode(nm->mkNode(kind::STRING_CONCAT, nm->mkConst(c), sk));
368
            retNode =
369
                rewrite(nm->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
370
          }
371
        }
372
8
        break;
373
      }
374
4
      case kind::REGEXP_CONCAT: {
375
8
        std::vector< Node > vec_nodes;
376
8
        std::vector< Node > delta_nodes;
377
8
        Node dnode = d_true;
378
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
379
16
          Node dc;
380
16
          Node exp2;
381
8
          int rt = derivativeS(r[i], c, dc);
382
8
          if(rt != 2) {
383
            if(rt == 0) {
384
              ret = 0;
385
            }
386
            std::vector< Node > vec_nodes2;
387
            if(dc != d_emptySingleton) {
388
              vec_nodes2.push_back( dc );
389
            }
390
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
391
              if(r[j] != d_emptySingleton) {
392
                vec_nodes2.push_back( r[j] );
393
              }
394
            }
395
            Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
396
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
397
            if(dnode != d_true) {
398
              tmp = rewrite(nm->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
399
              ret = 0;
400
            }
401
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
402
              vec_nodes.push_back( tmp );
403
            }
404
          }
405
16
          Node exp3;
406
8
          int rt2 = delta( r[i], exp3 );
407
8
          if( rt2 == 0 ) {
408
            dnode = rewrite(nm->mkNode(kind::AND, dnode, exp3));
409
8
          } else if( rt2 == 2 ) {
410
            break;
411
          }
412
        }
413
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
414
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
415
4
        if(retNode == d_emptyRegexp) {
416
4
          ret = 2;
417
        }
418
4
        break;
419
      }
420
4
      case kind::REGEXP_UNION: {
421
8
        std::vector< Node > vec_nodes;
422
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
423
16
          Node dc;
424
8
          int rt = derivativeS(r[i], c, dc);
425
8
          if(rt == 0) {
426
            ret = 0;
427
          }
428
8
          if(rt != 2) {
429
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
430
              vec_nodes.push_back( dc );
431
            }
432
          }
433
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
434
        }
435
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
436
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
437
4
        if(retNode == d_emptyRegexp) {
438
4
          ret = 2;
439
        }
440
4
        break;
441
      }
442
      case kind::REGEXP_INTER: {
443
        bool flag = true;
444
        bool flag_sg = false;
445
        std::vector< Node > vec_nodes;
446
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
447
          Node dc;
448
          int rt = derivativeS(r[i], c, dc);
449
          if(rt == 0) {
450
            ret = 0;
451
          } else if(rt == 2) {
452
            flag = false;
453
            break;
454
          }
455
          if(dc == d_sigma_star) {
456
            flag_sg = true;
457
          } else {
458
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
459
              vec_nodes.push_back( dc );
460
            }
461
          }
462
        }
463
        if(flag) {
464
          if(vec_nodes.size() == 0 && flag_sg) {
465
            retNode = d_sigma_star;
466
          } else {
467
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
468
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
469
            if(retNode == d_emptyRegexp) {
470
              ret = 2;
471
            }
472
          }
473
        } else {
474
          retNode = d_emptyRegexp;
475
          ret = 2;
476
        }
477
        break;
478
      }
479
4
      case kind::REGEXP_STAR: {
480
8
        Node dc;
481
4
        ret = derivativeS(r[0], c, dc);
482
4
        retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
483
4
        break;
484
      }
485
      case kind::REGEXP_LOOP: {
486
        uint32_t l = utils::getLoopMinOccurrences(r);
487
        uint32_t u = utils::getLoopMaxOccurrences(r);
488
        if (l == u && l == 0)
489
        {
490
          ret = 2;
491
          //retNode = d_emptyRegexp;
492
        } else {
493
          Node dc;
494
          ret = derivativeS(r[0], c, dc);
495
          if(dc==d_emptyRegexp) {
496
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
497
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
498
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
499
          } else {
500
            retNode = d_emptyRegexp;
501
          }
502
        }
503
        break;
504
      }
505
      case kind::REGEXP_COMPLEMENT:
506
      {
507
        // don't know result
508
        return 0;
509
        break;
510
      }
511
      default: {
512
        Assert(!utils::isRegExpKind(r.getKind()));
513
        return 0;
514
        break;
515
      }
516
    }
517
20
    if(retNode != d_emptyRegexp) {
518
      retNode = rewrite(retNode);
519
    }
520
40
    std::pair< Node, int > p(retNode, ret);
521
20
    d_deriv_cache[dv] = p;
522
  }
523
524
24
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
525
24
  return ret;
526
}
527
528
1410
Node RegExpOpr::derivativeSingle(Node r, cvc5::String c)
529
{
530
1410
  Assert(c.size() < 2);
531
1410
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
532
1410
  Node retNode = d_emptyRegexp;
533
2820
  PairNodeStr dv = std::make_pair( r, c );
534
1410
  NodeManager* nm = NodeManager::currentNM();
535
1410
  if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
536
526
    retNode = d_dv_cache[dv];
537
  }
538
884
  else if (c.empty())
539
  {
540
    Node exp;
541
    int tmp = delta( r, exp );
542
    if(tmp == 0) {
543
      // TODO variable
544
      retNode = d_emptyRegexp;
545
    } else if(tmp == 1) {
546
      retNode = r;
547
    } else {
548
      retNode = d_emptyRegexp;
549
    }
550
  } else {
551
884
    Kind k = r.getKind();
552
884
    switch( k ) {
553
      case kind::REGEXP_EMPTY: {
554
        retNode = d_emptyRegexp;
555
        break;
556
      }
557
      case kind::REGEXP_SIGMA: {
558
        retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
559
        break;
560
      }
561
      case kind::REGEXP_RANGE: {
562
        cvc5::String a = r[0].getConst<String>();
563
        cvc5::String b = r[1].getConst<String>();
564
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
565
        break;
566
      }
567
270
      case kind::STRING_TO_REGEXP: {
568
270
        if(r[0].isConst()) {
569
270
          if(r[0] == d_emptyString) {
570
14
            retNode = d_emptyRegexp;
571
          } else {
572
256
            if (r[0].getConst<String>().front() == c.front())
573
            {
574
192
              retNode = nm->mkNode(STRING_TO_REGEXP,
575
384
                                   Word::getLength(r[0]) == 1
576
384
                                       ? d_emptyString
577
                                       : Word::substr(r[0], 1));
578
            } else {
579
64
              retNode = d_emptyRegexp;
580
            }
581
          }
582
        } else {
583
          // TODO variable
584
          retNode = d_emptyRegexp;
585
        }
586
270
        break;
587
      }
588
355
      case kind::REGEXP_CONCAT: {
589
710
        Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
590
710
        std::vector< Node > vec_nodes;
591
531
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
592
685
          Node dc = derivativeSingle(r[i], c);
593
509
          if(dc != d_emptyRegexp) {
594
714
            std::vector< Node > vec_nodes2;
595
357
            if(dc != rees) {
596
236
              vec_nodes2.push_back( dc );
597
            }
598
768
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
599
411
              if(r[j] != rees) {
600
411
                vec_nodes2.push_back( r[j] );
601
              }
602
            }
603
357
            Node tmp = vec_nodes2.size()==0 ? rees :
604
714
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
605
357
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
606
355
              vec_nodes.push_back( tmp );
607
            }
608
          }
609
685
          Node exp;
610
509
          if( delta( r[i], exp ) != 1 ) {
611
333
            break;
612
          }
613
        }
614
999
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
615
644
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
616
355
        break;
617
      }
618
108
      case kind::REGEXP_UNION: {
619
216
        std::vector< Node > vec_nodes;
620
346
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
621
476
          Node dc = derivativeSingle(r[i], c);
622
238
          if(dc != d_emptyRegexp) {
623
100
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
624
98
              vec_nodes.push_back( dc );
625
            }
626
          }
627
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
628
        }
629
286
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
630
178
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
631
108
        break;
632
      }
633
      case kind::REGEXP_INTER: {
634
        bool flag = true;
635
        bool flag_sg = false;
636
        std::vector< Node > vec_nodes;
637
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
638
          Node dc = derivativeSingle(r[i], c);
639
          if(dc != d_emptyRegexp) {
640
            if(dc == d_sigma_star) {
641
              flag_sg = true;
642
            } else {
643
              if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
644
                vec_nodes.push_back( dc );
645
              }
646
            }
647
          } else {
648
            flag = false;
649
            break;
650
          }
651
        }
652
        if(flag) {
653
          if(vec_nodes.size() == 0 && flag_sg) {
654
            retNode = d_sigma_star;
655
          } else {
656
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
657
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
658
          }
659
        } else {
660
          retNode = d_emptyRegexp;
661
        }
662
        break;
663
      }
664
151
      case kind::REGEXP_STAR: {
665
302
        Node dc = derivativeSingle(r[0], c);
666
151
        if(dc != d_emptyRegexp) {
667
115
          retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
668
        } else {
669
36
          retNode = d_emptyRegexp;
670
        }
671
151
        break;
672
      }
673
      case kind::REGEXP_LOOP: {
674
        uint32_t l = utils::getLoopMinOccurrences(r);
675
        uint32_t u = utils::getLoopMaxOccurrences(r);
676
        if (l == u || l == 0)
677
        {
678
          retNode = d_emptyRegexp;
679
        } else {
680
          Node dc = derivativeSingle(r[0], c);
681
          if(dc != d_emptyRegexp) {
682
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
683
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
684
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
685
          } else {
686
            retNode = d_emptyRegexp;
687
          }
688
        }
689
        //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
690
        break;
691
      }
692
      case kind::REGEXP_COMPLEMENT:
693
      default: {
694
        Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
695
        Unreachable();
696
        break;
697
      }
698
    }
699
884
    if(retNode != d_emptyRegexp) {
700
732
      retNode = rewrite(retNode);
701
    }
702
884
    d_dv_cache[dv] = retNode;
703
  }
704
1410
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
705
2820
  return retNode;
706
}
707
708
898
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
709
{
710
898
  Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
711
  std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
712
898
      d_fset_cache.find(r);
713
898
  if(itr != d_fset_cache.end()) {
714
312
    pcset.insert((itr->second).first.begin(), (itr->second).first.end());
715
312
    pvset.insert((itr->second).second.begin(), (itr->second).second.end());
716
  } else {
717
    // cset is code points
718
1172
    std::set<unsigned> cset;
719
1172
    SetNodes vset;
720
586
    Kind k = r.getKind();
721
586
    switch( k ) {
722
      case kind::REGEXP_EMPTY: {
723
        break;
724
      }
725
      case kind::REGEXP_RANGE: {
726
        unsigned a = r[0].getConst<String>().front();
727
        unsigned b = r[1].getConst<String>().front();
728
        Assert(a < b);
729
        Assert(b < std::numeric_limits<unsigned>::max());
730
        for (unsigned c = a; c <= b; c++)
731
        {
732
          cset.insert(c);
733
        }
734
        break;
735
      }
736
204
      case kind::STRING_TO_REGEXP: {
737
408
        Node st = rewrite(r[0]);
738
204
        if(st.isConst()) {
739
408
          String s = st.getConst<String>();
740
204
          if(s.size() != 0) {
741
196
            unsigned sc = s.front();
742
196
            cset.insert(sc);
743
          }
744
        }
745
        else if (st.getKind() == kind::STRING_CONCAT)
746
        {
747
          if(st[0].isConst()) {
748
            String s = st[0].getConst<String>();
749
            unsigned sc = s.front();
750
            cset.insert(sc);
751
          } else {
752
            vset.insert( st[0] );
753
          }
754
        }
755
        else
756
        {
757
          vset.insert(st);
758
        }
759
204
        break;
760
      }
761
253
      case kind::REGEXP_CONCAT: {
762
329
        for(unsigned i=0; i<r.getNumChildren(); i++) {
763
319
          firstChars(r[i], cset, vset);
764
395
          Node n = r[i];
765
395
          Node exp;
766
319
          if (delta(n, exp) != 1)
767
          {
768
243
            break;
769
          }
770
        }
771
253
        break;
772
      }
773
46
      case kind::REGEXP_UNION: {
774
146
        for(unsigned i=0; i<r.getNumChildren(); i++) {
775
100
          firstChars(r[i], cset, vset);
776
        }
777
46
        break;
778
      }
779
      case kind::REGEXP_INTER: {
780
        //TODO: Overapproximation for now
781
        //for(unsigned i=0; i<r.getNumChildren(); i++) {
782
        // firstChars(r[i], cset, vset);
783
        //}
784
        firstChars(r[0], cset, vset);
785
        break;
786
      }
787
83
      case kind::REGEXP_STAR: {
788
83
        firstChars(r[0], cset, vset);
789
83
        break;
790
      }
791
      case kind::REGEXP_LOOP: {
792
        firstChars(r[0], cset, vset);
793
        break;
794
      }
795
      case kind::REGEXP_SIGMA:
796
      case kind::REGEXP_COMPLEMENT:
797
      default: {
798
        // we do not expect to call this function on regular expressions that
799
        // aren't a standard regular expression kind. However, if we do, then
800
        // the following code is conservative and says that the current
801
        // regular expression can begin with any character.
802
        Assert(utils::isRegExpKind(k));
803
        // can start with any character
804
        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
805
        for (unsigned i = 0; i <= d_lastchar; i++)
806
        {
807
          cset.insert(i);
808
        }
809
        break;
810
      }
811
    }
812
586
    pcset.insert(cset.begin(), cset.end());
813
586
    pvset.insert(vset.begin(), vset.end());
814
1172
    std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
815
586
    d_fset_cache[r] = p;
816
  }
817
818
898
  if(Trace.isOn("regexp-fset")) {
819
    Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
820
    for (std::set<unsigned>::const_iterator it = pcset.begin();
821
         it != pcset.end();
822
         ++it)
823
    {
824
      if (it != pcset.begin())
825
      {
826
        Trace("regexp-fset") << ",";
827
      }
828
      Trace("regexp-fset") << (*it);
829
      }
830
    Trace("regexp-fset") << "}" << std::endl;
831
  }
832
898
}
833
834
558
Node RegExpOpr::simplify(Node t, bool polarity)
835
{
836
1116
  Trace("strings-regexp-simpl")
837
558
      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
838
558
  Assert(t.getKind() == kind::STRING_IN_REGEXP);
839
1116
  Node tlit = polarity ? t : t.notNode();
840
1116
  Node conc;
841
558
  std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit);
842
558
  if (itr != d_simpCache.end())
843
  {
844
8
    return itr->second;
845
  }
846
550
  if (polarity)
847
  {
848
812
    std::vector<Node> newSkolems;
849
406
    conc = reduceRegExpPos(tlit, d_sc, newSkolems);
850
  }
851
  else
852
  {
853
    // see if we can use an optimized version of the reduction for re.++.
854
288
    Node r = t[1];
855
144
    if (r.getKind() == REGEXP_CONCAT)
856
    {
857
      // the index we are removing from the RE concatenation
858
122
      size_t index = 0;
859
      // As an optimization to the reduction, if we can determine that
860
      // all strings in the language of R1 have the same length, say n,
861
      // then the conclusion of the reduction is quantifier-free:
862
      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
863
244
      Node reLen = getRegExpConcatFixed(r, index);
864
122
      if (!reLen.isNull())
865
      {
866
102
        conc = reduceRegExpNegConcatFixed(tlit, reLen, index);
867
      }
868
    }
869
144
    if (conc.isNull())
870
    {
871
42
      conc = reduceRegExpNeg(tlit);
872
    }
873
  }
874
550
  d_simpCache[tlit] = conc;
875
1100
  Trace("strings-regexp-simpl")
876
550
      << "RegExpOpr::simplify: returns " << conc << std::endl;
877
550
  return conc;
878
}
879
880
150
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index)
881
{
882
150
  Assert(r.getKind() == REGEXP_CONCAT);
883
150
  index = 0;
884
300
  Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
885
150
  if (!reLen.isNull())
886
  {
887
96
    return reLen;
888
  }
889
  // try from the opposite end
890
54
  size_t indexE = r.getNumChildren() - 1;
891
54
  reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
892
54
  if (!reLen.isNull())
893
  {
894
33
    index = indexE;
895
33
    return reLen;
896
  }
897
21
  return Node::null();
898
}
899
900
44
Node RegExpOpr::reduceRegExpNeg(Node mem)
901
{
902
44
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
903
88
  Node s = mem[0][0];
904
88
  Node r = mem[0][1];
905
44
  NodeManager* nm = NodeManager::currentNM();
906
44
  Kind k = r.getKind();
907
88
  Node zero = nm->mkConst(Rational(0));
908
44
  Node conc;
909
44
  if (k == REGEXP_CONCAT)
910
  {
911
    // do not use length entailment, call regular expression concat
912
44
    Node reLen;
913
22
    size_t i = 0;
914
22
    conc = reduceRegExpNegConcatFixed(mem, reLen, i);
915
  }
916
22
  else if (k == REGEXP_STAR)
917
  {
918
40
    Node emp = Word::mkEmptyWord(s.getType());
919
40
    Node lens = nm->mkNode(STRING_LENGTH, s);
920
40
    Node sne = s.eqNode(emp).negate();
921
40
    Node b1 = nm->mkBoundVar(nm->integerType());
922
40
    Node b1v = nm->mkNode(BOUND_VAR_LIST, b1);
923
    Node g1 =
924
40
        nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1));
925
    // internal
926
40
    Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
927
40
    Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
928
40
    Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate();
929
40
    Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate();
930
931
20
    conc = nm->mkNode(OR, s1r1, s2r2);
932
20
    conc = nm->mkNode(IMPLIES, g1, conc);
933
    // must mark as an internal quantifier
934
20
    conc = utils::mkForallInternal(b1v, conc);
935
20
    conc = nm->mkNode(AND, sne, conc);
936
  }
937
  else
938
  {
939
2
    Assert(!utils::isRegExpKind(k));
940
  }
941
88
  return conc;
942
}
943
944
142
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index)
945
{
946
142
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
947
284
  Node s = mem[0][0];
948
284
  Node r = mem[0][1];
949
142
  NodeManager* nm = NodeManager::currentNM();
950
142
  Assert(r.getKind() == REGEXP_CONCAT);
951
284
  Node zero = nm->mkConst(Rational(0));
952
  // The following simplification states that
953
  //    ~( s in R1 ++ R2 ++... ++ Rn )
954
  // is equivalent to
955
  //    forall x.
956
  //      0 <= x <= len(s) =>
957
  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
958
  // Index is the child index of r that we are stripping off, which is either
959
  // from the beginning or the end.
960
142
  Assert(index == 0 || index == r.getNumChildren() - 1);
961
284
  Node lens = nm->mkNode(STRING_LENGTH, s);
962
284
  Node b1;
963
284
  Node b1v;
964
284
  Node guard;
965
142
  if (reLen.isNull())
966
  {
967
22
    b1 = SkolemCache::mkIndexVar(mem);
968
22
    b1v = nm->mkNode(BOUND_VAR_LIST, b1);
969
66
    guard = nm->mkNode(AND,
970
44
                       nm->mkNode(GEQ, b1, zero),
971
44
                       nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
972
  }
973
  else
974
  {
975
120
    b1 = reLen;
976
  }
977
284
  Node s1;
978
284
  Node s2;
979
142
  if (index == 0)
980
  {
981
112
    s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
982
112
    s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
983
  }
984
  else
985
  {
986
30
    s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
987
30
    s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1));
988
  }
989
284
  Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate();
990
284
  std::vector<Node> nvec;
991
578
  for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
992
  {
993
436
    if (i != index)
994
    {
995
294
      nvec.push_back(r[i]);
996
    }
997
  }
998
284
  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
999
284
  Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
1000
142
  Node conc = nm->mkNode(OR, s1r1, s2r2);
1001
142
  if (!b1v.isNull())
1002
  {
1003
22
    conc = nm->mkNode(OR, guard.negate(), conc);
1004
    // must mark as an internal quantifier
1005
22
    conc = utils::mkForallInternal(b1v, conc);
1006
  }
1007
284
  return conc;
1008
}
1009
1010
699
Node RegExpOpr::reduceRegExpPos(Node mem,
1011
                                SkolemCache* sc,
1012
                                std::vector<Node>& newSkolems)
1013
{
1014
699
  Assert(mem.getKind() == STRING_IN_REGEXP);
1015
1398
  Node s = mem[0];
1016
1398
  Node r = mem[1];
1017
699
  NodeManager* nm = NodeManager::currentNM();
1018
699
  Kind k = r.getKind();
1019
699
  Node conc;
1020
699
  if (k == REGEXP_CONCAT)
1021
  {
1022
966
    std::vector<Node> nvec;
1023
966
    std::vector<Node> cc;
1024
483
    SkolemManager* sm = nm->getSkolemManager();
1025
    // Look up skolems for each of the components. If sc has optimizations
1026
    // enabled, this will return arguments of str.to_re.
1027
1968
    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
1028
    {
1029
1485
      if (r[i].getKind() == STRING_TO_REGEXP)
1030
      {
1031
        // optimization, just take the body
1032
466
        newSkolems.push_back(r[i][0]);
1033
      }
1034
      else
1035
      {
1036
2038
        Node ivalue = nm->mkConst(Rational(i));
1037
        Node sk = sm->mkSkolemFunction(SkolemFunId::RE_UNFOLD_POS_COMPONENT,
1038
2038
                                       s.getType(),
1039
4076
                                       {mem[0], mem[1], ivalue});
1040
1019
        newSkolems.push_back(sk);
1041
1019
        nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
1042
      }
1043
    }
1044
    // (str.in_re x (re.++ R0 .... Rn)) =>
1045
    // (and (= x (str.++ k0 ... kn)) (str.in_re k0 R0) ... (str.in_re kn Rn) )
1046
966
    Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
1047
483
    nvec.insert(nvec.begin(), lem);
1048
483
    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
1049
  }
1050
216
  else if (k == REGEXP_STAR)
1051
  {
1052
424
    Node emp = Word::mkEmptyWord(s.getType());
1053
424
    Node se = s.eqNode(emp);
1054
424
    Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
1055
424
    Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]);
1056
424
    Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand);
1057
    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
1058
    // is matched by `R`, or `x` is matched by two or more `R`s. For the
1059
    // last case, `x` will break into three pieces, making the beginning
1060
    // and the end each match `R` and the middle match `R*`. Matching the
1061
    // beginning and the end with `R` allows us to reason about the
1062
    // beginning and the end of `x` simultaneously.
1063
    //
1064
    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))
1065
1066
    // We also immediately unfold the last disjunct for re.*. The advantage
1067
    // of doing this is that we use the same scheme for skolems above.
1068
424
    std::vector<Node> newSkolemsC;
1069
212
    sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC);
1070
212
    Assert(newSkolemsC.size() == 3);
1071
    // make the return lemma
1072
    // can also assume the component match the first and last R are non-empty.
1073
    // This means that the overall conclusion is:
1074
    //   (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^
1075
    //                          k1 in R ^ k2 in (re.* R) ^ k3 in R ^
1076
    //                          k1 != ""  ^ k3 != "")
1077
212
    conc = nm->mkNode(OR,
1078
                      se,
1079
                      sinr,
1080
848
                      nm->mkNode(AND,
1081
                                 sinRExp,
1082
424
                                 newSkolemsC[0].eqNode(emp).negate(),
1083
424
                                 newSkolemsC[2].eqNode(emp).negate()));
1084
  }
1085
  else
1086
  {
1087
4
    Assert(!utils::isRegExpKind(k));
1088
  }
1089
1398
  return conc;
1090
}
1091
1092
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1093
  for(std::set< PairNodes >::const_iterator itr = s.begin();
1094
      itr != s.end(); ++itr) {
1095
    if((itr->first == n1 && itr->second == n2) ||
1096
       (itr->first == n2 && itr->second == n1)) {
1097
      return true;
1098
    }
1099
  }
1100
  return false;
1101
}
1102
1103
654
bool RegExpOpr::containC2(unsigned cnt, Node n) {
1104
654
  if(n.getKind() == kind::REGEXP_RV) {
1105
202
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1106
        << "Exceeded UINT32_MAX in RegExpOpr::containC2";
1107
202
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1108
202
    return cnt == y;
1109
452
  } else if(n.getKind() == kind::REGEXP_CONCAT) {
1110
102
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1111
80
      if(containC2(cnt, n[i])) {
1112
12
        return true;
1113
      }
1114
    }
1115
418
  } else if(n.getKind() == kind::REGEXP_STAR) {
1116
34
    return containC2(cnt, n[0]);
1117
384
  } else if(n.getKind() == kind::REGEXP_LOOP) {
1118
    return containC2(cnt, n[0]);
1119
384
  } else if(n.getKind() == kind::REGEXP_UNION) {
1120
118
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1121
88
      if(containC2(cnt, n[i])) {
1122
12
        return true;
1123
      }
1124
    }
1125
  }
1126
394
  return false;
1127
}
1128
198
Node RegExpOpr::convert1(unsigned cnt, Node n) {
1129
198
  Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1130
396
  Node r1, r2;
1131
198
  convert2(cnt, n, r1, r2);
1132
198
  Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1133
198
  Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1134
198
     NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1135
198
  ret = rewrite(ret);
1136
198
  Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1137
396
  return ret;
1138
}
1139
421
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1140
421
  if(n == d_emptyRegexp) {
1141
24
    r1 = d_emptyRegexp;
1142
24
    r2 = d_emptyRegexp;
1143
24
    return;
1144
397
  } else if(n == d_emptySingleton) {
1145
25
    r1 = d_emptySingleton;
1146
25
    r2 = d_emptySingleton;
1147
  }
1148
397
  Kind nk = n.getKind();
1149
397
  if (nk == REGEXP_RV)
1150
  {
1151
61
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1152
        << "Exceeded UINT32_MAX in RegExpOpr::convert2";
1153
61
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1154
61
    r1 = d_emptySingleton;
1155
61
    if(cnt == y) {
1156
61
      r2 = d_emptyRegexp;
1157
    } else {
1158
      r2 = n;
1159
    }
1160
  }
1161
336
  else if (nk == REGEXP_CONCAT)
1162
  {
1163
212
    bool flag = true;
1164
424
    std::vector<Node> vr1, vr2;
1165
591
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1166
452
      if(containC2(cnt, n[i])) {
1167
146
        Node t1, t2;
1168
73
        convert2(cnt, n[i], t1, t2);
1169
73
        vr1.push_back(t1);
1170
73
        r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1171
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1172
73
        vr2.push_back(t2);
1173
73
        for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1174
          vr2.push_back(n[j]);
1175
        }
1176
73
        r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1177
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1178
73
        flag = false;
1179
73
        break;
1180
      } else {
1181
379
        vr1.push_back(n[i]);
1182
      }
1183
    }
1184
212
    if(flag) {
1185
139
      r1 = d_emptySingleton;
1186
139
      r2 = n;
1187
    }
1188
  }
1189
124
  else if (nk == REGEXP_UNION)
1190
  {
1191
126
    std::vector<Node> vr1, vr2;
1192
213
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1193
300
      Node t1, t2;
1194
150
      convert2(cnt, n[i], t1, t2);
1195
150
      vr1.push_back(t1);
1196
150
      vr2.push_back(t2);
1197
    }
1198
63
    r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1199
63
    r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1200
  }
1201
61
  else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
1202
           || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
1203
  {
1204
    // this leaves n unchanged
1205
61
    r1 = d_emptySingleton;
1206
61
    r2 = n;
1207
  }
1208
  else
1209
  {
1210
    //is it possible?
1211
    Unreachable();
1212
  }
1213
}
1214
1215
317
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
1216
  //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1217
317
  if(r1 > r2) {
1218
60
    TNode tmpNode = r1;
1219
30
    r1 = r2;
1220
30
    r2 = tmpNode;
1221
  }
1222
317
  NodeManager* nm = NodeManager::currentNM();
1223
317
  Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
1224
634
  std::pair < Node, Node > p(r1, r2);
1225
317
  std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
1226
317
  Node rNode;
1227
317
  if(itr != d_inter_cache.end()) {
1228
38
    rNode = itr->second;
1229
  } else {
1230
279
    Trace("regexp-int-debug") << " ... not in cache" << std::endl;
1231
279
    if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1232
      Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
1233
      rNode = d_emptyRegexp;
1234
279
    } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1235
24
      Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
1236
48
      Node exp;
1237
24
      int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1238
24
      if(r == 0) {
1239
        //TODO: variable
1240
        Unreachable();
1241
24
      } else if(r == 1) {
1242
18
        rNode = d_emptySingleton;
1243
      } else {
1244
6
        rNode = d_emptyRegexp;
1245
      }
1246
255
    } else if(r1 == r2) {
1247
      Trace("regexp-int-debug") << " ... equal" << std::endl;
1248
      rNode = r1; //convert1(cnt, r1);
1249
    } else {
1250
255
      Trace("regexp-int-debug") << " ... normal checking" << std::endl;
1251
255
      std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1252
255
      if(itrcache != cache.end()) {
1253
57
        rNode = itrcache->second;
1254
      } else {
1255
198
        Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
1256
396
        std::vector<unsigned> cset;
1257
396
        std::set<unsigned> cset1, cset2;
1258
396
        std::set< Node > vset1, vset2;
1259
198
        firstChars(r1, cset1, vset1);
1260
198
        firstChars(r2, cset2, vset2);
1261
198
        Trace("regexp-int-debug") << " ... got fset" << std::endl;
1262
198
        std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
1263
198
             std::inserter(cset, cset.begin()));
1264
396
        std::vector< Node > vec_nodes;
1265
396
        Node delta_exp;
1266
198
        Trace("regexp-int-debug") << " ... try delta" << std::endl;
1267
198
        int flag = delta(r1, delta_exp);
1268
198
        int flag2 = delta(r2, delta_exp);
1269
198
        Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
1270
198
        if(flag != 2 && flag2 != 2) {
1271
21
          if(flag == 1 && flag2 == 1) {
1272
21
            vec_nodes.push_back(d_emptySingleton);
1273
          } else {
1274
            //TODO: variable
1275
            Unreachable();
1276
          }
1277
        }
1278
198
        if(Trace.isOn("regexp-int-debug")) {
1279
          Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
1280
          for (std::vector<unsigned>::const_iterator it = cset.begin();
1281
               it != cset.end();
1282
               ++it)
1283
          {
1284
            if (it != cset.begin())
1285
            {
1286
              Trace("regexp-int-debug") << ", ";
1287
            }
1288
            Trace("regexp-int-debug") << (*it);
1289
          }
1290
          Trace("regexp-int-debug") << std::endl;
1291
        }
1292
396
        std::map< PairNodes, Node > cacheX;
1293
454
        for (std::vector<unsigned>::const_iterator it = cset.begin();
1294
454
             it != cset.end();
1295
             ++it)
1296
        {
1297
512
          std::vector<unsigned> cvec;
1298
256
          cvec.push_back(*it);
1299
512
          String c(cvec);
1300
256
          Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
1301
512
          Node r1l = derivativeSingle(r1, c);
1302
512
          Node r2l = derivativeSingle(r2, c);
1303
256
          Trace("regexp-int-debug") << "  ... got partial(r1,c) = " << mkString(r1l) << std::endl;
1304
256
          Trace("regexp-int-debug") << "  ... got partial(r2,c) = " << mkString(r2l) << std::endl;
1305
512
          Node rt;
1306
1307
256
          if(r1l > r2l) {
1308
68
            Node tnode = r1l;
1309
34
            r1l = r2l; r2l = tnode;
1310
          }
1311
512
          PairNodes pp(r1l, r2l);
1312
256
          std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp);
1313
256
          if(itr2 != cacheX.end()) {
1314
8
            rt = itr2->second;
1315
          } else {
1316
496
            std::map< PairNodes, Node > cache2(cache);
1317
496
            cache2[p] = NodeManager::currentNM()->mkNode(
1318
                kind::REGEXP_RV,
1319
496
                NodeManager::currentNM()->mkConst(cvc5::Rational(cnt)));
1320
248
            rt = intersectInternal(r1l, r2l, cache2, cnt+1);
1321
248
            cacheX[ pp ] = rt;
1322
          }
1323
1324
512
          rt = rewrite(
1325
1024
              nm->mkNode(kind::REGEXP_CONCAT,
1326
512
                         nm->mkNode(kind::STRING_TO_REGEXP, nm->mkConst(c)),
1327
                         rt));
1328
1329
256
          Trace("regexp-int-debug") << "  ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
1330
256
          vec_nodes.push_back(rt);
1331
        }
1332
396
        rNode = rewrite(vec_nodes.size() == 0
1333
725
                            ? d_emptyRegexp
1334
194
                            : vec_nodes.size() == 1
1335
135
                                  ? vec_nodes[0]
1336
                                  : nm->mkNode(kind::REGEXP_UNION, vec_nodes));
1337
198
        rNode = convert1(cnt, rNode);
1338
198
        rNode = rewrite(rNode);
1339
      }
1340
    }
1341
279
    Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
1342
279
    if (!expr::hasSubtermKind(REGEXP_RV, rNode))
1343
    {
1344
115
      d_inter_cache[p] = rNode;
1345
    }
1346
  }
1347
317
  Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1348
634
  return rNode;
1349
}
1350
1351
138
Node RegExpOpr::removeIntersection(Node r) {
1352
138
  Assert(checkConstRegExp(r));
1353
138
  NodeManager* nm = NodeManager::currentNM();
1354
276
  std::unordered_map<TNode, Node> visited;
1355
138
  std::unordered_map<TNode, Node>::iterator it;
1356
276
  std::vector<TNode> visit;
1357
276
  TNode cur;
1358
138
  visit.push_back(r);
1359
1948
  do
1360
  {
1361
2086
    cur = visit.back();
1362
2086
    visit.pop_back();
1363
2086
    it = visited.find(cur);
1364
1365
2086
    if (it == visited.end())
1366
    {
1367
986
      visited[cur] = Node::null();
1368
986
      visit.push_back(cur);
1369
1948
      for (const Node& cn : cur)
1370
      {
1371
962
        visit.push_back(cn);
1372
      }
1373
    }
1374
1100
    else if (it->second.isNull())
1375
    {
1376
986
      Kind ck = cur.getKind();
1377
1972
      Node ret;
1378
986
      bool childChanged = false;
1379
1972
      std::vector<Node> children;
1380
986
      if (cur.getMetaKind() == metakind::PARAMETERIZED)
1381
      {
1382
        children.push_back(cur.getOperator());
1383
      }
1384
1948
      for (const Node& cn : cur)
1385
      {
1386
962
        it = visited.find(cn);
1387
962
        Assert(it != visited.end());
1388
962
        Assert(!it->second.isNull());
1389
962
        if (ck == REGEXP_INTER)
1390
        {
1391
4
          if (ret.isNull())
1392
          {
1393
2
            ret = it->second;
1394
          }
1395
          else
1396
          {
1397
2
            ret = intersect(ret, it->second);
1398
          }
1399
        }
1400
        else
1401
        {
1402
          // will construct below
1403
958
          childChanged = childChanged || cn != it->second;
1404
958
          children.push_back(it->second);
1405
        }
1406
      }
1407
986
      if (ck != REGEXP_INTER)
1408
      {
1409
984
        if (childChanged)
1410
        {
1411
2
          ret = nm->mkNode(cur.getKind(), children);
1412
        }
1413
        else
1414
        {
1415
982
          ret = cur;
1416
        }
1417
      }
1418
986
      visited[cur] = ret;
1419
    }
1420
2086
  } while (!visit.empty());
1421
138
  Assert(visited.find(r) != visited.end());
1422
138
  Assert(!visited.find(r)->second.isNull());
1423
138
  if (Trace.isOn("regexp-intersect"))
1424
  {
1425
    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
1426
                              << " ) = " << mkString(visited[r]) << std::endl;
1427
  }
1428
276
  return visited[r];
1429
}
1430
1431
69
Node RegExpOpr::intersect(Node r1, Node r2)
1432
{
1433
69
  if (!checkConstRegExp(r1) || !checkConstRegExp(r2))
1434
  {
1435
    return Node::null();
1436
  }
1437
138
  Node rr1 = removeIntersection(r1);
1438
138
  Node rr2 = removeIntersection(r2);
1439
138
  std::map<PairNodes, Node> cache;
1440
69
  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
1441
69
  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
1442
138
  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
1443
69
                            << ",\n\t" << mkString(r2) << ")" << std::endl;
1444
138
  Node retNode = intersectInternal(rr1, rr2, cache, 1);
1445
138
  Trace("regexp-intersect")
1446
138
      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
1447
69
      << ") =\n\t" << mkString(retNode) << std::endl;
1448
69
  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
1449
69
  return retNode;
1450
}
1451
1452
//printing
1453
19621
std::string RegExpOpr::niceChar(Node r) {
1454
19621
  if(r.isConst()) {
1455
38910
    std::string s = r.getConst<String>().toString();
1456
19455
    return s == "." ? "\\." : s;
1457
  } else {
1458
332
    std::string ss = "$" + r.toString();
1459
166
    return ss;
1460
  }
1461
}
1462
37194
std::string RegExpOpr::mkString( Node r ) {
1463
37194
  std::string retStr;
1464
37194
  if(r.isNull()) {
1465
    retStr = "\\E";
1466
  } else {
1467
37194
    int k = r.getKind();
1468
37194
    switch( k ) {
1469
472
      case kind::REGEXP_EMPTY: {
1470
472
        retStr += "\\E";
1471
472
        break;
1472
      }
1473
434
      case kind::REGEXP_SIGMA: {
1474
434
        retStr += ".";
1475
434
        break;
1476
      }
1477
18511
      case kind::STRING_TO_REGEXP: {
1478
37022
        std::string tmp( niceChar( r[0] ) );
1479
18511
        retStr += tmp.size()==1? tmp : "(" + tmp + ")";
1480
18511
        break;
1481
      }
1482
5506
      case kind::REGEXP_CONCAT: {
1483
5506
        retStr += "(";
1484
19099
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1485
          //if(i != 0) retStr += ".";
1486
13593
          retStr += mkString( r[i] );
1487
        }
1488
5506
        retStr += ")";
1489
5506
        break;
1490
      }
1491
3902
      case kind::REGEXP_UNION: {
1492
3902
        retStr += "(";
1493
12428
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1494
8526
          if(i != 0) retStr += "|";
1495
8526
          retStr += mkString( r[i] );
1496
        }
1497
3902
        retStr += ")";
1498
3902
        break;
1499
      }
1500
68
      case kind::REGEXP_INTER: {
1501
68
        retStr += "(";
1502
204
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1503
136
          if(i != 0) retStr += "&";
1504
136
          retStr += mkString( r[i] );
1505
        }
1506
68
        retStr += ")";
1507
68
        break;
1508
      }
1509
7110
      case kind::REGEXP_STAR: {
1510
7110
        retStr += mkString( r[0] );
1511
7110
        retStr += "*";
1512
7110
        break;
1513
      }
1514
      case kind::REGEXP_PLUS: {
1515
        retStr += mkString( r[0] );
1516
        retStr += "+";
1517
        break;
1518
      }
1519
      case kind::REGEXP_OPT: {
1520
        retStr += mkString( r[0] );
1521
        retStr += "?";
1522
        break;
1523
      }
1524
555
      case kind::REGEXP_RANGE: {
1525
555
        retStr += "[";
1526
555
        retStr += niceChar( r[0] );
1527
555
        retStr += "-";
1528
555
        retStr += niceChar( r[1] );
1529
555
        retStr += "]";
1530
555
        break;
1531
      }
1532
      case kind::REGEXP_LOOP: {
1533
        uint32_t l = utils::getLoopMinOccurrences(r);
1534
        std::stringstream ss;
1535
        ss << "(" << mkString(r[0]) << "){" << l << ",";
1536
        if(r.getNumChildren() == 3) {
1537
          uint32_t u = utils::getLoopMaxOccurrences(r);
1538
          ss << u;
1539
        }
1540
        ss << "}";
1541
        retStr += ss.str();
1542
        break;
1543
      }
1544
532
      case kind::REGEXP_RV: {
1545
532
        retStr += "<";
1546
532
        retStr += r[0].getConst<Rational>().getNumerator().toString();
1547
532
        retStr += ">";
1548
532
        break;
1549
      }
1550
98
      case REGEXP_COMPLEMENT:
1551
      {
1552
98
        retStr += "^(";
1553
98
        retStr += mkString(r[0]);
1554
98
        retStr += ")";
1555
98
        break;
1556
      }
1557
6
      default:
1558
      {
1559
12
        std::stringstream ss;
1560
6
        ss << r;
1561
6
        retStr = ss.str();
1562
6
        Assert(!utils::isRegExpKind(r.getKind()));
1563
6
        break;
1564
      }
1565
    }
1566
  }
1567
1568
37194
  return retStr;
1569
}
1570
1571
1310
bool RegExpOpr::regExpIncludes(Node r1, Node r2)
1572
{
1573
1310
  const auto& it = d_inclusionCache.find(std::make_pair(r1, r2));
1574
1310
  if (it != d_inclusionCache.end())
1575
  {
1576
728
    return (*it).second;
1577
  }
1578
582
  bool result = RegExpEntail::regExpIncludes(r1, r2);
1579
582
  d_inclusionCache[std::make_pair(r1, r2)] = result;
1580
582
  return result;
1581
}
1582
1583
}  // namespace strings
1584
}  // namespace theory
1585
31137
}  // namespace cvc5