GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/theory/strings/regexp_operation.cpp Lines: 721 998 72.2 %
Date: 2021-03-22 Branches: 1525 4222 36.1 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file regexp_operation.cpp
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Tianyi Liang, Andrew Reynolds, Mathias Preiner
6
 ** This file is part of the CVC4 project.
7
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 ** in the top-level source directory and their institutional affiliations.
9
 ** All rights reserved.  See the file COPYING in the top-level source
10
 ** directory for licensing information.\endverbatim
11
 **
12
 ** \brief Symbolic Regular Expresion Operations
13
 **
14
 ** Symbolic Regular Expresion Operations
15
 **/
16
17
#include "theory/strings/regexp_operation.h"
18
19
#include <sstream>
20
21
#include "expr/node_algorithm.h"
22
#include "options/strings_options.h"
23
#include "theory/rewriter.h"
24
#include "theory/strings/regexp_entail.h"
25
#include "theory/strings/theory_strings_utils.h"
26
#include "theory/strings/word.h"
27
28
using namespace CVC4::kind;
29
30
namespace CVC4 {
31
namespace theory {
32
namespace strings {
33
34
8999
RegExpOpr::RegExpOpr(SkolemCache* sc)
35
    : d_true(NodeManager::currentNM()->mkConst(true)),
36
      d_false(NodeManager::currentNM()->mkConst(false)),
37
      d_emptyRegexp(NodeManager::currentNM()->mkNode(kind::REGEXP_EMPTY,
38
17998
                                                     std::vector<Node>{})),
39
17998
      d_zero(NodeManager::currentNM()->mkConst(::CVC4::Rational(0))),
40
17998
      d_one(NodeManager::currentNM()->mkConst(::CVC4::Rational(1))),
41
      d_sigma(NodeManager::currentNM()->mkNode(kind::REGEXP_SIGMA,
42
17998
                                               std::vector<Node>{})),
43
      d_sigma_star(
44
          NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, d_sigma)),
45
80991
      d_sc(sc)
46
{
47
8999
  d_emptyString = Word::mkEmptyWord(NodeManager::currentNM()->stringType());
48
49
8999
  d_emptySingleton =
50
17998
      NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, d_emptyString);
51
8999
  d_lastchar = utils::getAlphabetCardinality() - 1;
52
8999
}
53
54
8996
RegExpOpr::~RegExpOpr() {}
55
56
592
bool RegExpOpr::checkConstRegExp( Node r ) {
57
592
  Assert(r.getType().isRegExp());
58
1184
  Trace("strings-regexp-cstre")
59
592
      << "RegExpOpr::checkConstRegExp /" << mkString(r) << "/" << std::endl;
60
592
  RegExpConstType rct = getRegExpConstType(r);
61
592
  return rct != RE_C_VARIABLE;
62
}
63
64
1069
RegExpConstType RegExpOpr::getRegExpConstType(Node r)
65
{
66
1069
  Assert(r.getType().isRegExp());
67
1069
  std::unordered_map<Node, RegExpConstType, NodeHashFunction>::iterator it;
68
2138
  std::vector<TNode> visit;
69
2138
  TNode cur;
70
1069
  visit.push_back(r);
71
1345
  do
72
  {
73
2414
    cur = visit.back();
74
2414
    visit.pop_back();
75
2414
    it = d_constCache.find(cur);
76
77
2414
    Kind ck = cur.getKind();
78
2414
    if (it == d_constCache.end())
79
    {
80
772
      if (ck == STRING_TO_REGEXP)
81
      {
82
480
        Node tmp = Rewriter::rewrite(cur[0]);
83
240
        d_constCache[cur] =
84
240
            tmp.isConst() ? RE_C_CONRETE_CONSTANT : RE_C_VARIABLE;
85
      }
86
532
      else if (ck == REGEXP_SIGMA || ck == REGEXP_RANGE)
87
      {
88
79
        d_constCache[cur] = RE_C_CONSTANT;
89
      }
90
453
      else if (!utils::isRegExpKind(ck))
91
      {
92
        // non-regular expression applications, e.g. function applications
93
        // with regular expression return type are treated as variables.
94
4
        d_constCache[cur] = RE_C_VARIABLE;
95
      }
96
      else
97
      {
98
449
        d_constCache[cur] = RE_C_UNKNOWN;
99
449
        visit.push_back(cur);
100
449
        visit.insert(visit.end(), cur.begin(), cur.end());
101
      }
102
    }
103
1642
    else if (it->second == RE_C_UNKNOWN)
104
    {
105
449
      RegExpConstType ret = ck == REGEXP_COMPLEMENT ? RE_C_CONSTANT : RE_C_CONRETE_CONSTANT;
106
1345
      for (const Node& cn : cur)
107
      {
108
896
        it = d_constCache.find(cn);
109
896
        Assert(it != d_constCache.end());
110
896
        if (it->second > ret)
111
        {
112
188
          ret = it->second;
113
        }
114
      }
115
449
      d_constCache[cur] = ret;
116
    }
117
2414
  } while (!visit.empty());
118
1069
  Assert(d_constCache.find(r) != d_constCache.end());
119
2138
  return d_constCache[r];
120
}
121
122
// 0-unknown, 1-yes, 2-no
123
1311
int RegExpOpr::delta( Node r, Node &exp ) {
124
  std::map<Node, std::pair<int, Node> >::const_iterator itd =
125
1311
      d_delta_cache.find(r);
126
1311
  if (itd != d_delta_cache.end())
127
  {
128
    // already computed
129
803
    exp = itd->second.second;
130
803
    return itd->second.first;
131
  }
132
508
  Trace("regexp-delta") << "RegExpOpr::delta: " << r << std::endl;
133
508
  int ret = 0;
134
508
  NodeManager* nm = NodeManager::currentNM();
135
508
  Kind k = r.getKind();
136
508
  switch (k)
137
  {
138
    case REGEXP_EMPTY:
139
    case REGEXP_SIGMA:
140
    case REGEXP_RANGE:
141
    {
142
      // does not contain empty string
143
      ret = 2;
144
      break;
145
    }
146
178
    case STRING_TO_REGEXP:
147
    {
148
356
      Node tmp = Rewriter::rewrite(r[0]);
149
178
      if (tmp.isConst())
150
      {
151
174
        if (tmp == d_emptyString)
152
        {
153
10
          ret = 1;
154
        } else {
155
164
          ret = 2;
156
        }
157
      }
158
      else
159
      {
160
4
        ret = 0;
161
4
        if (tmp.getKind() == STRING_CONCAT)
162
        {
163
          for (const Node& tmpc : tmp)
164
          {
165
            if (tmpc.isConst())
166
            {
167
              ret = 2;
168
              break;
169
            }
170
          }
171
        }
172
4
        if (ret == 0)
173
        {
174
4
          exp = r[0].eqNode(d_emptyString);
175
        }
176
      }
177
178
      break;
178
    }
179
253
    case REGEXP_CONCAT:
180
    case REGEXP_UNION:
181
    case REGEXP_INTER:
182
    {
183
      // has there been an unknown child?
184
253
      bool hasUnknownChild = false;
185
506
      std::vector<Node> vec;
186
253
      int checkTmp = k == REGEXP_UNION ? 1 : 2;
187
253
      int retTmp = k == REGEXP_UNION ? 2 : 1;
188
333
      for (const Node& rc : r)
189
      {
190
397
        Node exp2;
191
317
        int tmp = delta(rc, exp2);
192
317
        if (tmp == checkTmp)
193
        {
194
          // return is implied by the child's return value
195
237
          ret = checkTmp;
196
237
          break;
197
        }
198
80
        else if (tmp == 0)
199
        {
200
          // unknown if child contains empty string
201
4
          Assert(!exp2.isNull());
202
4
          vec.push_back(exp2);
203
4
          hasUnknownChild = true;
204
        }
205
      }
206
253
      if (ret != checkTmp)
207
      {
208
16
        if (!hasUnknownChild)
209
        {
210
16
          ret = retTmp;
211
        } else {
212
          Kind kr = k == REGEXP_UNION ? OR : AND;
213
          exp = vec.size() == 1 ? vec[0] : nm->mkNode(kr, vec);
214
        }
215
      }
216
253
      break;
217
    }
218
73
    case REGEXP_STAR:
219
    case REGEXP_OPT:
220
    {
221
      // contains empty string
222
73
      ret = 1;
223
73
      break;
224
    }
225
    case REGEXP_PLUS:
226
    {
227
      ret = delta(r[0], exp);
228
      break;
229
    }
230
    case REGEXP_LOOP:
231
    {
232
      uint32_t lo = utils::getLoopMinOccurrences(r);
233
      if (lo == 0)
234
      {
235
        ret = 1;
236
      }
237
      else
238
      {
239
        ret = delta(r[0], exp);
240
      }
241
      break;
242
    }
243
4
    case REGEXP_COMPLEMENT:
244
    {
245
4
      int tmp = delta(r[0], exp);
246
      // flip the result if known
247
4
      ret = tmp == 0 ? 0 : (3 - tmp);
248
4
      exp = exp.isNull() ? exp : exp.negate();
249
4
      break;
250
    }
251
    default:
252
    {
253
      Assert(!utils::isRegExpKind(k));
254
      break;
255
    }
256
  }
257
508
  if (!exp.isNull())
258
  {
259
4
    exp = Rewriter::rewrite(exp);
260
  }
261
1016
  std::pair<int, Node> p(ret, exp);
262
508
  d_delta_cache[r] = p;
263
1016
  Trace("regexp-delta") << "RegExpOpr::delta returns " << ret << " for " << r
264
508
                        << ", expr = " << exp << std::endl;
265
508
  return ret;
266
}
267
268
// 0-unknown, 1-yes, 2-no
269
24
int RegExpOpr::derivativeS( Node r, CVC4::String c, Node &retNode ) {
270
24
  Assert(c.size() < 2);
271
24
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
272
273
24
  int ret = 1;
274
24
  retNode = d_emptyRegexp;
275
24
  NodeManager* nm = NodeManager::currentNM();
276
277
48
  PairNodeStr dv = std::make_pair( r, c );
278
24
  if( d_deriv_cache.find( dv ) != d_deriv_cache.end() ) {
279
4
    retNode = d_deriv_cache[dv].first;
280
4
    ret = d_deriv_cache[dv].second;
281
  }
282
20
  else if (c.empty())
283
  {
284
    Node expNode;
285
    ret = delta( r, expNode );
286
    if(ret == 0) {
287
      retNode = NodeManager::currentNM()->mkNode(kind::ITE, expNode, r, d_emptyRegexp);
288
    } else if(ret == 1) {
289
      retNode = r;
290
    }
291
    std::pair< Node, int > p(retNode, ret);
292
    d_deriv_cache[dv] = p;
293
  } else {
294
20
    switch( r.getKind() ) {
295
      case kind::REGEXP_EMPTY: {
296
        ret = 2;
297
        break;
298
      }
299
      case kind::REGEXP_SIGMA: {
300
        retNode = d_emptySingleton;
301
        break;
302
      }
303
      case kind::REGEXP_RANGE: {
304
        CVC4::String a = r[0].getConst<String>();
305
        CVC4::String b = r[1].getConst<String>();
306
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
307
        break;
308
      }
309
8
      case kind::STRING_TO_REGEXP: {
310
16
        Node tmp = Rewriter::rewrite(r[0]);
311
8
        if(tmp.isConst()) {
312
8
          if(tmp == d_emptyString) {
313
4
            ret = 2;
314
          } else {
315
4
            if (tmp.getConst<String>().front() == c.front())
316
            {
317
              retNode =
318
                  nm->mkNode(STRING_TO_REGEXP,
319
                             Word::getLength(tmp) == 1 ? d_emptyString
320
                                                       : Word::substr(tmp, 1));
321
            } else {
322
4
              ret = 2;
323
            }
324
          }
325
        } else {
326
          ret = 0;
327
          Node rest;
328
          if(tmp.getKind() == kind::STRING_CONCAT) {
329
            Node t2 = tmp[0];
330
            if(t2.isConst()) {
331
              if (t2.getConst<String>().front() == c.front())
332
              {
333
                Node n = nm->mkNode(STRING_TO_REGEXP,
334
                                    Word::getLength(tmp) == 1
335
                                        ? d_emptyString
336
                                        : Word::substr(tmp, 1));
337
                std::vector< Node > vec_nodes;
338
                vec_nodes.push_back(n);
339
                for(unsigned i=1; i<tmp.getNumChildren(); i++) {
340
                  vec_nodes.push_back(tmp[i]);
341
                }
342
                retNode = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
343
                ret = 1;
344
              } else {
345
                ret = 2;
346
              }
347
            } else {
348
              tmp = tmp[0];
349
              std::vector< Node > vec_nodes;
350
              for(unsigned i=1; i<tmp.getNumChildren(); i++) {
351
                vec_nodes.push_back(tmp[i]);
352
              }
353
              rest = NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vec_nodes);
354
            }
355
          }
356
          if(ret == 0) {
357
            Node sk = NodeManager::currentNM()->mkSkolem( "rsp", NodeManager::currentNM()->stringType(), "Split RegExp" );
358
            retNode = NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, sk);
359
            if(!rest.isNull()) {
360
              retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, retNode, rest));
361
            }
362
            Node exp = tmp.eqNode(NodeManager::currentNM()->mkNode(kind::STRING_CONCAT,
363
                        NodeManager::currentNM()->mkConst(c), sk));
364
            retNode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, exp, retNode, d_emptyRegexp));
365
          }
366
        }
367
8
        break;
368
      }
369
4
      case kind::REGEXP_CONCAT: {
370
8
        std::vector< Node > vec_nodes;
371
8
        std::vector< Node > delta_nodes;
372
8
        Node dnode = d_true;
373
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
374
16
          Node dc;
375
16
          Node exp2;
376
8
          int rt = derivativeS(r[i], c, dc);
377
8
          if(rt != 2) {
378
            if(rt == 0) {
379
              ret = 0;
380
            }
381
            std::vector< Node > vec_nodes2;
382
            if(dc != d_emptySingleton) {
383
              vec_nodes2.push_back( dc );
384
            }
385
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
386
              if(r[j] != d_emptySingleton) {
387
                vec_nodes2.push_back( r[j] );
388
              }
389
            }
390
            Node tmp = vec_nodes2.size()==0 ? d_emptySingleton :
391
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
392
            if(dnode != d_true) {
393
              tmp = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::ITE, dnode, tmp, d_emptyRegexp));
394
              ret = 0;
395
            }
396
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
397
              vec_nodes.push_back( tmp );
398
            }
399
          }
400
16
          Node exp3;
401
8
          int rt2 = delta( r[i], exp3 );
402
8
          if( rt2 == 0 ) {
403
            dnode = Rewriter::rewrite(NodeManager::currentNM()->mkNode(kind::AND, dnode, exp3));
404
8
          } else if( rt2 == 2 ) {
405
            break;
406
          }
407
        }
408
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
409
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
410
4
        if(retNode == d_emptyRegexp) {
411
4
          ret = 2;
412
        }
413
4
        break;
414
      }
415
4
      case kind::REGEXP_UNION: {
416
8
        std::vector< Node > vec_nodes;
417
12
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
418
16
          Node dc;
419
8
          int rt = derivativeS(r[i], c, dc);
420
8
          if(rt == 0) {
421
            ret = 0;
422
          }
423
8
          if(rt != 2) {
424
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
425
              vec_nodes.push_back( dc );
426
            }
427
          }
428
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] " << mkString(r[i]) << " returns " << mkString(dc) << std::endl;
429
        }
430
4
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
431
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
432
4
        if(retNode == d_emptyRegexp) {
433
4
          ret = 2;
434
        }
435
4
        break;
436
      }
437
      case kind::REGEXP_INTER: {
438
        bool flag = true;
439
        bool flag_sg = false;
440
        std::vector< Node > vec_nodes;
441
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
442
          Node dc;
443
          int rt = derivativeS(r[i], c, dc);
444
          if(rt == 0) {
445
            ret = 0;
446
          } else if(rt == 2) {
447
            flag = false;
448
            break;
449
          }
450
          if(dc == d_sigma_star) {
451
            flag_sg = true;
452
          } else {
453
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
454
              vec_nodes.push_back( dc );
455
            }
456
          }
457
        }
458
        if(flag) {
459
          if(vec_nodes.size() == 0 && flag_sg) {
460
            retNode = d_sigma_star;
461
          } else {
462
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
463
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
464
            if(retNode == d_emptyRegexp) {
465
              ret = 2;
466
            }
467
          }
468
        } else {
469
          retNode = d_emptyRegexp;
470
          ret = 2;
471
        }
472
        break;
473
      }
474
4
      case kind::REGEXP_STAR: {
475
8
        Node dc;
476
4
        ret = derivativeS(r[0], c, dc);
477
4
        retNode = dc==d_emptyRegexp ? dc : (dc==d_emptySingleton ? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r ));
478
4
        break;
479
      }
480
      case kind::REGEXP_LOOP: {
481
        uint32_t l = utils::getLoopMinOccurrences(r);
482
        uint32_t u = utils::getLoopMaxOccurrences(r);
483
        if (l == u && l == 0)
484
        {
485
          ret = 2;
486
          //retNode = d_emptyRegexp;
487
        } else {
488
          Node dc;
489
          ret = derivativeS(r[0], c, dc);
490
          if(dc==d_emptyRegexp) {
491
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
492
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
493
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
494
          } else {
495
            retNode = d_emptyRegexp;
496
          }
497
        }
498
        break;
499
      }
500
      case kind::REGEXP_COMPLEMENT:
501
      {
502
        // don't know result
503
        return 0;
504
        break;
505
      }
506
      default: {
507
        Assert(!utils::isRegExpKind(r.getKind()));
508
        return 0;
509
        break;
510
      }
511
    }
512
20
    if(retNode != d_emptyRegexp) {
513
      retNode = Rewriter::rewrite( retNode );
514
    }
515
40
    std::pair< Node, int > p(retNode, ret);
516
20
    d_deriv_cache[dv] = p;
517
  }
518
519
24
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
520
24
  return ret;
521
}
522
523
970
Node RegExpOpr::derivativeSingle( Node r, CVC4::String c ) {
524
970
  Assert(c.size() < 2);
525
970
  Trace("regexp-derive") << "RegExp-derive starts with /" << mkString( r ) << "/, c=" << c << std::endl;
526
970
  Node retNode = d_emptyRegexp;
527
1940
  PairNodeStr dv = std::make_pair( r, c );
528
970
  NodeManager* nm = NodeManager::currentNM();
529
970
  if( d_dv_cache.find( dv ) != d_dv_cache.end() ) {
530
290
    retNode = d_dv_cache[dv];
531
  }
532
680
  else if (c.empty())
533
  {
534
    Node exp;
535
    int tmp = delta( r, exp );
536
    if(tmp == 0) {
537
      // TODO variable
538
      retNode = d_emptyRegexp;
539
    } else if(tmp == 1) {
540
      retNode = r;
541
    } else {
542
      retNode = d_emptyRegexp;
543
    }
544
  } else {
545
680
    Kind k = r.getKind();
546
680
    switch( k ) {
547
      case kind::REGEXP_EMPTY: {
548
        retNode = d_emptyRegexp;
549
        break;
550
      }
551
      case kind::REGEXP_SIGMA: {
552
        retNode = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
553
        break;
554
      }
555
      case kind::REGEXP_RANGE: {
556
        CVC4::String a = r[0].getConst<String>();
557
        CVC4::String b = r[1].getConst<String>();
558
        retNode = (a <= c && c <= b) ? d_emptySingleton : d_emptyRegexp;
559
        break;
560
      }
561
252
      case kind::STRING_TO_REGEXP: {
562
252
        if(r[0].isConst()) {
563
252
          if(r[0] == d_emptyString) {
564
8
            retNode = d_emptyRegexp;
565
          } else {
566
244
            if (r[0].getConst<String>().front() == c.front())
567
            {
568
188
              retNode = nm->mkNode(STRING_TO_REGEXP,
569
376
                                   Word::getLength(r[0]) == 1
570
376
                                       ? d_emptyString
571
                                       : Word::substr(r[0], 1));
572
            } else {
573
56
              retNode = d_emptyRegexp;
574
            }
575
          }
576
        } else {
577
          // TODO variable
578
          retNode = d_emptyRegexp;
579
        }
580
252
        break;
581
      }
582
269
      case kind::REGEXP_CONCAT: {
583
538
        Node rees = NodeManager::currentNM()->mkNode( kind::STRING_TO_REGEXP, d_emptyString );
584
538
        std::vector< Node > vec_nodes;
585
357
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
586
435
          Node dc = derivativeSingle(r[i], c);
587
347
          if(dc != d_emptyRegexp) {
588
550
            std::vector< Node > vec_nodes2;
589
275
            if(dc != rees) {
590
196
              vec_nodes2.push_back( dc );
591
            }
592
554
            for(unsigned j=i+1; j<r.getNumChildren(); ++j) {
593
279
              if(r[j] != rees) {
594
279
                vec_nodes2.push_back( r[j] );
595
              }
596
            }
597
275
            Node tmp = vec_nodes2.size()==0 ? rees :
598
550
              vec_nodes2.size()==1 ? vec_nodes2[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, vec_nodes2 );
599
275
            if(std::find(vec_nodes.begin(), vec_nodes.end(), tmp) == vec_nodes.end()) {
600
275
              vec_nodes.push_back( tmp );
601
            }
602
          }
603
435
          Node exp;
604
347
          if( delta( r[i], exp ) != 1 ) {
605
259
            break;
606
          }
607
        }
608
771
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
609
502
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
610
269
        break;
611
      }
612
50
      case kind::REGEXP_UNION: {
613
100
        std::vector< Node > vec_nodes;
614
168
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
615
236
          Node dc = derivativeSingle(r[i], c);
616
118
          if(dc != d_emptyRegexp) {
617
52
            if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
618
50
              vec_nodes.push_back( dc );
619
            }
620
          }
621
          //Trace("regexp-derive") << "RegExp-derive OR R[" << i << "] /" << mkString(r[i]) << "/ returns /" << mkString(dc) << "/" << std::endl;
622
        }
623
150
        retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
624
100
              ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_UNION, vec_nodes ) );
625
50
        break;
626
      }
627
      case kind::REGEXP_INTER: {
628
        bool flag = true;
629
        bool flag_sg = false;
630
        std::vector< Node > vec_nodes;
631
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
632
          Node dc = derivativeSingle(r[i], c);
633
          if(dc != d_emptyRegexp) {
634
            if(dc == d_sigma_star) {
635
              flag_sg = true;
636
            } else {
637
              if(std::find(vec_nodes.begin(), vec_nodes.end(), dc) == vec_nodes.end()) {
638
                vec_nodes.push_back( dc );
639
              }
640
            }
641
          } else {
642
            flag = false;
643
            break;
644
          }
645
        }
646
        if(flag) {
647
          if(vec_nodes.size() == 0 && flag_sg) {
648
            retNode = d_sigma_star;
649
          } else {
650
            retNode = vec_nodes.size() == 0 ? d_emptyRegexp :
651
                  ( vec_nodes.size()==1 ? vec_nodes[0] : NodeManager::currentNM()->mkNode( kind::REGEXP_INTER, vec_nodes ) );
652
          }
653
        } else {
654
          retNode = d_emptyRegexp;
655
        }
656
        break;
657
      }
658
109
      case kind::REGEXP_STAR: {
659
218
        Node dc = derivativeSingle(r[0], c);
660
109
        if(dc != d_emptyRegexp) {
661
87
          retNode = dc==d_emptySingleton? r : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r );
662
        } else {
663
22
          retNode = d_emptyRegexp;
664
        }
665
109
        break;
666
      }
667
      case kind::REGEXP_LOOP: {
668
        uint32_t l = utils::getLoopMinOccurrences(r);
669
        uint32_t u = utils::getLoopMaxOccurrences(r);
670
        if (l == u || l == 0)
671
        {
672
          retNode = d_emptyRegexp;
673
        } else {
674
          Node dc = derivativeSingle(r[0], c);
675
          if(dc != d_emptyRegexp) {
676
            Node lop = nm->mkConst(RegExpLoop(l == 0 ? 0 : (l - 1), u - 1));
677
            Node r2 = nm->mkNode(REGEXP_LOOP, lop, r[0]);
678
            retNode = dc==d_emptySingleton? r2 : NodeManager::currentNM()->mkNode( kind::REGEXP_CONCAT, dc, r2 );
679
          } else {
680
            retNode = d_emptyRegexp;
681
          }
682
        }
683
        //Trace("regexp-derive") << "RegExp-derive : REGEXP_LOOP returns /" << mkString(retNode) << "/" << std::endl;
684
        break;
685
      }
686
      case kind::REGEXP_COMPLEMENT:
687
      default: {
688
        Trace("strings-error") << "Unsupported term: " << mkString( r ) << " in derivative of RegExp." << std::endl;
689
        Unreachable();
690
        break;
691
      }
692
    }
693
680
    if(retNode != d_emptyRegexp) {
694
584
      retNode = Rewriter::rewrite( retNode );
695
    }
696
680
    d_dv_cache[dv] = retNode;
697
  }
698
970
  Trace("regexp-derive") << "RegExp-derive returns : /" << mkString( retNode ) << "/" << std::endl;
699
1940
  return retNode;
700
}
701
702
746
void RegExpOpr::firstChars(Node r, std::set<unsigned> &pcset, SetNodes &pvset)
703
{
704
746
  Trace("regexp-fset") << "Start FSET(" << mkString(r) << ")" << std::endl;
705
  std::map<Node, std::pair<std::set<unsigned>, SetNodes> >::const_iterator itr =
706
746
      d_fset_cache.find(r);
707
746
  if(itr != d_fset_cache.end()) {
708
226
    pcset.insert((itr->second).first.begin(), (itr->second).first.end());
709
226
    pvset.insert((itr->second).second.begin(), (itr->second).second.end());
710
  } else {
711
    // cset is code points
712
1040
    std::set<unsigned> cset;
713
1040
    SetNodes vset;
714
520
    Kind k = r.getKind();
715
520
    switch( k ) {
716
      case kind::REGEXP_EMPTY: {
717
        break;
718
      }
719
      case kind::REGEXP_RANGE: {
720
        unsigned a = r[0].getConst<String>().front();
721
        unsigned b = r[1].getConst<String>().front();
722
        Assert(a < b);
723
        Assert(b < std::numeric_limits<unsigned>::max());
724
        for (unsigned c = a; c <= b; c++)
725
        {
726
          cset.insert(c);
727
        }
728
        break;
729
      }
730
202
      case kind::STRING_TO_REGEXP: {
731
404
        Node st = Rewriter::rewrite(r[0]);
732
202
        if(st.isConst()) {
733
404
          String s = st.getConst<String>();
734
202
          if(s.size() != 0) {
735
196
            unsigned sc = s.front();
736
196
            cset.insert(sc);
737
          }
738
        }
739
        else if (st.getKind() == kind::STRING_CONCAT)
740
        {
741
          if(st[0].isConst()) {
742
            String s = st[0].getConst<String>();
743
            unsigned sc = s.front();
744
            cset.insert(sc);
745
          } else {
746
            vset.insert( st[0] );
747
          }
748
        }
749
        else
750
        {
751
          vset.insert(st);
752
        }
753
202
        break;
754
      }
755
221
      case kind::REGEXP_CONCAT: {
756
267
        for(unsigned i=0; i<r.getNumChildren(); i++) {
757
261
          firstChars(r[i], cset, vset);
758
307
          Node n = r[i];
759
307
          Node exp;
760
261
          if (delta(n, exp) != 1)
761
          {
762
215
            break;
763
          }
764
        }
765
221
        break;
766
      }
767
28
      case kind::REGEXP_UNION: {
768
92
        for(unsigned i=0; i<r.getNumChildren(); i++) {
769
64
          firstChars(r[i], cset, vset);
770
        }
771
28
        break;
772
      }
773
      case kind::REGEXP_INTER: {
774
        //TODO: Overapproximation for now
775
        //for(unsigned i=0; i<r.getNumChildren(); i++) {
776
        // firstChars(r[i], cset, vset);
777
        //}
778
        firstChars(r[0], cset, vset);
779
        break;
780
      }
781
69
      case kind::REGEXP_STAR: {
782
69
        firstChars(r[0], cset, vset);
783
69
        break;
784
      }
785
      case kind::REGEXP_LOOP: {
786
        firstChars(r[0], cset, vset);
787
        break;
788
      }
789
      case kind::REGEXP_SIGMA:
790
      case kind::REGEXP_COMPLEMENT:
791
      default: {
792
        // we do not expect to call this function on regular expressions that
793
        // aren't a standard regular expression kind. However, if we do, then
794
        // the following code is conservative and says that the current
795
        // regular expression can begin with any character.
796
        Assert(utils::isRegExpKind(k));
797
        // can start with any character
798
        Assert(d_lastchar < std::numeric_limits<unsigned>::max());
799
        for (unsigned i = 0; i <= d_lastchar; i++)
800
        {
801
          cset.insert(i);
802
        }
803
        break;
804
      }
805
    }
806
520
    pcset.insert(cset.begin(), cset.end());
807
520
    pvset.insert(vset.begin(), vset.end());
808
1040
    std::pair<std::set<unsigned>, SetNodes> p(cset, vset);
809
520
    d_fset_cache[r] = p;
810
  }
811
812
746
  if(Trace.isOn("regexp-fset")) {
813
    Trace("regexp-fset") << "END FSET(" << mkString(r) << ") = {";
814
    for (std::set<unsigned>::const_iterator it = pcset.begin();
815
         it != pcset.end();
816
         ++it)
817
    {
818
      if (it != pcset.begin())
819
      {
820
        Trace("regexp-fset") << ",";
821
      }
822
      Trace("regexp-fset") << (*it);
823
      }
824
    Trace("regexp-fset") << "}" << std::endl;
825
  }
826
746
}
827
828
324
Node RegExpOpr::simplify(Node t, bool polarity)
829
{
830
648
  Trace("strings-regexp-simpl")
831
324
      << "RegExpOpr::simplify: " << t << ", polarity=" << polarity << std::endl;
832
324
  Assert(t.getKind() == kind::STRING_IN_REGEXP);
833
648
  Node tlit = polarity ? t : t.notNode();
834
648
  Node conc;
835
324
  std::map<Node, Node>::const_iterator itr = d_simpCache.find(tlit);
836
324
  if (itr != d_simpCache.end())
837
  {
838
4
    return itr->second;
839
  }
840
320
  if (polarity)
841
  {
842
492
    std::vector<Node> newSkolems;
843
246
    conc = reduceRegExpPos(tlit, d_sc, newSkolems);
844
  }
845
  else
846
  {
847
    // see if we can use an optimized version of the reduction for re.++.
848
148
    Node r = t[1];
849
74
    if (r.getKind() == REGEXP_CONCAT)
850
    {
851
      // the index we are removing from the RE concatenation
852
64
      size_t index = 0;
853
      // As an optimization to the reduction, if we can determine that
854
      // all strings in the language of R1 have the same length, say n,
855
      // then the conclusion of the reduction is quantifier-free:
856
      //    ~( substr(s,0,n) in R1 ) OR ~( substr(s,n,len(s)-n) in R2)
857
128
      Node reLen = getRegExpConcatFixed(r, index);
858
64
      if (!reLen.isNull())
859
      {
860
42
        conc = reduceRegExpNegConcatFixed(tlit, reLen, index);
861
      }
862
    }
863
74
    if (conc.isNull())
864
    {
865
32
      conc = reduceRegExpNeg(tlit);
866
    }
867
  }
868
320
  d_simpCache[tlit] = conc;
869
640
  Trace("strings-regexp-simpl")
870
320
      << "RegExpOpr::simplify: returns " << conc << std::endl;
871
320
  return conc;
872
}
873
874
75
Node RegExpOpr::getRegExpConcatFixed(Node r, size_t& index)
875
{
876
75
  Assert(r.getKind() == REGEXP_CONCAT);
877
75
  index = 0;
878
150
  Node reLen = RegExpEntail::getFixedLengthForRegexp(r[0]);
879
75
  if (!reLen.isNull())
880
  {
881
47
    return reLen;
882
  }
883
  // try from the opposite end
884
28
  size_t indexE = r.getNumChildren() - 1;
885
28
  reLen = RegExpEntail::getFixedLengthForRegexp(r[indexE]);
886
28
  if (!reLen.isNull())
887
  {
888
4
    index = indexE;
889
4
    return reLen;
890
  }
891
24
  return Node::null();
892
}
893
894
36
Node RegExpOpr::reduceRegExpNeg(Node mem)
895
{
896
36
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
897
72
  Node s = mem[0][0];
898
72
  Node r = mem[0][1];
899
36
  NodeManager* nm = NodeManager::currentNM();
900
36
  Kind k = r.getKind();
901
72
  Node zero = nm->mkConst(Rational(0));
902
36
  Node conc;
903
36
  if (k == REGEXP_CONCAT)
904
  {
905
    // do not use length entailment, call regular expression concat
906
52
    Node reLen;
907
26
    size_t i = 0;
908
26
    conc = reduceRegExpNegConcatFixed(mem, reLen, i);
909
  }
910
10
  else if (k == REGEXP_STAR)
911
  {
912
16
    Node emp = Word::mkEmptyWord(s.getType());
913
16
    Node lens = nm->mkNode(STRING_LENGTH, s);
914
16
    Node sne = s.eqNode(emp).negate();
915
16
    Node b1 = nm->mkBoundVar(nm->integerType());
916
16
    Node b1v = nm->mkNode(BOUND_VAR_LIST, b1);
917
    Node g1 =
918
16
        nm->mkNode(AND, nm->mkNode(GT, b1, zero), nm->mkNode(GEQ, lens, b1));
919
    // internal
920
16
    Node s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
921
16
    Node s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
922
16
    Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[0]).negate();
923
16
    Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r).negate();
924
925
8
    conc = nm->mkNode(OR, s1r1, s2r2);
926
8
    conc = nm->mkNode(IMPLIES, g1, conc);
927
8
    conc = nm->mkNode(FORALL, b1v, conc);
928
8
    conc = nm->mkNode(AND, sne, conc);
929
  }
930
  else
931
  {
932
2
    Assert(!utils::isRegExpKind(k));
933
  }
934
72
  return conc;
935
}
936
937
74
Node RegExpOpr::reduceRegExpNegConcatFixed(Node mem, Node reLen, size_t index)
938
{
939
74
  Assert(mem.getKind() == NOT && mem[0].getKind() == STRING_IN_REGEXP);
940
148
  Node s = mem[0][0];
941
148
  Node r = mem[0][1];
942
74
  NodeManager* nm = NodeManager::currentNM();
943
74
  Assert(r.getKind() == REGEXP_CONCAT);
944
148
  Node zero = nm->mkConst(Rational(0));
945
  // The following simplification states that
946
  //    ~( s in R1 ++ R2 ++... ++ Rn )
947
  // is equivalent to
948
  //    forall x.
949
  //      0 <= x <= len(s) =>
950
  //        ~(substr(s,0,x) in R1) OR ~(substr(s,x,len(s)-x) in R2 ++ ... ++ Rn)
951
  // Index is the child index of r that we are stripping off, which is either
952
  // from the beginning or the end.
953
74
  Assert(index == 0 || index == r.getNumChildren() - 1);
954
148
  Node lens = nm->mkNode(STRING_LENGTH, s);
955
148
  Node b1;
956
148
  Node b1v;
957
148
  Node guard;
958
74
  if (reLen.isNull())
959
  {
960
26
    b1 = SkolemCache::mkIndexVar(mem);
961
26
    b1v = nm->mkNode(BOUND_VAR_LIST, b1);
962
78
    guard = nm->mkNode(AND,
963
52
                       nm->mkNode(GEQ, b1, zero),
964
52
                       nm->mkNode(GEQ, nm->mkNode(STRING_LENGTH, s), b1));
965
  }
966
  else
967
  {
968
48
    b1 = reLen;
969
  }
970
148
  Node s1;
971
148
  Node s2;
972
74
  if (index == 0)
973
  {
974
70
    s1 = nm->mkNode(STRING_SUBSTR, s, zero, b1);
975
70
    s2 = nm->mkNode(STRING_SUBSTR, s, b1, nm->mkNode(MINUS, lens, b1));
976
  }
977
  else
978
  {
979
4
    s1 = nm->mkNode(STRING_SUBSTR, s, nm->mkNode(MINUS, lens, b1), b1);
980
4
    s2 = nm->mkNode(STRING_SUBSTR, s, zero, nm->mkNode(MINUS, lens, b1));
981
  }
982
148
  Node s1r1 = nm->mkNode(STRING_IN_REGEXP, s1, r[index]).negate();
983
148
  std::vector<Node> nvec;
984
320
  for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; i++)
985
  {
986
246
    if (i != index)
987
    {
988
172
      nvec.push_back(r[i]);
989
    }
990
  }
991
148
  Node r2 = nvec.size() == 1 ? nvec[0] : nm->mkNode(REGEXP_CONCAT, nvec);
992
74
  r2 = Rewriter::rewrite(r2);
993
148
  Node s2r2 = nm->mkNode(STRING_IN_REGEXP, s2, r2).negate();
994
74
  Node conc = nm->mkNode(OR, s1r1, s2r2);
995
74
  if (!b1v.isNull())
996
  {
997
26
    conc = nm->mkNode(OR, guard.negate(), conc);
998
26
    conc = nm->mkNode(FORALL, b1v, conc);
999
  }
1000
148
  return conc;
1001
}
1002
1003
426
Node RegExpOpr::reduceRegExpPos(Node mem,
1004
                                SkolemCache* sc,
1005
                                std::vector<Node>& newSkolems)
1006
{
1007
426
  Assert(mem.getKind() == STRING_IN_REGEXP);
1008
852
  Node s = mem[0];
1009
852
  Node r = mem[1];
1010
426
  NodeManager* nm = NodeManager::currentNM();
1011
426
  Kind k = r.getKind();
1012
426
  Node conc;
1013
426
  if (k == REGEXP_CONCAT)
1014
  {
1015
616
    std::vector<Node> nvec;
1016
616
    std::vector<Node> cc;
1017
    // get the (valid) existential for this membership
1018
616
    Node eform = getExistsForRegExpConcatMem(mem);
1019
308
    SkolemManager* sm = nm->getSkolemManager();
1020
    // Notice that this rule does not introduce witness terms, instead it
1021
    // uses skolems in the conclusion of the proof rule directly. Thus,
1022
    // the existential eform does not need to be explicitly justified by a
1023
    // proof here, since it is only being used as an intermediate formula in
1024
    // this inference. Hence we do not pass a proof generator to mkSkolemize.
1025
308
    sm->mkSkolemize(eform, newSkolems, "rc", "regexp concat skolem");
1026
308
    Assert(newSkolems.size() == r.getNumChildren());
1027
    // Look up skolems for each of the components. If sc has optimizations
1028
    // enabled, this will return arguments of str.to_re.
1029
1209
    for (unsigned i = 0, nchild = r.getNumChildren(); i < nchild; ++i)
1030
    {
1031
901
      if (r[i].getKind() == STRING_TO_REGEXP)
1032
      {
1033
        // optimization, just take the body
1034
350
        newSkolems[i] = r[i][0];
1035
      }
1036
      else
1037
      {
1038
551
        nvec.push_back(nm->mkNode(STRING_IN_REGEXP, newSkolems[i], r[i]));
1039
      }
1040
    }
1041
    // (str.in_re x (re.++ R1 .... Rn)) =>
1042
    // (and (str.in_re k1 R1) ... (str.in_re kn Rn) (= x (str.++ k1 ... kn)))
1043
616
    Node lem = s.eqNode(nm->mkNode(STRING_CONCAT, newSkolems));
1044
308
    nvec.push_back(lem);
1045
308
    conc = nvec.size() == 1 ? nvec[0] : nm->mkNode(AND, nvec);
1046
  }
1047
118
  else if (k == REGEXP_STAR)
1048
  {
1049
228
    Node emp = Word::mkEmptyWord(s.getType());
1050
228
    Node se = s.eqNode(emp);
1051
228
    Node sinr = nm->mkNode(STRING_IN_REGEXP, s, r[0]);
1052
228
    Node reExpand = nm->mkNode(REGEXP_CONCAT, r[0], r, r[0]);
1053
228
    Node sinRExp = nm->mkNode(STRING_IN_REGEXP, s, reExpand);
1054
    // We unfold `x in R*` by considering three cases: `x` is empty, `x`
1055
    // is matched by `R`, or `x` is matched by two or more `R`s. For the
1056
    // last case, `x` will break into three pieces, making the beginning
1057
    // and the end each match `R` and the middle match `R*`. Matching the
1058
    // beginning and the end with `R` allows us to reason about the
1059
    // beginning and the end of `x` simultaneously.
1060
    //
1061
    // x in R* ---> (x = "") v (x in R) v (x in (re.++ R (re.* R) R))
1062
1063
    // We also immediately unfold the last disjunct for re.*. The advantage
1064
    // of doing this is that we use the same scheme for skolems above.
1065
228
    std::vector<Node> newSkolemsC;
1066
114
    sinRExp = reduceRegExpPos(sinRExp, sc, newSkolemsC);
1067
114
    Assert(newSkolemsC.size() == 3);
1068
    // make the return lemma
1069
    // can also assume the component match the first and last R are non-empty.
1070
    // This means that the overall conclusion is:
1071
    //   (x = "") v (x in R) v (x = (str.++ k1 k2 k3) ^
1072
    //                          k1 in R ^ k2 in (re.* R) ^ k3 in R ^
1073
    //                          k1 != ""  ^ k3 != "")
1074
114
    conc = nm->mkNode(OR,
1075
                      se,
1076
                      sinr,
1077
456
                      nm->mkNode(AND,
1078
                                 sinRExp,
1079
228
                                 newSkolemsC[0].eqNode(emp).negate(),
1080
228
                                 newSkolemsC[2].eqNode(emp).negate()));
1081
  }
1082
  else
1083
  {
1084
4
    Assert(!utils::isRegExpKind(k));
1085
  }
1086
852
  return conc;
1087
}
1088
1089
bool RegExpOpr::isPairNodesInSet(std::set< PairNodes > &s, Node n1, Node n2) {
1090
  for(std::set< PairNodes >::const_iterator itr = s.begin();
1091
      itr != s.end(); ++itr) {
1092
    if((itr->first == n1 && itr->second == n2) ||
1093
       (itr->first == n2 && itr->second == n1)) {
1094
      return true;
1095
    }
1096
  }
1097
  return false;
1098
}
1099
1100
346
bool RegExpOpr::containC2(unsigned cnt, Node n) {
1101
346
  if(n.getKind() == kind::REGEXP_RV) {
1102
132
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1103
        << "Exceeded UINT32_MAX in RegExpOpr::containC2";
1104
132
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1105
132
    return cnt == y;
1106
214
  } else if(n.getKind() == kind::REGEXP_CONCAT) {
1107
16
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1108
12
      if(containC2(cnt, n[i])) {
1109
2
        return true;
1110
      }
1111
    }
1112
208
  } else if(n.getKind() == kind::REGEXP_STAR) {
1113
16
    return containC2(cnt, n[0]);
1114
192
  } else if(n.getKind() == kind::REGEXP_LOOP) {
1115
    return containC2(cnt, n[0]);
1116
192
  } else if(n.getKind() == kind::REGEXP_UNION) {
1117
18
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1118
14
      if(containC2(cnt, n[i])) {
1119
2
        return true;
1120
      }
1121
    }
1122
  }
1123
194
  return false;
1124
}
1125
176
Node RegExpOpr::convert1(unsigned cnt, Node n) {
1126
176
  Trace("regexp-debug") << "Converting " << n << " at " << cnt << "... " << std::endl;
1127
352
  Node r1, r2;
1128
176
  convert2(cnt, n, r1, r2);
1129
176
  Trace("regexp-debug") << "... getting r1=" << r1 << ", and r2=" << r2 << std::endl;
1130
176
  Node ret = r1==d_emptySingleton ? r2 : NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1131
176
     NodeManager::currentNM()->mkNode(kind::REGEXP_STAR, r1), r2);
1132
176
  ret = Rewriter::rewrite( ret );
1133
176
  Trace("regexp-debug") << "... done convert at " << cnt << ", with return " << ret << std::endl;
1134
352
  return ret;
1135
}
1136
285
void RegExpOpr::convert2(unsigned cnt, Node n, Node &r1, Node &r2) {
1137
285
  if(n == d_emptyRegexp) {
1138
24
    r1 = d_emptyRegexp;
1139
24
    r2 = d_emptyRegexp;
1140
24
    return;
1141
261
  } else if(n == d_emptySingleton) {
1142
21
    r1 = d_emptySingleton;
1143
21
    r2 = d_emptySingleton;
1144
  }
1145
261
  Kind nk = n.getKind();
1146
261
  if (nk == REGEXP_RV)
1147
  {
1148
33
    Assert(n[0].getConst<Rational>() <= Rational(String::maxSize()))
1149
        << "Exceeded UINT32_MAX in RegExpOpr::convert2";
1150
33
    unsigned y = n[0].getConst<Rational>().getNumerator().toUnsignedInt();
1151
33
    r1 = d_emptySingleton;
1152
33
    if(cnt == y) {
1153
33
      r2 = d_emptyRegexp;
1154
    } else {
1155
      r2 = n;
1156
    }
1157
  }
1158
228
  else if (nk == REGEXP_CONCAT)
1159
  {
1160
148
    bool flag = true;
1161
296
    std::vector<Node> vr1, vr2;
1162
417
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1163
304
      if(containC2(cnt, n[i])) {
1164
70
        Node t1, t2;
1165
35
        convert2(cnt, n[i], t1, t2);
1166
35
        vr1.push_back(t1);
1167
35
        r1 = vr1.size()==0 ? d_emptyRegexp : vr1.size()==1 ? vr1[0] :
1168
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr1);
1169
35
        vr2.push_back(t2);
1170
35
        for( unsigned j=i+1; j<n.getNumChildren(); j++ ) {
1171
          vr2.push_back(n[j]);
1172
        }
1173
35
        r2 = vr2.size()==0 ? d_emptyRegexp : vr2.size()==1 ? vr2[0] :
1174
             NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT, vr2);
1175
35
        flag = false;
1176
35
        break;
1177
      } else {
1178
269
        vr1.push_back(n[i]);
1179
      }
1180
    }
1181
148
    if(flag) {
1182
113
      r1 = d_emptySingleton;
1183
113
      r2 = n;
1184
    }
1185
  }
1186
80
  else if (nk == REGEXP_UNION)
1187
  {
1188
66
    std::vector<Node> vr1, vr2;
1189
107
    for( unsigned i=0; i<n.getNumChildren(); i++ ) {
1190
148
      Node t1, t2;
1191
74
      convert2(cnt, n[i], t1, t2);
1192
74
      vr1.push_back(t1);
1193
74
      vr2.push_back(t2);
1194
    }
1195
33
    r1 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr1);
1196
33
    r2 = NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vr2);
1197
  }
1198
47
  else if (nk == STRING_TO_REGEXP || nk == REGEXP_SIGMA || nk == REGEXP_RANGE
1199
           || nk == REGEXP_COMPLEMENT || nk == REGEXP_LOOP)
1200
  {
1201
    // this leaves n unchanged
1202
47
    r1 = d_emptySingleton;
1203
47
    r2 = n;
1204
  }
1205
  else
1206
  {
1207
    //is it possible?
1208
    Unreachable();
1209
  }
1210
}
1211
1212
251
Node RegExpOpr::intersectInternal( Node r1, Node r2, std::map< PairNodes, Node > cache, unsigned cnt ) {
1213
  //Assert(checkConstRegExp(r1) && checkConstRegExp(r2));
1214
251
  if(r1 > r2) {
1215
60
    TNode tmpNode = r1;
1216
30
    r1 = r2;
1217
30
    r2 = tmpNode;
1218
  }
1219
251
  Trace("regexp-int") << "Starting INTERSECT(" << cnt << "):\n  "<< mkString(r1) << ",\n  " << mkString(r2) << std::endl;
1220
502
  std::pair < Node, Node > p(r1, r2);
1221
251
  std::map < PairNodes, Node >::const_iterator itr = d_inter_cache.find(p);
1222
251
  Node rNode;
1223
251
  if(itr != d_inter_cache.end()) {
1224
26
    rNode = itr->second;
1225
  } else {
1226
225
    Trace("regexp-int-debug") << " ... not in cache" << std::endl;
1227
225
    if(r1 == d_emptyRegexp || r2 == d_emptyRegexp) {
1228
      Trace("regexp-int-debug") << " ... one is empty set" << std::endl;
1229
      rNode = d_emptyRegexp;
1230
225
    } else if(r1 == d_emptySingleton || r2 == d_emptySingleton) {
1231
18
      Trace("regexp-int-debug") << " ... one is empty singleton" << std::endl;
1232
36
      Node exp;
1233
18
      int r = delta((r1 == d_emptySingleton ? r2 : r1), exp);
1234
18
      if(r == 0) {
1235
        //TODO: variable
1236
        Unreachable();
1237
18
      } else if(r == 1) {
1238
14
        rNode = d_emptySingleton;
1239
      } else {
1240
4
        rNode = d_emptyRegexp;
1241
      }
1242
207
    } else if(r1 == r2) {
1243
2
      Trace("regexp-int-debug") << " ... equal" << std::endl;
1244
2
      rNode = r1; //convert1(cnt, r1);
1245
    } else {
1246
205
      Trace("regexp-int-debug") << " ... normal checking" << std::endl;
1247
205
      std::map< PairNodes, Node >::const_iterator itrcache = cache.find(p);
1248
205
      if(itrcache != cache.end()) {
1249
29
        rNode = itrcache->second;
1250
      } else {
1251
176
        Trace("regexp-int-debug") << " ... normal without cache" << std::endl;
1252
352
        std::vector<unsigned> cset;
1253
352
        std::set<unsigned> cset1, cset2;
1254
352
        std::set< Node > vset1, vset2;
1255
176
        firstChars(r1, cset1, vset1);
1256
176
        firstChars(r2, cset2, vset2);
1257
176
        Trace("regexp-int-debug") << " ... got fset" << std::endl;
1258
176
        std::set_intersection(cset1.begin(), cset1.end(), cset2.begin(), cset2.end(),
1259
176
             std::inserter(cset, cset.begin()));
1260
352
        std::vector< Node > vec_nodes;
1261
352
        Node delta_exp;
1262
176
        Trace("regexp-int-debug") << " ... try delta" << std::endl;
1263
176
        int flag = delta(r1, delta_exp);
1264
176
        int flag2 = delta(r2, delta_exp);
1265
176
        Trace("regexp-int-debug") << " ... delta1=" << flag << ", delta2=" << flag2 << std::endl;
1266
176
        if(flag != 2 && flag2 != 2) {
1267
19
          if(flag == 1 && flag2 == 1) {
1268
19
            vec_nodes.push_back(d_emptySingleton);
1269
          } else {
1270
            //TODO: variable
1271
            Unreachable();
1272
          }
1273
        }
1274
176
        if(Trace.isOn("regexp-int-debug")) {
1275
          Trace("regexp-int-debug") << "Try CSET(" << cset.size() << ") = {";
1276
          for (std::vector<unsigned>::const_iterator it = cset.begin();
1277
               it != cset.end();
1278
               ++it)
1279
          {
1280
            if (it != cset.begin())
1281
            {
1282
              Trace("regexp-int-debug") << ", ";
1283
            }
1284
            Trace("regexp-int-debug") << (*it);
1285
          }
1286
          Trace("regexp-int-debug") << std::endl;
1287
        }
1288
352
        std::map< PairNodes, Node > cacheX;
1289
374
        for (std::vector<unsigned>::const_iterator it = cset.begin();
1290
374
             it != cset.end();
1291
             ++it)
1292
        {
1293
396
          std::vector<unsigned> cvec;
1294
198
          cvec.push_back(*it);
1295
396
          String c(cvec);
1296
198
          Trace("regexp-int-debug") << "Try character " << c << " ... " << std::endl;
1297
396
          Node r1l = derivativeSingle(r1, c);
1298
396
          Node r2l = derivativeSingle(r2, c);
1299
198
          Trace("regexp-int-debug") << "  ... got partial(r1,c) = " << mkString(r1l) << std::endl;
1300
198
          Trace("regexp-int-debug") << "  ... got partial(r2,c) = " << mkString(r2l) << std::endl;
1301
396
          Node rt;
1302
1303
198
          if(r1l > r2l) {
1304
32
            Node tnode = r1l;
1305
16
            r1l = r2l; r2l = tnode;
1306
          }
1307
396
          PairNodes pp(r1l, r2l);
1308
198
          std::map< PairNodes, Node >::const_iterator itr2 = cacheX.find(pp);
1309
198
          if(itr2 != cacheX.end()) {
1310
4
            rt = itr2->second;
1311
          } else {
1312
388
            std::map< PairNodes, Node > cache2(cache);
1313
194
            cache2[ p ] = NodeManager::currentNM()->mkNode(kind::REGEXP_RV, NodeManager::currentNM()->mkConst(CVC4::Rational(cnt)));
1314
194
            rt = intersectInternal(r1l, r2l, cache2, cnt+1);
1315
194
            cacheX[ pp ] = rt;
1316
          }
1317
1318
594
          rt = Rewriter::rewrite( NodeManager::currentNM()->mkNode(kind::REGEXP_CONCAT,
1319
396
            NodeManager::currentNM()->mkNode(kind::STRING_TO_REGEXP, NodeManager::currentNM()->mkConst(c)), rt) );
1320
1321
198
          Trace("regexp-int-debug") << "  ... got p(r1,c) && p(r2,c) = " << mkString(rt) << std::endl;
1322
198
          vec_nodes.push_back(rt);
1323
        }
1324
176
        rNode = Rewriter::rewrite( vec_nodes.size()==0 ? d_emptyRegexp : vec_nodes.size()==1 ? vec_nodes[0] :
1325
            NodeManager::currentNM()->mkNode(kind::REGEXP_UNION, vec_nodes) );
1326
176
        rNode = convert1(cnt, rNode);
1327
176
        rNode = Rewriter::rewrite( rNode );
1328
      }
1329
    }
1330
225
    Trace("regexp-int-debug") << "  ... try testing no RV of " << mkString(rNode) << std::endl;
1331
225
    if (!expr::hasSubtermKind(REGEXP_RV, rNode))
1332
    {
1333
101
      d_inter_cache[p] = rNode;
1334
    }
1335
  }
1336
251
  Trace("regexp-int") << "End(" << cnt << ") of INTERSECT( " << mkString(r1) << ", " << mkString(r2) << " ) = " << mkString(rNode) << std::endl;
1337
502
  return rNode;
1338
}
1339
1340
114
Node RegExpOpr::removeIntersection(Node r) {
1341
114
  Assert(checkConstRegExp(r));
1342
114
  NodeManager* nm = NodeManager::currentNM();
1343
228
  std::unordered_map<TNode, Node, TNodeHashFunction> visited;
1344
114
  std::unordered_map<TNode, Node, TNodeHashFunction>::iterator it;
1345
228
  std::vector<TNode> visit;
1346
228
  TNode cur;
1347
114
  visit.push_back(r);
1348
1302
  do
1349
  {
1350
1416
    cur = visit.back();
1351
1416
    visit.pop_back();
1352
1416
    it = visited.find(cur);
1353
1354
1416
    if (it == visited.end())
1355
    {
1356
694
      visited[cur] = Node::null();
1357
694
      visit.push_back(cur);
1358
1302
      for (const Node& cn : cur)
1359
      {
1360
608
        visit.push_back(cn);
1361
      }
1362
    }
1363
722
    else if (it->second.isNull())
1364
    {
1365
694
      Kind ck = cur.getKind();
1366
1388
      Node ret;
1367
694
      bool childChanged = false;
1368
1388
      std::vector<Node> children;
1369
694
      if (cur.getMetaKind() == metakind::PARAMETERIZED)
1370
      {
1371
        children.push_back(cur.getOperator());
1372
      }
1373
1302
      for (const Node& cn : cur)
1374
      {
1375
608
        it = visited.find(cn);
1376
608
        Assert(it != visited.end());
1377
608
        Assert(!it->second.isNull());
1378
608
        if (ck == REGEXP_INTER)
1379
        {
1380
4
          if (ret.isNull())
1381
          {
1382
2
            ret = it->second;
1383
          }
1384
          else
1385
          {
1386
2
            ret = intersect(ret, it->second);
1387
          }
1388
        }
1389
        else
1390
        {
1391
          // will construct below
1392
604
          childChanged = childChanged || cn != it->second;
1393
604
          children.push_back(it->second);
1394
        }
1395
      }
1396
694
      if (ck != REGEXP_INTER)
1397
      {
1398
692
        if (childChanged)
1399
        {
1400
2
          ret = nm->mkNode(cur.getKind(), children);
1401
        }
1402
        else
1403
        {
1404
690
          ret = cur;
1405
        }
1406
      }
1407
694
      visited[cur] = ret;
1408
    }
1409
1416
  } while (!visit.empty());
1410
114
  Assert(visited.find(r) != visited.end());
1411
114
  Assert(!visited.find(r)->second.isNull());
1412
114
  if (Trace.isOn("regexp-intersect"))
1413
  {
1414
    Trace("regexp-intersect") << "Remove INTERSECTION( " << mkString(r)
1415
                              << " ) = " << mkString(visited[r]) << std::endl;
1416
  }
1417
228
  return visited[r];
1418
}
1419
1420
57
Node RegExpOpr::intersect(Node r1, Node r2)
1421
{
1422
57
  if (!checkConstRegExp(r1) || !checkConstRegExp(r2))
1423
  {
1424
    return Node::null();
1425
  }
1426
114
  Node rr1 = removeIntersection(r1);
1427
114
  Node rr2 = removeIntersection(r2);
1428
114
  std::map<PairNodes, Node> cache;
1429
57
  Trace("regexp-intersect-node") << "Intersect (1): " << rr1 << std::endl;
1430
57
  Trace("regexp-intersect-node") << "Intersect (2): " << rr2 << std::endl;
1431
114
  Trace("regexp-intersect") << "Start INTERSECTION(\n\t" << mkString(r1)
1432
57
                            << ",\n\t" << mkString(r2) << ")" << std::endl;
1433
114
  Node retNode = intersectInternal(rr1, rr2, cache, 1);
1434
114
  Trace("regexp-intersect")
1435
114
      << "End INTERSECTION(\n\t" << mkString(r1) << ",\n\t" << mkString(r2)
1436
57
      << ") =\n\t" << mkString(retNode) << std::endl;
1437
57
  Trace("regexp-intersect-node") << "Intersect finished." << std::endl;
1438
57
  return retNode;
1439
}
1440
1441
//printing
1442
10830
std::string RegExpOpr::niceChar(Node r) {
1443
10830
  if(r.isConst()) {
1444
21624
    std::string s = r.getConst<String>().toString();
1445
10812
    return s == "." ? "\\." : s;
1446
  } else {
1447
36
    std::string ss = "$" + r.toString();
1448
18
    return ss;
1449
  }
1450
}
1451
20826
std::string RegExpOpr::mkString( Node r ) {
1452
20826
  std::string retStr;
1453
20826
  if(r.isNull()) {
1454
    retStr = "\\E";
1455
  } else {
1456
20826
    int k = r.getKind();
1457
20826
    switch( k ) {
1458
300
      case kind::REGEXP_EMPTY: {
1459
300
        retStr += "\\E";
1460
300
        break;
1461
      }
1462
259
      case kind::REGEXP_SIGMA: {
1463
259
        retStr += ".";
1464
259
        break;
1465
      }
1466
10420
      case kind::STRING_TO_REGEXP: {
1467
20840
        std::string tmp( niceChar( r[0] ) );
1468
10420
        retStr += tmp.size()==1? tmp : "(" + tmp + ")";
1469
10420
        break;
1470
      }
1471
3592
      case kind::REGEXP_CONCAT: {
1472
3592
        retStr += "(";
1473
11449
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1474
          //if(i != 0) retStr += ".";
1475
7857
          retStr += mkString( r[i] );
1476
        }
1477
3592
        retStr += ")";
1478
3592
        break;
1479
      }
1480
1202
      case kind::REGEXP_UNION: {
1481
1202
        retStr += "(";
1482
3998
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1483
2796
          if(i != 0) retStr += "|";
1484
2796
          retStr += mkString( r[i] );
1485
        }
1486
1202
        retStr += ")";
1487
1202
        break;
1488
      }
1489
28
      case kind::REGEXP_INTER: {
1490
28
        retStr += "(";
1491
84
        for(unsigned i=0; i<r.getNumChildren(); ++i) {
1492
56
          if(i != 0) retStr += "&";
1493
56
          retStr += mkString( r[i] );
1494
        }
1495
28
        retStr += ")";
1496
28
        break;
1497
      }
1498
4404
      case kind::REGEXP_STAR: {
1499
4404
        retStr += mkString( r[0] );
1500
4404
        retStr += "*";
1501
4404
        break;
1502
      }
1503
      case kind::REGEXP_PLUS: {
1504
        retStr += mkString( r[0] );
1505
        retStr += "+";
1506
        break;
1507
      }
1508
      case kind::REGEXP_OPT: {
1509
        retStr += mkString( r[0] );
1510
        retStr += "?";
1511
        break;
1512
      }
1513
205
      case kind::REGEXP_RANGE: {
1514
205
        retStr += "[";
1515
205
        retStr += niceChar( r[0] );
1516
205
        retStr += "-";
1517
205
        retStr += niceChar( r[1] );
1518
205
        retStr += "]";
1519
205
        break;
1520
      }
1521
      case kind::REGEXP_LOOP: {
1522
        uint32_t l = utils::getLoopMinOccurrences(r);
1523
        std::stringstream ss;
1524
        ss << "(" << mkString(r[0]) << "){" << l << ",";
1525
        if(r.getNumChildren() == 3) {
1526
          uint32_t u = utils::getLoopMaxOccurrences(r);
1527
          ss << u;
1528
        }
1529
        ss << "}";
1530
        retStr += ss.str();
1531
        break;
1532
      }
1533
382
      case kind::REGEXP_RV: {
1534
382
        retStr += "<";
1535
382
        retStr += r[0].getConst<Rational>().getNumerator().toString();
1536
382
        retStr += ">";
1537
382
        break;
1538
      }
1539
28
      case REGEXP_COMPLEMENT:
1540
      {
1541
28
        retStr += "^(";
1542
28
        retStr += mkString(r[0]);
1543
28
        retStr += ")";
1544
28
        break;
1545
      }
1546
6
      default:
1547
      {
1548
12
        std::stringstream ss;
1549
6
        ss << r;
1550
6
        retStr = ss.str();
1551
6
        Assert(!utils::isRegExpKind(r.getKind()));
1552
6
        break;
1553
      }
1554
    }
1555
  }
1556
1557
20826
  return retStr;
1558
}
1559
1560
624
bool RegExpOpr::regExpIncludes(Node r1, Node r2)
1561
{
1562
624
  const auto& it = d_inclusionCache.find(std::make_pair(r1, r2));
1563
624
  if (it != d_inclusionCache.end())
1564
  {
1565
302
    return (*it).second;
1566
  }
1567
322
  bool result = RegExpEntail::regExpIncludes(r1, r2);
1568
322
  d_inclusionCache[std::make_pair(r1, r2)] = result;
1569
322
  return result;
1570
}
1571
1572
/**
1573
 * Associating formulas with their "exists form", or an existentially
1574
 * quantified formula that is equivalent to it. This is currently used
1575
 * for regular expression memberships in the method below.
1576
 */
1577
struct ExistsFormAttributeId
1578
{
1579
};
1580
typedef expr::Attribute<ExistsFormAttributeId, Node> ExistsFormAttribute;
1581
1582
308
Node RegExpOpr::getExistsForRegExpConcatMem(Node mem)
1583
{
1584
  // get or make the exists form of the membership
1585
  ExistsFormAttribute efa;
1586
308
  if (mem.hasAttribute(efa))
1587
  {
1588
    // already computed
1589
75
    return mem.getAttribute(efa);
1590
  }
1591
233
  Assert(mem.getKind() == STRING_IN_REGEXP);
1592
466
  Node x = mem[0];
1593
466
  Node r = mem[1];
1594
233
  Assert(r.getKind() == REGEXP_CONCAT);
1595
233
  NodeManager* nm = NodeManager::currentNM();
1596
466
  TypeNode xtn = x.getType();
1597
466
  std::vector<Node> vars;
1598
466
  std::vector<Node> mems;
1599
934
  for (const Node& rc : r)
1600
  {
1601
1402
    Node v = nm->mkBoundVar(xtn);
1602
701
    vars.push_back(v);
1603
701
    mems.push_back(nm->mkNode(STRING_IN_REGEXP, v, rc));
1604
  }
1605
466
  Node sconcat = nm->mkNode(STRING_CONCAT, vars);
1606
466
  Node eq = x.eqNode(sconcat);
1607
233
  mems.insert(mems.begin(), eq);
1608
466
  Node bvl = nm->mkNode(BOUND_VAR_LIST, vars);
1609
466
  Node ebody = nm->mkNode(AND, mems);
1610
466
  Node eform = nm->mkNode(EXISTS, bvl, ebody);
1611
233
  mem.setAttribute(efa, eform);
1612
233
  Trace("regexp-opr") << "Exists form " << mem << " : " << eform << std::endl;
1613
233
  return eform;
1614
}
1615
1616
}/* CVC4::theory::strings namespace */
1617
}/* CVC4::theory namespace */
1618
26676
}/* CVC4 namespace */