Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/theory/strings/regexp_solver.cpp	Lines:	296	331	89.4 %
Date:	2021-08-16	Branches:	587	1211	48.5 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Implementation of the regular expression solver for the theory of strings.
 */

#include "theory/strings/regexp_solver.h"

#include <cmath>

#include "options/strings_options.h"
#include "smt/logic_exception.h"
#include "theory/ext_theory.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/theory_model.h"
#include "util/statistics_value.h"

using namespace std;
using namespace cvc5::context;
using namespace cvc5::kind;

namespace cvc5 {
namespace theory {
namespace strings {

RegExpSolver::RegExpSolver(SolverState& s,
                           InferenceManager& im,
                           SkolemCache* skc,
                           CoreSolver& cs,
                           ExtfSolver& es,
                           SequencesStatistics& stats)
    : d_state(s),
      d_im(im),
      d_csolver(cs),
      d_esolver(es),
      d_statistics(stats),
      d_regexp_ucached(s.getUserContext()),
      d_regexp_ccached(s.getSatContext()),
      d_processed_memberships(s.getSatContext()),
      d_regexp_opr(skc)
{
  d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
  d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
  d_true = NodeManager::currentNM()->mkConst(true);
  d_false = NodeManager::currentNM()->mkConst(false);
}

Node RegExpSolver::mkAnd(Node c1, Node c2)
{
  return NodeManager::currentNM()->mkNode(AND, c1, c2);
}

void RegExpSolver::checkMemberships()
{
  // add the memberships
  std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
  // maps representatives to regular expression memberships in that class
  std::map<Node, std::vector<Node> > assertedMems;
  const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
  std::map<Node, ExtfInfoTmp>::const_iterator it;
  for (unsigned i = 0; i < mems.size(); i++)
  {
    Node n = mems[i];
    Assert(n.getKind() == STRING_IN_REGEXP);
    it = einfo.find(n);
    Assert(it != einfo.end());
    if (!it->second.d_const.isNull())
    {
      bool pol = it->second.d_const.getConst<bool>();
      Trace("strings-process-debug")
          << "  add membership : " << n << ", pol = " << pol << std::endl;
      Node r = d_state.getRepresentative(n[0]);
      assertedMems[r].push_back(pol ? n : n.negate());
    }
    else
    {
      Trace("strings-process-debug")
          << "  irrelevant (non-asserted) membership : " << n << std::endl;
    }
  }
  check(assertedMems);
}

void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
{
  bool addedLemma = false;
  bool changed = false;
  std::vector<Node> processed;

  Trace("regexp-process") << "Checking Memberships ... " << std::endl;
  for (const std::pair<const Node, std::vector<Node> >& mr : mems)
  {
    std::vector<Node> mems2 = mr.second;
    Trace("regexp-process")
        << "Memberships(" << mr.first << ") = " << mr.second << std::endl;
    if (!checkEqcInclusion(mems2))
    {
      // conflict discovered, return
      return;
    }
    if (!checkEqcIntersect(mems2))
    {
      // conflict discovered, return
      return;
    }
  }

  Trace("regexp-debug")
      << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
      << std::endl;
  if (!addedLemma)
  {
    // get all memberships
    std::map<Node, Node> allMems;
    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
      for (const Node& m : mr.second)
      {
        allMems[m] = mr.first;
      }
    }

    NodeManager* nm = NodeManager::currentNM();
    // representatives of strings that are the LHS of positive memberships that
    // we unfolded
    std::unordered_set<Node> repUnfold;
    // check positive (e=0), then negative (e=1) memberships
    for (unsigned e = 0; e < 2; e++)
    {
      for (const std::pair<const Node, Node>& mp : allMems)
      {
        Node assertion = mp.first;
        Node rep = mp.second;
        // check regular expression membership
        Trace("regexp-debug")
            << "Check : " << assertion << " "
            << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
            << " "
            << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
            << std::endl;
        if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
            || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
        {
          continue;
        }
        Trace("strings-regexp")
            << "We have regular expression assertion : " << assertion
            << std::endl;
        Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
        Assert(atom == Rewriter::rewrite(atom));
        bool polarity = assertion.getKind() != NOT;
        if (polarity != (e == 0))
        {
          continue;
        }
        bool flag = true;
        Node x = atom[0];
        Node r = atom[1];
        Assert(rep == d_state.getRepresentative(x));
        // The following code takes normal forms into account for the purposes
        // of simplifying a regular expression membership x in R. For example,
        // if x = "A" in the current context, then we may be interested in
        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
        // membership to nx in R', then:
        // - nfexp => ( x in R ) <=> nx in R'
        // - rnfexp => R = R'
        // We use these explanations below as assumptions on inferences when
        // appropriate. Notice that for inferring conflicts and tautologies,
        // we use the normal form of x always. This is because we always want to
        // discover conflicts/tautologies whenever possible.
        // For inferences based on regular expression unfolding, we do not use
        // the normal form of x. The reason is that it is better to unfold
        // regular expression memberships in a context-indepedent manner,
        // that is, not taking into account the current normal form of x, since
        // this ensures these lemmas are still relevant after backtracking.
        std::vector<Node> nfexp;
        std::vector<Node> rnfexp;
        // The normal form of x is stored in nx, while x is left unchanged.
        Node nx = x;
        if (!x.isConst())
        {
          nx = d_csolver.getNormalString(x, nfexp);
        }
        // If r is not a constant regular expression, we update it based on
        // normal forms, which may concretize its variables.
        if (!d_regexp_opr.checkConstRegExp(r))
        {
          r = getNormalSymRegExp(r, rnfexp);
          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
          changed = true;
        }
        Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
                                   << nx << " IN " << r << std::endl;
        if (nx != x || changed)
        {
          // We rewrite the membership nx IN r.
          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
          if (tmp.isConst())
          {
            if (tmp.getConst<bool>() == polarity)
            {
              // it is satisfied in this SAT context
              d_regexp_ccached.insert(assertion);
              continue;
            }
            else
            {
              // we have a conflict
              std::vector<Node> iexp = nfexp;
              std::vector<Node> noExplain;
              iexp.push_back(assertion);
              noExplain.push_back(assertion);
              Node conc = Node::null();
              d_im.sendInference(
                  iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
              addedLemma = true;
              break;
            }
          }
        }
        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
        {
          // do not unfold negative memberships of strings that have new
          // positive unfoldings. For example:
          //   x in ("A")* ^ NOT x in ("B")*
          // We unfold x = "A" ++ x' only. The intution here is that positive
          // unfoldings lead to stronger constraints (equalities are stronger
          // than disequalities), and are easier to check.
          continue;
        }
        if (polarity)
        {
          flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
        }
        else
        {
          if (!options::stringExp())
          {
            throw LogicException(
                "Strings Incomplete (due to Negative Membership) by default, "
                "try --strings-exp option.");
          }
        }
        if (flag)
        {
          // check if the term is atomic
          Trace("strings-regexp")
              << "Unroll/simplify membership of atomic term " << rep
              << std::endl;
          // if so, do simple unrolling
          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
          Node conc = d_regexp_opr.simplify(atom, polarity);
          Trace("strings-regexp") << "...finished, got " << conc << std::endl;
          // if simplifying successfully generated a lemma
          if (!conc.isNull())
          {
            std::vector<Node> iexp;
            std::vector<Node> noExplain;
            iexp.push_back(assertion);
            noExplain.push_back(assertion);
            Assert(atom.getKind() == STRING_IN_REGEXP);
            if (polarity)
            {
              d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
            }
            else
            {
              d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
            }
            InferenceId inf =
                polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
            // in very rare cases, we may find out that the unfolding lemma
            // for a membership is equivalent to true, in spite of the RE
            // not being rewritten to true.
            if (d_im.sendInference(iexp, noExplain, conc, inf))
            {
              addedLemma = true;
              if (e == 0)
              {
                // Remember that we have unfolded a membership for x
                // notice that we only do this here, after we have definitely
                // added a lemma.
                repUnfold.insert(rep);
              }
            }
            processed.push_back(assertion);
          }
          else
          {
            // otherwise we are incomplete
            d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
          }
        }
        if (d_state.isInConflict())
        {
          break;
        }
      }
    }
  }
  if (addedLemma)
  {
    if (!d_state.isInConflict())
    {
      for (unsigned i = 0; i < processed.size(); i++)
      {
        Trace("strings-regexp")
            << "...add " << processed[i] << " to u-cache." << std::endl;
        d_regexp_ucached.insert(processed[i]);
      }
    }
  }
}

bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
{
  std::unordered_set<Node> remove;

  for (const Node& m1 : mems)
  {
    bool m1Neg = m1.getKind() == NOT;
    Node m1Lit = m1Neg ? m1[0] : m1;

    if (remove.find(m1) != remove.end())
    {
      // Skip memberships marked for removal
      continue;
    }

    for (const Node& m2 : mems)
    {
      if (m1 == m2)
      {
        continue;
      }

      bool m2Neg = m2.getKind() == NOT;
      Node m2Lit = m2Neg ? m2[0] : m2;

      // Both regular expression memberships have the same polarity
      if (m1Neg == m2Neg)
      {
        if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
        {
          if (m1Neg)
          {
            // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
            //   mark ~str.in.re(x, R2) as reduced
            d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
            remove.insert(m2);
          }
          else
          {
            // str.in.re(x, R1) includes str.in.re(x, R2) --->
            //   mark str.in.re(x, R1) as reduced
            d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
            remove.insert(m1);

            // We don't need to process m1 anymore
            break;
          }
        }
      }
      else
      {
        Node pos = m1Neg ? m2Lit : m1Lit;
        Node neg = m1Neg ? m1Lit : m2Lit;
        if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
        {
          // We have a conflict because we have a case where str.in.re(x, R1)
          // and ~str.in.re(x, R2) but R2 includes R1, so there is no
          // possible value for x that satisfies both memberships.
          std::vector<Node> vec_nodes;
          vec_nodes.push_back(pos);
          vec_nodes.push_back(neg.negate());

          if (pos[0] != neg[0])
          {
            vec_nodes.push_back(pos[0].eqNode(neg[0]));
          }

          Node conc;
          d_im.sendInference(
              vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
          return false;
        }
      }
    }
  }

  mems.erase(std::remove_if(
                 mems.begin(),
                 mems.end(),
                 [&remove](Node& n) { return remove.find(n) != remove.end(); }),
             mems.end());

  return true;
}

bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
{
  // do not compute intersections if the re intersection mode is none
  if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
  {
    return true;
  }
  if (mems.empty())
  {
    // nothing to do
    return true;
  }
  // the initial regular expression membership and its constant type
  Node mi;
  RegExpConstType rcti = RE_C_UNKNOWN;
  NodeManager* nm = NodeManager::currentNM();
  for (const Node& m : mems)
  {
    if (m.getKind() != STRING_IN_REGEXP)
    {
      // do not do negative
      Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
      continue;
    }
    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
    if (rct == RE_C_VARIABLE
        || (options::stringRegExpInterMode()
                == options::RegExpInterMode::CONSTANT
            && rct != RE_C_CONRETE_CONSTANT))
    {
      // cannot do intersection on RE with variables, or with re.allchar based
      // on option.
      continue;
    }
    if (options::stringRegExpInterMode()
        == options::RegExpInterMode::ONE_CONSTANT)
    {
      if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
      {
        // if both have re.allchar, do not do intersection if the
        // options::RegExpInterMode::ONE_CONSTANT option is set.
        continue;
      }
    }
    if (mi.isNull())
    {
      // first regular expression seen
      mi = m;
      rcti = rct;
      continue;
    }
    Node resR = d_regexp_opr.intersect(mi[1], m[1]);
    // intersection should be computable
    Assert(!resR.isNull());
    if (resR == d_emptyRegexp)
    {
      // conflict, explain
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      Node conc;
      d_im.sendInference(
          vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
      // conflict, return
      return false;
    }
    // rewrite to ensure the equality checks below are precise
    Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
    Node mresr = Rewriter::rewrite(mres);
    if (mresr == mi)
    {
      // if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
      // to x in R1, hence x in R2 can be marked redundant.
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else if (mresr == m)
    {
      // same as above, opposite direction
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else
    {
      // new conclusion
      // (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      d_im.sendInference(
          vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
      // both are reduced
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
      // do not send more than one lemma for this class
      return true;
    }
  }
  return true;
}

bool RegExpSolver::checkPDerivative(
    Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
{
  if (d_state.areEqual(x, d_emptyString))
  {
    Node exp;
    switch (d_regexp_opr.delta(r, exp))
    {
      case 0:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      case 1:
      {
        d_regexp_ccached.insert(atom);
        break;
      }
      case 2:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      default:
        // Impossible
        break;
    }
  }
  else
  {
    if (deriveRegExp(x, r, atom, nf_exp))
    {
      addedLemma = true;
      d_regexp_ccached.insert(atom);
      return false;
    }
  }
  return true;
}

cvc5::String RegExpSolver::getHeadConst(Node x)
{
  if (x.isConst())
  {
    return x.getConst<String>();
  }
  else if (x.getKind() == STRING_CONCAT)
  {
    if (x[0].isConst())
    {
      return x[0].getConst<String>();
    }
  }
  return d_emptyString.getConst<String>();
}

bool RegExpSolver::deriveRegExp(Node x,
                                Node r,
                                Node atom,
                                std::vector<Node>& ant)
{
  Assert(x != d_emptyString);
  Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
                         << ", r= " << r << std::endl;
  cvc5::String s = getHeadConst(x);
  // only allow RE_DERIVE for concrete constant regular expressions
  if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
  {
    Node conc = Node::null();
    Node dc = r;
    bool flag = true;
    for (unsigned i = 0; i < s.size(); ++i)
    {
      cvc5::String c = s.substr(i, 1);
      Node dc2;
      int rt = d_regexp_opr.derivativeS(dc, c, dc2);
      dc = dc2;
      if (rt == 2)
      {
        // CONFLICT
        flag = false;
        break;
      }
    }
    // send lemma
    if (flag)
    {
      if (x.isConst())
      {
        Assert(false)
            << "Impossible: RegExpSolver::deriveRegExp: const string in const "
               "regular expression.";
        return false;
      }
      else
      {
        Assert(x.getKind() == STRING_CONCAT);
        std::vector<Node> vec_nodes;
        for (unsigned int i = 1; i < x.getNumChildren(); ++i)
        {
          vec_nodes.push_back(x[i]);
        }
        Node left = utils::mkConcat(vec_nodes, x.getType());
        left = Rewriter::rewrite(left);
        conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
      }
    }
    std::vector<Node> iexp = ant;
    std::vector<Node> noExplain;
    noExplain.push_back(atom);
    iexp.push_back(atom);
    d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
    return true;
  }
  return false;
}

Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
{
  Node ret = r;
  switch (r.getKind())
  {
    case REGEXP_EMPTY:
    case REGEXP_SIGMA:
    case REGEXP_RANGE: break;
    case STRING_TO_REGEXP:
    {
      if (!r[0].isConst())
      {
        Node tmp = d_csolver.getNormalString(r[0], nf_exp);
        if (tmp != r[0])
        {
          ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
        }
      }
      break;
    }
    case REGEXP_CONCAT:
    case REGEXP_UNION:
    case REGEXP_INTER:
    case REGEXP_STAR:
    case REGEXP_COMPLEMENT:
    {
      std::vector<Node> vec_nodes;
      for (const Node& cr : r)
      {
        vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
      }
      ret = Rewriter::rewrite(
          NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
      break;
    }
    default:
    {
      Trace("strings-error") << "Unsupported term: " << r
                             << " in normalization SymRegExp." << std::endl;
      Assert(!utils::isRegExpKind(r.getKind()));
    }
  }
  return ret;
}

}  // namespace strings
}  // namespace theory
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Andres Noetzli, Tianyi Liang
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* Implementation of the regular expression solver for the theory of strings.
14		*/
15
16		#include "theory/strings/regexp_solver.h"
17
18		#include <cmath>
19
20		#include "options/strings_options.h"
21		#include "smt/logic_exception.h"
22		#include "theory/ext_theory.h"
23		#include "theory/strings/theory_strings_utils.h"
24		#include "theory/theory_model.h"
25		#include "util/statistics_value.h"
26
27		using namespace std;
28		using namespace cvc5::context;
29		using namespace cvc5::kind;
30
31		namespace cvc5 {
32		namespace theory {
33		namespace strings {
34
35	9853	RegExpSolver::RegExpSolver(SolverState& s,
36		InferenceManager& im,
37		SkolemCache* skc,
38		CoreSolver& cs,
39		ExtfSolver& es,
40	9853	SequencesStatistics& stats)
41		: d_state(s),
42		d_im(im),
43		d_csolver(cs),
44		d_esolver(es),
45		d_statistics(stats),
46	9853	d_regexp_ucached(s.getUserContext()),
47		d_regexp_ccached(s.getSatContext()),
48		d_processed_memberships(s.getSatContext()),
49	19706	d_regexp_opr(skc)
50		{
51	9853	d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
52	9853	d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
53	9853	d_true = NodeManager::currentNM()->mkConst(true);
54	9853	d_false = NodeManager::currentNM()->mkConst(false);
55	9853	}
56
57		Node RegExpSolver::mkAnd(Node c1, Node c2)
58		{
59		return NodeManager::currentNM()->mkNode(AND, c1, c2);
60		}
61
62	8348	void RegExpSolver::checkMemberships()
63		{
64		// add the memberships
65	16696	std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
66		// maps representatives to regular expression memberships in that class
67	16696	std::map<Node, std::vector<Node> > assertedMems;
68	8348	const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
69	8348	std::map<Node, ExtfInfoTmp>::const_iterator it;
70	9635	for (unsigned i = 0; i < mems.size(); i++)
71		{
72	2574	Node n = mems[i];
73	1287	Assert(n.getKind() == STRING_IN_REGEXP);
74	1287	it = einfo.find(n);
75	1287	Assert(it != einfo.end());
76	1287	if (!it->second.d_const.isNull())
77		{
78	1275	bool pol = it->second.d_const.getConst<bool>();
79	2550	Trace("strings-process-debug")
80	1275	<< " add membership : " << n << ", pol = " << pol << std::endl;
81	2550	Node r = d_state.getRepresentative(n[0]);
82	1275	assertedMems[r].push_back(pol ? n : n.negate());
83		}
84		else
85		{
86	24	Trace("strings-process-debug")
87	12	<< " irrelevant (non-asserted) membership : " << n << std::endl;
88		}
89		}
90	8348	check(assertedMems);
91	8348	}
92
93	8348	void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
94		{
95	8348	bool addedLemma = false;
96	8348	bool changed = false;
97	16674	std::vector<Node> processed;
98
99	8348	Trace("regexp-process") << "Checking Memberships ... " << std::endl;
100	9187	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
101		{
102	1700	std::vector<Node> mems2 = mr.second;
103	1722	Trace("regexp-process")
104	861	<< "Memberships(" << mr.first << ") = " << mr.second << std::endl;
105	861	if (!checkEqcInclusion(mems2))
106		{
107		// conflict discovered, return
108	10	return;
109		}
110	851	if (!checkEqcIntersect(mems2))
111		{
112		// conflict discovered, return
113	12	return;
114		}
115		}
116
117	16652	Trace("regexp-debug")
118	8326	<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
119	8326	<< std::endl;
120	8326	if (!addedLemma)
121		{
122		// get all memberships
123	16652	std::map<Node, Node> allMems;
124	9165	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
125		{
126	2055	for (const Node& m : mr.second)
127		{
128	1216	allMems[m] = mr.first;
129		}
130		}
131
132	8326	NodeManager* nm = NodeManager::currentNM();
133		// representatives of strings that are the LHS of positive memberships that
134		// we unfolded
135	16652	std::unordered_set<Node> repUnfold;
136		// check positive (e=0), then negative (e=1) memberships
137	24978	for (unsigned e = 0; e < 2; e++)
138		{
139	19008	for (const std::pair<const Node, Node>& mp : allMems)
140		{
141	2908	Node assertion = mp.first;
142	2908	Node rep = mp.second;
143		// check regular expression membership
144	4860	Trace("regexp-debug")
145	2430	<< "Check : " << assertion << " "
146	4860	<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
147	2430	<< " "
148	4860	<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
149	2430	<< std::endl;
150	4860	if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
151	2430	\|\| d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
152		{
153	1234	continue;
154		}
155	2392	Trace("strings-regexp")
156	1196	<< "We have regular expression assertion : " << assertion
157	1196	<< std::endl;
158	1674	Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
159	1196	Assert(atom == Rewriter::rewrite(atom));
160	1196	bool polarity = assertion.getKind() != NOT;
161	1196	if (polarity != (e == 0))
162		{
163	590	continue;
164		}
165	606	bool flag = true;
166	1084	Node x = atom[0];
167	1084	Node r = atom[1];
168	606	Assert(rep == d_state.getRepresentative(x));
169		// The following code takes normal forms into account for the purposes
170		// of simplifying a regular expression membership x in R. For example,
171		// if x = "A" in the current context, then we may be interested in
172		// reasoning about ( x in R ) * { x -> "A" }. Say we update the
173		// membership to nx in R', then:
174		// - nfexp => ( x in R ) <=> nx in R'
175		// - rnfexp => R = R'
176		// We use these explanations below as assumptions on inferences when
177		// appropriate. Notice that for inferring conflicts and tautologies,
178		// we use the normal form of x always. This is because we always want to
179		// discover conflicts/tautologies whenever possible.
180		// For inferences based on regular expression unfolding, we do not use
181		// the normal form of x. The reason is that it is better to unfold
182		// regular expression memberships in a context-indepedent manner,
183		// that is, not taking into account the current normal form of x, since
184		// this ensures these lemmas are still relevant after backtracking.
185	1084	std::vector<Node> nfexp;
186	1084	std::vector<Node> rnfexp;
187		// The normal form of x is stored in nx, while x is left unchanged.
188	1084	Node nx = x;
189	606	if (!x.isConst())
190		{
191	587	nx = d_csolver.getNormalString(x, nfexp);
192		}
193		// If r is not a constant regular expression, we update it based on
194		// normal forms, which may concretize its variables.
195	606	if (!d_regexp_opr.checkConstRegExp(r))
196		{
197	111	r = getNormalSymRegExp(r, rnfexp);
198	111	nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
199	111	changed = true;
200		}
201	1212	Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
202	606	<< nx << " IN " << r << std::endl;
203	606	if (nx != x \|\| changed)
204		{
205		// We rewrite the membership nx IN r.
206	606	Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
207	349	Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
208	349	if (tmp.isConst())
209		{
210	110	if (tmp.getConst<bool>() == polarity)
211		{
212		// it is satisfied in this SAT context
213	18	d_regexp_ccached.insert(assertion);
214	18	continue;
215		}
216		else
217		{
218		// we have a conflict
219	148	std::vector<Node> iexp = nfexp;
220	148	std::vector<Node> noExplain;
221	74	iexp.push_back(assertion);
222	74	noExplain.push_back(assertion);
223	148	Node conc = Node::null();
224	74	d_im.sendInference(
225		iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
226	74	addedLemma = true;
227	74	break;
228		}
229		}
230		}
231	514	if (e == 1 && repUnfold.find(rep) != repUnfold.end())
232		{
233		// do not unfold negative memberships of strings that have new
234		// positive unfoldings. For example:
235		// x in ("A")* ^ NOT x in ("B")*
236		// We unfold x = "A" ++ x' only. The intution here is that positive
237		// unfoldings lead to stronger constraints (equalities are stronger
238		// than disequalities), and are easier to check.
239	36	continue;
240		}
241	478	if (polarity)
242		{
243	363	flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
244		}
245		else
246		{
247	115	if (!options::stringExp())
248		{
249		throw LogicException(
250		"Strings Incomplete (due to Negative Membership) by default, "
251		"try --strings-exp option.");
252		}
253		}
254	478	if (flag)
255		{
256		// check if the term is atomic
257	940	Trace("strings-regexp")
258	470	<< "Unroll/simplify membership of atomic term " << rep
259	470	<< std::endl;
260		// if so, do simple unrolling
261	470	Trace("strings-regexp") << "Simplify on " << atom << std::endl;
262	940	Node conc = d_regexp_opr.simplify(atom, polarity);
263	470	Trace("strings-regexp") << "...finished, got " << conc << std::endl;
264		// if simplifying successfully generated a lemma
265	470	if (!conc.isNull())
266		{
267	934	std::vector<Node> iexp;
268	934	std::vector<Node> noExplain;
269	467	iexp.push_back(assertion);
270	467	noExplain.push_back(assertion);
271	467	Assert(atom.getKind() == STRING_IN_REGEXP);
272	467	if (polarity)
273		{
274	353	d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
275		}
276		else
277		{
278	114	d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
279		}
280	467	InferenceId inf =
281	467	polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
282		// in very rare cases, we may find out that the unfolding lemma
283		// for a membership is equivalent to true, in spite of the RE
284		// not being rewritten to true.
285	467	if (d_im.sendInference(iexp, noExplain, conc, inf))
286		{
287	461	addedLemma = true;
288	461	if (e == 0)
289		{
290		// Remember that we have unfolded a membership for x
291		// notice that we only do this here, after we have definitely
292		// added a lemma.
293	351	repUnfold.insert(rep);
294		}
295		}
296	467	processed.push_back(assertion);
297		}
298		else
299		{
300		// otherwise we are incomplete
301	3	d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
302		}
303		}
304	478	if (d_state.isInConflict())
305		{
306		break;
307		}
308		}
309		}
310		}
311	8326	if (addedLemma)
312		{
313	367	if (!d_state.isInConflict())
314		{
315	830	for (unsigned i = 0; i < processed.size(); i++)
316		{
317	926	Trace("strings-regexp")
318	463	<< "...add " << processed[i] << " to u-cache." << std::endl;
319	463	d_regexp_ucached.insert(processed[i]);
320		}
321		}
322		}
323		}
324
325	861	bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
326		{
327	1722	std::unordered_set<Node> remove;
328
329	2108	for (const Node& m1 : mems)
330		{
331	1257	bool m1Neg = m1.getKind() == NOT;
332	2452	Node m1Lit = m1Neg ? m1[0] : m1;
333
334	1257	if (remove.find(m1) != remove.end())
335		{
336		// Skip memberships marked for removal
337	52	continue;
338		}
339
340	3414	for (const Node& m2 : mems)
341		{
342	2273	if (m1 == m2)
343		{
344	1177	continue;
345		}
346
347	1096	bool m2Neg = m2.getKind() == NOT;
348	2128	Node m2Lit = m2Neg ? m2[0] : m2;
349
350		// Both regular expression memberships have the same polarity
351	1096	if (m1Neg == m2Neg)
352		{
353	714	if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
354		{
355	108	if (m1Neg)
356		{
357		// ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
358		// mark ~str.in.re(x, R2) as reduced
359	54	d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
360	54	remove.insert(m2);
361		}
362		else
363		{
364		// str.in.re(x, R1) includes str.in.re(x, R2) --->
365		// mark str.in.re(x, R1) as reduced
366	54	d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
367	54	remove.insert(m1);
368
369		// We don't need to process m1 anymore
370	54	break;
371		}
372		}
373		}
374		else
375		{
376	754	Node pos = m1Neg ? m2Lit : m1Lit;
377	754	Node neg = m1Neg ? m1Lit : m2Lit;
378	382	if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
379		{
380		// We have a conflict because we have a case where str.in.re(x, R1)
381		// and ~str.in.re(x, R2) but R2 includes R1, so there is no
382		// possible value for x that satisfies both memberships.
383	20	std::vector<Node> vec_nodes;
384	10	vec_nodes.push_back(pos);
385	10	vec_nodes.push_back(neg.negate());
386
387	10	if (pos[0] != neg[0])
388		{
389		vec_nodes.push_back(pos[0].eqNode(neg[0]));
390		}
391
392	20	Node conc;
393	10	d_im.sendInference(
394		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
395	10	return false;
396		}
397		}
398		}
399		}
400
401	2553	mems.erase(std::remove_if(
402		mems.begin(),
403		mems.end(),
404	2094	[&remove](Node& n) { return remove.find(n) != remove.end(); }),
405	3404	mems.end());
406
407	851	return true;
408		}
409
410	851	bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
411		{
412		// do not compute intersections if the re intersection mode is none
413	851	if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
414		{
415		return true;
416		}
417	851	if (mems.empty())
418		{
419		// nothing to do
420	22	return true;
421		}
422		// the initial regular expression membership and its constant type
423	1658	Node mi;
424	829	RegExpConstType rcti = RE_C_UNKNOWN;
425	829	NodeManager* nm = NodeManager::currentNM();
426	1852	for (const Node& m : mems)
427		{
428	1590	if (m.getKind() != STRING_IN_REGEXP)
429		{
430		// do not do negative
431	507	Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
432	1523	continue;
433		}
434	576	RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
435	576	if (rct == RE_C_VARIABLE
436	921	\|\| (options::stringRegExpInterMode()
437		== options::RegExpInterMode::CONSTANT
438	428	&& rct != RE_C_CONRETE_CONSTANT))
439		{
440		// cannot do intersection on RE with variables, or with re.allchar based
441		// on option.
442	345	continue;
443		}
444	462	if (options::stringRegExpInterMode()
445	231	== options::RegExpInterMode::ONE_CONSTANT)
446		{
447		if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
448		{
449		// if both have re.allchar, do not do intersection if the
450		// options::RegExpInterMode::ONE_CONSTANT option is set.
451		continue;
452		}
453		}
454	395	if (mi.isNull())
455		{
456		// first regular expression seen
457	164	mi = m;
458	164	rcti = rct;
459	164	continue;
460		}
461	74	Node resR = d_regexp_opr.intersect(mi[1], m[1]);
462		// intersection should be computable
463	67	Assert(!resR.isNull());
464	67	if (resR == d_emptyRegexp)
465		{
466		// conflict, explain
467	24	std::vector<Node> vec_nodes;
468	12	vec_nodes.push_back(mi);
469	12	vec_nodes.push_back(m);
470	12	if (mi[0] != m[0])
471		{
472	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
473		}
474	24	Node conc;
475	12	d_im.sendInference(
476		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
477		// conflict, return
478	12	return false;
479		}
480		// rewrite to ensure the equality checks below are precise
481	62	Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
482	62	Node mresr = Rewriter::rewrite(mres);
483	55	if (mresr == mi)
484		{
485		// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
486		// to x in R1, hence x in R2 can be marked redundant.
487		d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
488		}
489	55	else if (mresr == m)
490		{
491		// same as above, opposite direction
492	7	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
493		}
494		else
495		{
496		// new conclusion
497		// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
498	96	std::vector<Node> vec_nodes;
499	48	vec_nodes.push_back(mi);
500	48	vec_nodes.push_back(m);
501	48	if (mi[0] != m[0])
502		{
503	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
504		}
505	48	d_im.sendInference(
506		vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
507		// both are reduced
508	48	d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
509	48	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
510		// do not send more than one lemma for this class
511	48	return true;
512		}
513		}
514	769	return true;
515		}
516
517	363	bool RegExpSolver::checkPDerivative(
518		Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
519		{
520	363	if (d_state.areEqual(x, d_emptyString))
521		{
522	4	Node exp;
523	4	switch (d_regexp_opr.delta(r, exp))
524		{
525		case 0:
526		{
527		std::vector<Node> noExplain;
528		noExplain.push_back(atom);
529		noExplain.push_back(x.eqNode(d_emptyString));
530		std::vector<Node> iexp = nf_exp;
531		iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
532		d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
533		addedLemma = true;
534		d_regexp_ccached.insert(atom);
535		return false;
536		}
537		case 1:
538		{
539		d_regexp_ccached.insert(atom);
540		break;
541		}
542	4	case 2:
543		{
544	8	std::vector<Node> noExplain;
545	4	noExplain.push_back(atom);
546	4	noExplain.push_back(x.eqNode(d_emptyString));
547	8	std::vector<Node> iexp = nf_exp;
548	4	iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
549	4	d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
550	4	addedLemma = true;
551	4	d_regexp_ccached.insert(atom);
552	4	return false;
553		}
554		default:
555		// Impossible
556		break;
557		}
558		}
559		else
560		{
561	359	if (deriveRegExp(x, r, atom, nf_exp))
562		{
563	4	addedLemma = true;
564	4	d_regexp_ccached.insert(atom);
565	4	return false;
566		}
567		}
568	355	return true;
569		}
570
571	359	cvc5::String RegExpSolver::getHeadConst(Node x)
572		{
573	359	if (x.isConst())
574		{
575	2	return x.getConst<String>();
576		}
577	357	else if (x.getKind() == STRING_CONCAT)
578		{
579	50	if (x[0].isConst())
580		{
581	6	return x[0].getConst<String>();
582		}
583		}
584	351	return d_emptyString.getConst<String>();
585		}
586
587	359	bool RegExpSolver::deriveRegExp(Node x,
588		Node r,
589		Node atom,
590		std::vector<Node>& ant)
591		{
592	359	Assert(x != d_emptyString);
593	718	Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
594	359	<< ", r= " << r << std::endl;
595	718	cvc5::String s = getHeadConst(x);
596		// only allow RE_DERIVE for concrete constant regular expressions
597	359	if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
598		{
599	8	Node conc = Node::null();
600	8	Node dc = r;
601	4	bool flag = true;
602	4	for (unsigned i = 0; i < s.size(); ++i)
603		{
604	4	cvc5::String c = s.substr(i, 1);
605	4	Node dc2;
606	4	int rt = d_regexp_opr.derivativeS(dc, c, dc2);
607	4	dc = dc2;
608	4	if (rt == 2)
609		{
610		// CONFLICT
611	4	flag = false;
612	4	break;
613		}
614		}
615		// send lemma
616	4	if (flag)
617		{
618		if (x.isConst())
619		{
620		Assert(false)
621		<< "Impossible: RegExpSolver::deriveRegExp: const string in const "
622		"regular expression.";
623		return false;
624		}
625		else
626		{
627		Assert(x.getKind() == STRING_CONCAT);
628		std::vector<Node> vec_nodes;
629		for (unsigned int i = 1; i < x.getNumChildren(); ++i)
630		{
631		vec_nodes.push_back(x[i]);
632		}
633		Node left = utils::mkConcat(vec_nodes, x.getType());
634		left = Rewriter::rewrite(left);
635		conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
636		}
637		}
638	8	std::vector<Node> iexp = ant;
639	8	std::vector<Node> noExplain;
640	4	noExplain.push_back(atom);
641	4	iexp.push_back(atom);
642	4	d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
643	4	return true;
644		}
645	355	return false;
646		}
647
648	577	Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
649		{
650	577	Node ret = r;
651	577	switch (r.getKind())
652		{
653	72	case REGEXP_EMPTY:
654		case REGEXP_SIGMA:
655	72	case REGEXP_RANGE: break;
656	220	case STRING_TO_REGEXP:
657		{
658	220	if (!r[0].isConst())
659		{
660	228	Node tmp = d_csolver.getNormalString(r[0], nf_exp);
661	114	if (tmp != r[0])
662		{
663	100	ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
664		}
665		}
666	220	break;
667		}
668	282	case REGEXP_CONCAT:
669		case REGEXP_UNION:
670		case REGEXP_INTER:
671		case REGEXP_STAR:
672		case REGEXP_COMPLEMENT:
673		{
674	564	std::vector<Node> vec_nodes;
675	748	for (const Node& cr : r)
676		{
677	466	vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
678		}
679	282	ret = Rewriter::rewrite(
680	564	NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
681	282	break;
682		}
683	3	default:
684		{
685	6	Trace("strings-error") << "Unsupported term: " << r
686	3	<< " in normalization SymRegExp." << std::endl;
687	3	Assert(!utils::isRegExpKind(r.getKind()));
688		}
689		}
690	577	return ret;
691		}
692
693		} // namespace strings
694		} // namespace theory
695	29340	} // namespace cvc5