Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/theory/strings/regexp_solver.cpp	Lines:	296	331	89.4 %
Date:	2021-09-16	Branches:	588	1213	48.5 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Implementation of the regular expression solver for the theory of strings.
 */

#include "theory/strings/regexp_solver.h"

#include <cmath>

#include "options/strings_options.h"
#include "smt/logic_exception.h"
#include "theory/ext_theory.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/theory_model.h"
#include "util/statistics_value.h"

using namespace std;
using namespace cvc5::context;
using namespace cvc5::kind;

namespace cvc5 {
namespace theory {
namespace strings {

RegExpSolver::RegExpSolver(Env& env,
                           SolverState& s,
                           InferenceManager& im,
                           SkolemCache* skc,
                           CoreSolver& cs,
                           ExtfSolver& es,
                           SequencesStatistics& stats)
    : EnvObj(env),
      d_state(s),
      d_im(im),
      d_csolver(cs),
      d_esolver(es),
      d_statistics(stats),
      d_regexp_ucached(userContext()),
      d_regexp_ccached(context()),
      d_processed_memberships(context()),
      d_regexp_opr(skc)
{
  d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
  d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
  d_true = NodeManager::currentNM()->mkConst(true);
  d_false = NodeManager::currentNM()->mkConst(false);
}

Node RegExpSolver::mkAnd(Node c1, Node c2)
{
  return NodeManager::currentNM()->mkNode(AND, c1, c2);
}

void RegExpSolver::checkMemberships()
{
  // add the memberships
  std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
  // maps representatives to regular expression memberships in that class
  std::map<Node, std::vector<Node> > assertedMems;
  const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
  std::map<Node, ExtfInfoTmp>::const_iterator it;
  for (unsigned i = 0; i < mems.size(); i++)
  {
    Node n = mems[i];
    Assert(n.getKind() == STRING_IN_REGEXP);
    it = einfo.find(n);
    Assert(it != einfo.end());
    if (!it->second.d_const.isNull())
    {
      bool pol = it->second.d_const.getConst<bool>();
      Trace("strings-process-debug")
          << "  add membership : " << n << ", pol = " << pol << std::endl;
      Node r = d_state.getRepresentative(n[0]);
      assertedMems[r].push_back(pol ? n : n.negate());
    }
    else
    {
      Trace("strings-process-debug")
          << "  irrelevant (non-asserted) membership : " << n << std::endl;
    }
  }
  check(assertedMems);
}

void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
{
  bool addedLemma = false;
  bool changed = false;
  std::vector<Node> processed;

  Trace("regexp-process") << "Checking Memberships ... " << std::endl;
  for (const std::pair<const Node, std::vector<Node> >& mr : mems)
  {
    std::vector<Node> mems2 = mr.second;
    Trace("regexp-process")
        << "Memberships(" << mr.first << ") = " << mr.second << std::endl;
    if (!checkEqcInclusion(mems2))
    {
      // conflict discovered, return
      return;
    }
    if (!checkEqcIntersect(mems2))
    {
      // conflict discovered, return
      return;
    }
  }

  Trace("regexp-debug")
      << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
      << std::endl;
  if (!addedLemma)
  {
    // get all memberships
    std::map<Node, Node> allMems;
    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
      for (const Node& m : mr.second)
      {
        allMems[m] = mr.first;
      }
    }

    NodeManager* nm = NodeManager::currentNM();
    // representatives of strings that are the LHS of positive memberships that
    // we unfolded
    std::unordered_set<Node> repUnfold;
    // check positive (e=0), then negative (e=1) memberships
    for (unsigned e = 0; e < 2; e++)
    {
      for (const std::pair<const Node, Node>& mp : allMems)
      {
        Node assertion = mp.first;
        Node rep = mp.second;
        // check regular expression membership
        Trace("regexp-debug")
            << "Check : " << assertion << " "
            << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
            << " "
            << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
            << std::endl;
        if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
            || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
        {
          continue;
        }
        Trace("strings-regexp")
            << "We have regular expression assertion : " << assertion
            << std::endl;
        Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
        Assert(atom == Rewriter::rewrite(atom));
        bool polarity = assertion.getKind() != NOT;
        if (polarity != (e == 0))
        {
          continue;
        }
        bool flag = true;
        Node x = atom[0];
        Node r = atom[1];
        Assert(rep == d_state.getRepresentative(x));
        // The following code takes normal forms into account for the purposes
        // of simplifying a regular expression membership x in R. For example,
        // if x = "A" in the current context, then we may be interested in
        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
        // membership to nx in R', then:
        // - nfexp => ( x in R ) <=> nx in R'
        // - rnfexp => R = R'
        // We use these explanations below as assumptions on inferences when
        // appropriate. Notice that for inferring conflicts and tautologies,
        // we use the normal form of x always. This is because we always want to
        // discover conflicts/tautologies whenever possible.
        // For inferences based on regular expression unfolding, we do not use
        // the normal form of x. The reason is that it is better to unfold
        // regular expression memberships in a context-indepedent manner,
        // that is, not taking into account the current normal form of x, since
        // this ensures these lemmas are still relevant after backtracking.
        std::vector<Node> nfexp;
        std::vector<Node> rnfexp;
        // The normal form of x is stored in nx, while x is left unchanged.
        Node nx = x;
        if (!x.isConst())
        {
          nx = d_csolver.getNormalString(x, nfexp);
        }
        // If r is not a constant regular expression, we update it based on
        // normal forms, which may concretize its variables.
        if (!d_regexp_opr.checkConstRegExp(r))
        {
          r = getNormalSymRegExp(r, rnfexp);
          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
          changed = true;
        }
        Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
                                   << nx << " IN " << r << std::endl;
        if (nx != x || changed)
        {
          // We rewrite the membership nx IN r.
          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
          if (tmp.isConst())
          {
            if (tmp.getConst<bool>() == polarity)
            {
              // it is satisfied in this SAT context
              d_regexp_ccached.insert(assertion);
              continue;
            }
            else
            {
              // we have a conflict
              std::vector<Node> iexp = nfexp;
              std::vector<Node> noExplain;
              iexp.push_back(assertion);
              noExplain.push_back(assertion);
              Node conc = Node::null();
              d_im.sendInference(
                  iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
              addedLemma = true;
              break;
            }
          }
        }
        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
        {
          // do not unfold negative memberships of strings that have new
          // positive unfoldings. For example:
          //   x in ("A")* ^ NOT x in ("B")*
          // We unfold x = "A" ++ x' only. The intution here is that positive
          // unfoldings lead to stronger constraints (equalities are stronger
          // than disequalities), and are easier to check.
          continue;
        }
        if (polarity)
        {
          flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
        }
        else
        {
          if (!options::stringExp())
          {
            throw LogicException(
                "Strings Incomplete (due to Negative Membership) by default, "
                "try --strings-exp option.");
          }
        }
        if (flag)
        {
          // check if the term is atomic
          Trace("strings-regexp")
              << "Unroll/simplify membership of atomic term " << rep
              << std::endl;
          // if so, do simple unrolling
          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
          Node conc = d_regexp_opr.simplify(atom, polarity);
          Trace("strings-regexp") << "...finished, got " << conc << std::endl;
          // if simplifying successfully generated a lemma
          if (!conc.isNull())
          {
            std::vector<Node> iexp;
            std::vector<Node> noExplain;
            iexp.push_back(assertion);
            noExplain.push_back(assertion);
            Assert(atom.getKind() == STRING_IN_REGEXP);
            if (polarity)
            {
              d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
            }
            else
            {
              d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
            }
            InferenceId inf =
                polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
            // in very rare cases, we may find out that the unfolding lemma
            // for a membership is equivalent to true, in spite of the RE
            // not being rewritten to true.
            if (d_im.sendInference(iexp, noExplain, conc, inf))
            {
              addedLemma = true;
              if (e == 0)
              {
                // Remember that we have unfolded a membership for x
                // notice that we only do this here, after we have definitely
                // added a lemma.
                repUnfold.insert(rep);
              }
            }
            processed.push_back(assertion);
          }
          else
          {
            // otherwise we are incomplete
            d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
          }
        }
        if (d_state.isInConflict())
        {
          break;
        }
      }
    }
  }
  if (addedLemma)
  {
    if (!d_state.isInConflict())
    {
      for (unsigned i = 0; i < processed.size(); i++)
      {
        Trace("strings-regexp")
            << "...add " << processed[i] << " to u-cache." << std::endl;
        d_regexp_ucached.insert(processed[i]);
      }
    }
  }
}

bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
{
  std::unordered_set<Node> remove;

  for (const Node& m1 : mems)
  {
    bool m1Neg = m1.getKind() == NOT;
    Node m1Lit = m1Neg ? m1[0] : m1;

    if (remove.find(m1) != remove.end())
    {
      // Skip memberships marked for removal
      continue;
    }

    for (const Node& m2 : mems)
    {
      if (m1 == m2)
      {
        continue;
      }

      bool m2Neg = m2.getKind() == NOT;
      Node m2Lit = m2Neg ? m2[0] : m2;

      // Both regular expression memberships have the same polarity
      if (m1Neg == m2Neg)
      {
        if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
        {
          if (m1Neg)
          {
            // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
            //   mark ~str.in.re(x, R2) as reduced
            d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
            remove.insert(m2);
          }
          else
          {
            // str.in.re(x, R1) includes str.in.re(x, R2) --->
            //   mark str.in.re(x, R1) as reduced
            d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
            remove.insert(m1);

            // We don't need to process m1 anymore
            break;
          }
        }
      }
      else
      {
        Node pos = m1Neg ? m2Lit : m1Lit;
        Node neg = m1Neg ? m1Lit : m2Lit;
        if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
        {
          // We have a conflict because we have a case where str.in.re(x, R1)
          // and ~str.in.re(x, R2) but R2 includes R1, so there is no
          // possible value for x that satisfies both memberships.
          std::vector<Node> vec_nodes;
          vec_nodes.push_back(pos);
          vec_nodes.push_back(neg.negate());

          if (pos[0] != neg[0])
          {
            vec_nodes.push_back(pos[0].eqNode(neg[0]));
          }

          Node conc;
          d_im.sendInference(
              vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
          return false;
        }
      }
    }
  }

  mems.erase(std::remove_if(
                 mems.begin(),
                 mems.end(),
                 [&remove](Node& n) { return remove.find(n) != remove.end(); }),
             mems.end());

  return true;
}

bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
{
  // do not compute intersections if the re intersection mode is none
  if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
  {
    return true;
  }
  if (mems.empty())
  {
    // nothing to do
    return true;
  }
  // the initial regular expression membership and its constant type
  Node mi;
  RegExpConstType rcti = RE_C_UNKNOWN;
  NodeManager* nm = NodeManager::currentNM();
  for (const Node& m : mems)
  {
    if (m.getKind() != STRING_IN_REGEXP)
    {
      // do not do negative
      Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
      continue;
    }
    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
    if (rct == RE_C_VARIABLE
        || (options::stringRegExpInterMode()
                == options::RegExpInterMode::CONSTANT
            && rct != RE_C_CONRETE_CONSTANT))
    {
      // cannot do intersection on RE with variables, or with re.allchar based
      // on option.
      continue;
    }
    if (options::stringRegExpInterMode()
        == options::RegExpInterMode::ONE_CONSTANT)
    {
      if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
      {
        // if both have re.allchar, do not do intersection if the
        // options::RegExpInterMode::ONE_CONSTANT option is set.
        continue;
      }
    }
    if (mi.isNull())
    {
      // first regular expression seen
      mi = m;
      rcti = rct;
      continue;
    }
    Node resR = d_regexp_opr.intersect(mi[1], m[1]);
    // intersection should be computable
    Assert(!resR.isNull());
    if (resR == d_emptyRegexp)
    {
      // conflict, explain
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      Node conc;
      d_im.sendInference(
          vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
      // conflict, return
      return false;
    }
    // rewrite to ensure the equality checks below are precise
    Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
    Node mresr = Rewriter::rewrite(mres);
    if (mresr == mi)
    {
      // if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
      // to x in R1, hence x in R2 can be marked redundant.
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else if (mresr == m)
    {
      // same as above, opposite direction
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else
    {
      // new conclusion
      // (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      d_im.sendInference(
          vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
      // both are reduced
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
      // do not send more than one lemma for this class
      return true;
    }
  }
  return true;
}

bool RegExpSolver::checkPDerivative(
    Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
{
  if (d_state.areEqual(x, d_emptyString))
  {
    Node exp;
    switch (d_regexp_opr.delta(r, exp))
    {
      case 0:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      case 1:
      {
        d_regexp_ccached.insert(atom);
        break;
      }
      case 2:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      default:
        // Impossible
        break;
    }
  }
  else
  {
    if (deriveRegExp(x, r, atom, nf_exp))
    {
      addedLemma = true;
      d_regexp_ccached.insert(atom);
      return false;
    }
  }
  return true;
}

cvc5::String RegExpSolver::getHeadConst(Node x)
{
  if (x.isConst())
  {
    return x.getConst<String>();
  }
  else if (x.getKind() == STRING_CONCAT)
  {
    if (x[0].isConst())
    {
      return x[0].getConst<String>();
    }
  }
  return d_emptyString.getConst<String>();
}

bool RegExpSolver::deriveRegExp(Node x,
                                Node r,
                                Node atom,
                                std::vector<Node>& ant)
{
  Assert(x != d_emptyString);
  Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
                         << ", r= " << r << std::endl;
  cvc5::String s = getHeadConst(x);
  // only allow RE_DERIVE for concrete constant regular expressions
  if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
  {
    Node conc = Node::null();
    Node dc = r;
    bool flag = true;
    for (unsigned i = 0; i < s.size(); ++i)
    {
      cvc5::String c = s.substr(i, 1);
      Node dc2;
      int rt = d_regexp_opr.derivativeS(dc, c, dc2);
      dc = dc2;
      if (rt == 2)
      {
        // CONFLICT
        flag = false;
        break;
      }
    }
    // send lemma
    if (flag)
    {
      if (x.isConst())
      {
        Assert(false)
            << "Impossible: RegExpSolver::deriveRegExp: const string in const "
               "regular expression.";
        return false;
      }
      else
      {
        Assert(x.getKind() == STRING_CONCAT);
        std::vector<Node> vec_nodes;
        for (unsigned int i = 1; i < x.getNumChildren(); ++i)
        {
          vec_nodes.push_back(x[i]);
        }
        Node left = utils::mkConcat(vec_nodes, x.getType());
        left = Rewriter::rewrite(left);
        conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
      }
    }
    std::vector<Node> iexp = ant;
    std::vector<Node> noExplain;
    noExplain.push_back(atom);
    iexp.push_back(atom);
    d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
    return true;
  }
  return false;
}

Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
{
  Node ret = r;
  switch (r.getKind())
  {
    case REGEXP_EMPTY:
    case REGEXP_SIGMA:
    case REGEXP_RANGE: break;
    case STRING_TO_REGEXP:
    {
      if (!r[0].isConst())
      {
        Node tmp = d_csolver.getNormalString(r[0], nf_exp);
        if (tmp != r[0])
        {
          ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
        }
      }
      break;
    }
    case REGEXP_CONCAT:
    case REGEXP_UNION:
    case REGEXP_INTER:
    case REGEXP_STAR:
    case REGEXP_COMPLEMENT:
    {
      std::vector<Node> vec_nodes;
      for (const Node& cr : r)
      {
        vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
      }
      ret = Rewriter::rewrite(
          NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
      break;
    }
    default:
    {
      Trace("strings-error") << "Unsupported term: " << r
                             << " in normalization SymRegExp." << std::endl;
      Assert(!utils::isRegExpKind(r.getKind()));
    }
  }
  return ret;
}

}  // namespace strings
}  // namespace theory
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Andres Noetzli, Tianyi Liang
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* Implementation of the regular expression solver for the theory of strings.
14		*/
15
16		#include "theory/strings/regexp_solver.h"
17
18		#include <cmath>
19
20		#include "options/strings_options.h"
21		#include "smt/logic_exception.h"
22		#include "theory/ext_theory.h"
23		#include "theory/strings/theory_strings_utils.h"
24		#include "theory/theory_model.h"
25		#include "util/statistics_value.h"
26
27		using namespace std;
28		using namespace cvc5::context;
29		using namespace cvc5::kind;
30
31		namespace cvc5 {
32		namespace theory {
33		namespace strings {
34
35	9942	RegExpSolver::RegExpSolver(Env& env,
36		SolverState& s,
37		InferenceManager& im,
38		SkolemCache* skc,
39		CoreSolver& cs,
40		ExtfSolver& es,
41	9942	SequencesStatistics& stats)
42		: EnvObj(env),
43		d_state(s),
44		d_im(im),
45		d_csolver(cs),
46		d_esolver(es),
47		d_statistics(stats),
48	9942	d_regexp_ucached(userContext()),
49		d_regexp_ccached(context()),
50		d_processed_memberships(context()),
51	19884	d_regexp_opr(skc)
52		{
53	9942	d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
54	9942	d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
55	9942	d_true = NodeManager::currentNM()->mkConst(true);
56	9942	d_false = NodeManager::currentNM()->mkConst(false);
57	9942	}
58
59		Node RegExpSolver::mkAnd(Node c1, Node c2)
60		{
61		return NodeManager::currentNM()->mkNode(AND, c1, c2);
62		}
63
64	8149	void RegExpSolver::checkMemberships()
65		{
66		// add the memberships
67	16298	std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
68		// maps representatives to regular expression memberships in that class
69	16298	std::map<Node, std::vector<Node> > assertedMems;
70	8149	const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
71	8149	std::map<Node, ExtfInfoTmp>::const_iterator it;
72	9623	for (unsigned i = 0; i < mems.size(); i++)
73		{
74	2948	Node n = mems[i];
75	1474	Assert(n.getKind() == STRING_IN_REGEXP);
76	1474	it = einfo.find(n);
77	1474	Assert(it != einfo.end());
78	1474	if (!it->second.d_const.isNull())
79		{
80	1442	bool pol = it->second.d_const.getConst<bool>();
81	2884	Trace("strings-process-debug")
82	1442	<< " add membership : " << n << ", pol = " << pol << std::endl;
83	2884	Node r = d_state.getRepresentative(n[0]);
84	1442	assertedMems[r].push_back(pol ? n : n.negate());
85		}
86		else
87		{
88	64	Trace("strings-process-debug")
89	32	<< " irrelevant (non-asserted) membership : " << n << std::endl;
90		}
91		}
92	8149	check(assertedMems);
93	8149	}
94
95	8149	void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
96		{
97	8149	bool addedLemma = false;
98	8149	bool changed = false;
99	16276	std::vector<Node> processed;
100
101	8149	Trace("regexp-process") << "Checking Memberships ... " << std::endl;
102	9129	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
103		{
104	1982	std::vector<Node> mems2 = mr.second;
105	2004	Trace("regexp-process")
106	1002	<< "Memberships(" << mr.first << ") = " << mr.second << std::endl;
107	1002	if (!checkEqcInclusion(mems2))
108		{
109		// conflict discovered, return
110	10	return;
111		}
112	992	if (!checkEqcIntersect(mems2))
113		{
114		// conflict discovered, return
115	12	return;
116		}
117		}
118
119	16254	Trace("regexp-debug")
120	8127	<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
121	8127	<< std::endl;
122	8127	if (!addedLemma)
123		{
124		// get all memberships
125	16254	std::map<Node, Node> allMems;
126	9107	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
127		{
128	2363	for (const Node& m : mr.second)
129		{
130	1383	allMems[m] = mr.first;
131		}
132		}
133
134	8127	NodeManager* nm = NodeManager::currentNM();
135		// representatives of strings that are the LHS of positive memberships that
136		// we unfolded
137	16254	std::unordered_set<Node> repUnfold;
138		// check positive (e=0), then negative (e=1) memberships
139	24381	for (unsigned e = 0; e < 2; e++)
140		{
141	18966	for (const std::pair<const Node, Node>& mp : allMems)
142		{
143	3299	Node assertion = mp.first;
144	3299	Node rep = mp.second;
145		// check regular expression membership
146	5528	Trace("regexp-debug")
147	2764	<< "Check : " << assertion << " "
148	5528	<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
149	2764	<< " "
150	5528	<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
151	2764	<< std::endl;
152	5528	if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
153	2764	\|\| d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
154		{
155	1458	continue;
156		}
157	2612	Trace("strings-regexp")
158	1306	<< "We have regular expression assertion : " << assertion
159	1306	<< std::endl;
160	1841	Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
161	1306	Assert(atom == Rewriter::rewrite(atom));
162	1306	bool polarity = assertion.getKind() != NOT;
163	1306	if (polarity != (e == 0))
164		{
165	645	continue;
166		}
167	661	bool flag = true;
168	1196	Node x = atom[0];
169	1196	Node r = atom[1];
170	661	Assert(rep == d_state.getRepresentative(x));
171		// The following code takes normal forms into account for the purposes
172		// of simplifying a regular expression membership x in R. For example,
173		// if x = "A" in the current context, then we may be interested in
174		// reasoning about ( x in R ) * { x -> "A" }. Say we update the
175		// membership to nx in R', then:
176		// - nfexp => ( x in R ) <=> nx in R'
177		// - rnfexp => R = R'
178		// We use these explanations below as assumptions on inferences when
179		// appropriate. Notice that for inferring conflicts and tautologies,
180		// we use the normal form of x always. This is because we always want to
181		// discover conflicts/tautologies whenever possible.
182		// For inferences based on regular expression unfolding, we do not use
183		// the normal form of x. The reason is that it is better to unfold
184		// regular expression memberships in a context-indepedent manner,
185		// that is, not taking into account the current normal form of x, since
186		// this ensures these lemmas are still relevant after backtracking.
187	1196	std::vector<Node> nfexp;
188	1196	std::vector<Node> rnfexp;
189		// The normal form of x is stored in nx, while x is left unchanged.
190	1196	Node nx = x;
191	661	if (!x.isConst())
192		{
193	642	nx = d_csolver.getNormalString(x, nfexp);
194		}
195		// If r is not a constant regular expression, we update it based on
196		// normal forms, which may concretize its variables.
197	661	if (!d_regexp_opr.checkConstRegExp(r))
198		{
199	111	r = getNormalSymRegExp(r, rnfexp);
200	111	nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
201	111	changed = true;
202		}
203	1322	Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
204	661	<< nx << " IN " << r << std::endl;
205	661	if (nx != x \|\| changed)
206		{
207		// We rewrite the membership nx IN r.
208	668	Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
209	366	Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
210	366	if (tmp.isConst())
211		{
212	76	if (tmp.getConst<bool>() == polarity)
213		{
214		// it is satisfied in this SAT context
215	12	d_regexp_ccached.insert(assertion);
216	12	continue;
217		}
218		else
219		{
220		// we have a conflict
221	104	std::vector<Node> iexp = nfexp;
222	104	std::vector<Node> noExplain;
223	52	iexp.push_back(assertion);
224	52	noExplain.push_back(assertion);
225	104	Node conc = Node::null();
226	52	d_im.sendInference(
227		iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
228	52	addedLemma = true;
229	52	break;
230		}
231		}
232		}
233	597	if (e == 1 && repUnfold.find(rep) != repUnfold.end())
234		{
235		// do not unfold negative memberships of strings that have new
236		// positive unfoldings. For example:
237		// x in ("A")* ^ NOT x in ("B")*
238		// We unfold x = "A" ++ x' only. The intution here is that positive
239		// unfoldings lead to stronger constraints (equalities are stronger
240		// than disequalities), and are easier to check.
241	62	continue;
242		}
243	535	if (polarity)
244		{
245	400	flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
246		}
247		else
248		{
249	135	if (!options::stringExp())
250		{
251		throw LogicException(
252		"Strings Incomplete (due to Negative Membership) by default, "
253		"try --strings-exp option.");
254		}
255		}
256	535	if (flag)
257		{
258		// check if the term is atomic
259	1054	Trace("strings-regexp")
260	527	<< "Unroll/simplify membership of atomic term " << rep
261	527	<< std::endl;
262		// if so, do simple unrolling
263	527	Trace("strings-regexp") << "Simplify on " << atom << std::endl;
264	1054	Node conc = d_regexp_opr.simplify(atom, polarity);
265	527	Trace("strings-regexp") << "...finished, got " << conc << std::endl;
266		// if simplifying successfully generated a lemma
267	527	if (!conc.isNull())
268		{
269	1048	std::vector<Node> iexp;
270	1048	std::vector<Node> noExplain;
271	524	iexp.push_back(assertion);
272	524	noExplain.push_back(assertion);
273	524	Assert(atom.getKind() == STRING_IN_REGEXP);
274	524	if (polarity)
275		{
276	390	d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
277		}
278		else
279		{
280	134	d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
281		}
282	524	InferenceId inf =
283	524	polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
284		// in very rare cases, we may find out that the unfolding lemma
285		// for a membership is equivalent to true, in spite of the RE
286		// not being rewritten to true.
287	524	if (d_im.sendInference(iexp, noExplain, conc, inf))
288		{
289	518	addedLemma = true;
290	518	if (e == 0)
291		{
292		// Remember that we have unfolded a membership for x
293		// notice that we only do this here, after we have definitely
294		// added a lemma.
295	388	repUnfold.insert(rep);
296		}
297		}
298	524	processed.push_back(assertion);
299		}
300		else
301		{
302		// otherwise we are incomplete
303	3	d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
304		}
305		}
306	535	if (d_state.isInConflict())
307		{
308		break;
309		}
310		}
311		}
312		}
313	8127	if (addedLemma)
314		{
315	392	if (!d_state.isInConflict())
316		{
317	912	for (unsigned i = 0; i < processed.size(); i++)
318		{
319	1040	Trace("strings-regexp")
320	520	<< "...add " << processed[i] << " to u-cache." << std::endl;
321	520	d_regexp_ucached.insert(processed[i]);
322		}
323		}
324		}
325		}
326
327	1002	bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
328		{
329	2004	std::unordered_set<Node> remove;
330
331	2416	for (const Node& m1 : mems)
332		{
333	1424	bool m1Neg = m1.getKind() == NOT;
334	2800	Node m1Lit = m1Neg ? m1[0] : m1;
335
336	1424	if (remove.find(m1) != remove.end())
337		{
338		// Skip memberships marked for removal
339	38	continue;
340		}
341
342	3866	for (const Node& m2 : mems)
343		{
344	2544	if (m1 == m2)
345		{
346	1358	continue;
347		}
348
349	1186	bool m2Neg = m2.getKind() == NOT;
350	2308	Node m2Lit = m2Neg ? m2[0] : m2;
351
352		// Both regular expression memberships have the same polarity
353	1186	if (m1Neg == m2Neg)
354		{
355	756	if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
356		{
357	94	if (m1Neg)
358		{
359		// ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
360		// mark ~str.in.re(x, R2) as reduced
361	40	d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
362	40	remove.insert(m2);
363		}
364		else
365		{
366		// str.in.re(x, R1) includes str.in.re(x, R2) --->
367		// mark str.in.re(x, R1) as reduced
368	54	d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
369	54	remove.insert(m1);
370
371		// We don't need to process m1 anymore
372	54	break;
373		}
374		}
375		}
376		else
377		{
378	850	Node pos = m1Neg ? m2Lit : m1Lit;
379	850	Node neg = m1Neg ? m1Lit : m2Lit;
380	430	if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
381		{
382		// We have a conflict because we have a case where str.in.re(x, R1)
383		// and ~str.in.re(x, R2) but R2 includes R1, so there is no
384		// possible value for x that satisfies both memberships.
385	20	std::vector<Node> vec_nodes;
386	10	vec_nodes.push_back(pos);
387	10	vec_nodes.push_back(neg.negate());
388
389	10	if (pos[0] != neg[0])
390		{
391		vec_nodes.push_back(pos[0].eqNode(neg[0]));
392		}
393
394	20	Node conc;
395	10	d_im.sendInference(
396		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
397	10	return false;
398		}
399		}
400		}
401		}
402
403	2976	mems.erase(std::remove_if(
404		mems.begin(),
405		mems.end(),
406	2402	[&remove](Node& n) { return remove.find(n) != remove.end(); }),
407	3968	mems.end());
408
409	992	return true;
410		}
411
412	992	bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
413		{
414		// do not compute intersections if the re intersection mode is none
415	992	if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
416		{
417		return true;
418		}
419	992	if (mems.empty())
420		{
421		// nothing to do
422	22	return true;
423		}
424		// the initial regular expression membership and its constant type
425	1940	Node mi;
426	970	RegExpConstType rcti = RE_C_UNKNOWN;
427	970	NodeManager* nm = NodeManager::currentNM();
428	2178	for (const Node& m : mems)
429		{
430	1765	if (m.getKind() != STRING_IN_REGEXP)
431		{
432		// do not do negative
433	497	Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
434	1698	continue;
435		}
436	771	RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
437	771	if (rct == RE_C_VARIABLE
438	1311	\|\| (options::stringRegExpInterMode()
439		== options::RegExpInterMode::CONSTANT
440	623	&& rct != RE_C_CONRETE_CONSTANT))
441		{
442		// cannot do intersection on RE with variables, or with re.allchar based
443		// on option.
444	540	continue;
445		}
446	462	if (options::stringRegExpInterMode()
447	231	== options::RegExpInterMode::ONE_CONSTANT)
448		{
449		if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
450		{
451		// if both have re.allchar, do not do intersection if the
452		// options::RegExpInterMode::ONE_CONSTANT option is set.
453		continue;
454		}
455		}
456	395	if (mi.isNull())
457		{
458		// first regular expression seen
459	164	mi = m;
460	164	rcti = rct;
461	164	continue;
462		}
463	74	Node resR = d_regexp_opr.intersect(mi[1], m[1]);
464		// intersection should be computable
465	67	Assert(!resR.isNull());
466	67	if (resR == d_emptyRegexp)
467		{
468		// conflict, explain
469	24	std::vector<Node> vec_nodes;
470	12	vec_nodes.push_back(mi);
471	12	vec_nodes.push_back(m);
472	12	if (mi[0] != m[0])
473		{
474	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
475		}
476	24	Node conc;
477	12	d_im.sendInference(
478		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
479		// conflict, return
480	12	return false;
481		}
482		// rewrite to ensure the equality checks below are precise
483	62	Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
484	62	Node mresr = Rewriter::rewrite(mres);
485	55	if (mresr == mi)
486		{
487		// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
488		// to x in R1, hence x in R2 can be marked redundant.
489		d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
490		}
491	55	else if (mresr == m)
492		{
493		// same as above, opposite direction
494	7	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
495		}
496		else
497		{
498		// new conclusion
499		// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
500	96	std::vector<Node> vec_nodes;
501	48	vec_nodes.push_back(mi);
502	48	vec_nodes.push_back(m);
503	48	if (mi[0] != m[0])
504		{
505	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
506		}
507	48	d_im.sendInference(
508		vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
509		// both are reduced
510	48	d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
511	48	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
512		// do not send more than one lemma for this class
513	48	return true;
514		}
515		}
516	910	return true;
517		}
518
519	400	bool RegExpSolver::checkPDerivative(
520		Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
521		{
522	400	if (d_state.areEqual(x, d_emptyString))
523		{
524	4	Node exp;
525	4	switch (d_regexp_opr.delta(r, exp))
526		{
527		case 0:
528		{
529		std::vector<Node> noExplain;
530		noExplain.push_back(atom);
531		noExplain.push_back(x.eqNode(d_emptyString));
532		std::vector<Node> iexp = nf_exp;
533		iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
534		d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
535		addedLemma = true;
536		d_regexp_ccached.insert(atom);
537		return false;
538		}
539		case 1:
540		{
541		d_regexp_ccached.insert(atom);
542		break;
543		}
544	4	case 2:
545		{
546	8	std::vector<Node> noExplain;
547	4	noExplain.push_back(atom);
548	4	noExplain.push_back(x.eqNode(d_emptyString));
549	8	std::vector<Node> iexp = nf_exp;
550	4	iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
551	4	d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
552	4	addedLemma = true;
553	4	d_regexp_ccached.insert(atom);
554	4	return false;
555		}
556		default:
557		// Impossible
558		break;
559		}
560		}
561		else
562		{
563	396	if (deriveRegExp(x, r, atom, nf_exp))
564		{
565	4	addedLemma = true;
566	4	d_regexp_ccached.insert(atom);
567	4	return false;
568		}
569		}
570	392	return true;
571		}
572
573	396	cvc5::String RegExpSolver::getHeadConst(Node x)
574		{
575	396	if (x.isConst())
576		{
577	2	return x.getConst<String>();
578		}
579	394	else if (x.getKind() == STRING_CONCAT)
580		{
581	50	if (x[0].isConst())
582		{
583	6	return x[0].getConst<String>();
584		}
585		}
586	388	return d_emptyString.getConst<String>();
587		}
588
589	396	bool RegExpSolver::deriveRegExp(Node x,
590		Node r,
591		Node atom,
592		std::vector<Node>& ant)
593		{
594	396	Assert(x != d_emptyString);
595	792	Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
596	396	<< ", r= " << r << std::endl;
597	792	cvc5::String s = getHeadConst(x);
598		// only allow RE_DERIVE for concrete constant regular expressions
599	396	if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
600		{
601	8	Node conc = Node::null();
602	8	Node dc = r;
603	4	bool flag = true;
604	4	for (unsigned i = 0; i < s.size(); ++i)
605		{
606	4	cvc5::String c = s.substr(i, 1);
607	4	Node dc2;
608	4	int rt = d_regexp_opr.derivativeS(dc, c, dc2);
609	4	dc = dc2;
610	4	if (rt == 2)
611		{
612		// CONFLICT
613	4	flag = false;
614	4	break;
615		}
616		}
617		// send lemma
618	4	if (flag)
619		{
620		if (x.isConst())
621		{
622		Assert(false)
623		<< "Impossible: RegExpSolver::deriveRegExp: const string in const "
624		"regular expression.";
625		return false;
626		}
627		else
628		{
629		Assert(x.getKind() == STRING_CONCAT);
630		std::vector<Node> vec_nodes;
631		for (unsigned int i = 1; i < x.getNumChildren(); ++i)
632		{
633		vec_nodes.push_back(x[i]);
634		}
635		Node left = utils::mkConcat(vec_nodes, x.getType());
636		left = Rewriter::rewrite(left);
637		conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
638		}
639		}
640	8	std::vector<Node> iexp = ant;
641	8	std::vector<Node> noExplain;
642	4	noExplain.push_back(atom);
643	4	iexp.push_back(atom);
644	4	d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
645	4	return true;
646		}
647	392	return false;
648		}
649
650	577	Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
651		{
652	577	Node ret = r;
653	577	switch (r.getKind())
654		{
655	72	case REGEXP_EMPTY:
656		case REGEXP_SIGMA:
657	72	case REGEXP_RANGE: break;
658	220	case STRING_TO_REGEXP:
659		{
660	220	if (!r[0].isConst())
661		{
662	228	Node tmp = d_csolver.getNormalString(r[0], nf_exp);
663	114	if (tmp != r[0])
664		{
665	100	ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
666		}
667		}
668	220	break;
669		}
670	282	case REGEXP_CONCAT:
671		case REGEXP_UNION:
672		case REGEXP_INTER:
673		case REGEXP_STAR:
674		case REGEXP_COMPLEMENT:
675		{
676	564	std::vector<Node> vec_nodes;
677	748	for (const Node& cr : r)
678		{
679	466	vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
680		}
681	282	ret = Rewriter::rewrite(
682	564	NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
683	282	break;
684		}
685	3	default:
686		{
687	6	Trace("strings-error") << "Unsupported term: " << r
688	3	<< " in normalization SymRegExp." << std::endl;
689	3	Assert(!utils::isRegExpKind(r.getKind()));
690		}
691		}
692	577	return ret;
693		}
694
695		} // namespace strings
696		} // namespace theory
697	29577	} // namespace cvc5