Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/theory/strings/regexp_solver.cpp	Lines:	297	334	88.9 %
Date:	2021-05-21	Branches:	596	1241	48.0 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Implementation of the regular expression solver for the theory of strings.
 */

#include "theory/strings/regexp_solver.h"

#include <cmath>

#include "options/strings_options.h"
#include "smt/logic_exception.h"
#include "theory/ext_theory.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/theory_model.h"
#include "util/statistics_value.h"

using namespace std;
using namespace cvc5::context;
using namespace cvc5::kind;

namespace cvc5 {
namespace theory {
namespace strings {

RegExpSolver::RegExpSolver(SolverState& s,
                           InferenceManager& im,
                           SkolemCache* skc,
                           CoreSolver& cs,
                           ExtfSolver& es,
                           SequencesStatistics& stats)
    : d_state(s),
      d_im(im),
      d_csolver(cs),
      d_esolver(es),
      d_statistics(stats),
      d_regexp_ucached(s.getUserContext()),
      d_regexp_ccached(s.getSatContext()),
      d_processed_memberships(s.getSatContext()),
      d_regexp_opr(skc)
{
  d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
  std::vector<Node> nvec;
  d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY, nvec);
  d_true = NodeManager::currentNM()->mkConst(true);
  d_false = NodeManager::currentNM()->mkConst(false);
}

Node RegExpSolver::mkAnd(Node c1, Node c2)
{
  return NodeManager::currentNM()->mkNode(AND, c1, c2);
}

void RegExpSolver::checkMemberships()
{
  // add the memberships
  std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
  // maps representatives to regular expression memberships in that class
  std::map<Node, std::vector<Node> > assertedMems;
  const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
  std::map<Node, ExtfInfoTmp>::const_iterator it;
  for (unsigned i = 0; i < mems.size(); i++)
  {
    Node n = mems[i];
    Assert(n.getKind() == STRING_IN_REGEXP);
    it = einfo.find(n);
    Assert(it != einfo.end());
    if (!it->second.d_const.isNull())
    {
      bool pol = it->second.d_const.getConst<bool>();
      Trace("strings-process-debug")
          << "  add membership : " << n << ", pol = " << pol << std::endl;
      Node r = d_state.getRepresentative(n[0]);
      assertedMems[r].push_back(pol ? n : n.negate());
    }
    else
    {
      Trace("strings-process-debug")
          << "  irrelevant (non-asserted) membership : " << n << std::endl;
    }
  }
  check(assertedMems);
}

void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
{
  bool addedLemma = false;
  bool changed = false;
  std::vector<Node> processed;

  Trace("regexp-process") << "Checking Memberships ... " << std::endl;
  for (const std::pair<const Node, std::vector<Node> >& mr : mems)
  {
    std::vector<Node> mems2 = mr.second;
    Trace("regexp-process")
        << "Memberships(" << mr.first << ") = " << mr.second << std::endl;
    if (!checkEqcInclusion(mems2))
    {
      // conflict discovered, return
      return;
    }
    if (!checkEqcIntersect(mems2))
    {
      // conflict discovered, return
      return;
    }
  }

  Trace("regexp-debug")
      << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
      << std::endl;
  if (!addedLemma)
  {
    // get all memberships
    std::map<Node, Node> allMems;
    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
      for (const Node& m : mr.second)
      {
        bool polarity = m.getKind() != NOT;
        if (polarity || !options::stringIgnNegMembership())
        {
          allMems[m] = mr.first;
        }
      }
    }

    NodeManager* nm = NodeManager::currentNM();
    // representatives of strings that are the LHS of positive memberships that
    // we unfolded
    std::unordered_set<Node> repUnfold;
    // check positive (e=0), then negative (e=1) memberships
    for (unsigned e = 0; e < 2; e++)
    {
      for (const std::pair<const Node, Node>& mp : allMems)
      {
        Node assertion = mp.first;
        Node rep = mp.second;
        // check regular expression membership
        Trace("regexp-debug")
            << "Check : " << assertion << " "
            << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
            << " "
            << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
            << std::endl;
        if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
            || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
        {
          continue;
        }
        Trace("strings-regexp")
            << "We have regular expression assertion : " << assertion
            << std::endl;
        Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
        Assert(atom == Rewriter::rewrite(atom));
        bool polarity = assertion.getKind() != NOT;
        if (polarity != (e == 0))
        {
          continue;
        }
        bool flag = true;
        Node x = atom[0];
        Node r = atom[1];
        Assert(rep == d_state.getRepresentative(x));
        // The following code takes normal forms into account for the purposes
        // of simplifying a regular expression membership x in R. For example,
        // if x = "A" in the current context, then we may be interested in
        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
        // membership to nx in R', then:
        // - nfexp => ( x in R ) <=> nx in R'
        // - rnfexp => R = R'
        // We use these explanations below as assumptions on inferences when
        // appropriate. Notice that for inferring conflicts and tautologies,
        // we use the normal form of x always. This is because we always want to
        // discover conflicts/tautologies whenever possible.
        // For inferences based on regular expression unfolding, we do not use
        // the normal form of x. The reason is that it is better to unfold
        // regular expression memberships in a context-indepedent manner,
        // that is, not taking into account the current normal form of x, since
        // this ensures these lemmas are still relevant after backtracking.
        std::vector<Node> nfexp;
        std::vector<Node> rnfexp;
        // The normal form of x is stored in nx, while x is left unchanged.
        Node nx = x;
        if (!x.isConst())
        {
          nx = d_csolver.getNormalString(x, nfexp);
        }
        // If r is not a constant regular expression, we update it based on
        // normal forms, which may concretize its variables.
        if (!d_regexp_opr.checkConstRegExp(r))
        {
          r = getNormalSymRegExp(r, rnfexp);
          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
          changed = true;
        }
        Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
                                   << nx << " IN " << r << std::endl;
        if (nx != x || changed)
        {
          // We rewrite the membership nx IN r.
          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
          if (tmp.isConst())
          {
            if (tmp.getConst<bool>() == polarity)
            {
              // it is satisfied in this SAT context
              d_regexp_ccached.insert(assertion);
              continue;
            }
            else
            {
              // we have a conflict
              std::vector<Node> iexp = nfexp;
              std::vector<Node> noExplain;
              iexp.push_back(assertion);
              noExplain.push_back(assertion);
              Node conc = Node::null();
              d_im.sendInference(
                  iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
              addedLemma = true;
              break;
            }
          }
        }
        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
        {
          // do not unfold negative memberships of strings that have new
          // positive unfoldings. For example:
          //   x in ("A")* ^ NOT x in ("B")*
          // We unfold x = "A" ++ x' only. The intution here is that positive
          // unfoldings lead to stronger constraints (equalities are stronger
          // than disequalities), and are easier to check.
          continue;
        }
        if (polarity)
        {
          flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
        }
        else
        {
          if (!options::stringExp())
          {
            throw LogicException(
                "Strings Incomplete (due to Negative Membership) by default, "
                "try --strings-exp option.");
          }
        }
        if (flag)
        {
          // check if the term is atomic
          Trace("strings-regexp")
              << "Unroll/simplify membership of atomic term " << rep
              << std::endl;
          // if so, do simple unrolling
          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
          Node conc = d_regexp_opr.simplify(atom, polarity);
          Trace("strings-regexp") << "...finished, got " << conc << std::endl;
          // if simplifying successfully generated a lemma
          if (!conc.isNull())
          {
            std::vector<Node> iexp;
            std::vector<Node> noExplain;
            iexp.push_back(assertion);
            noExplain.push_back(assertion);
            Assert(atom.getKind() == STRING_IN_REGEXP);
            if (polarity)
            {
              d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
            }
            else
            {
              d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
            }
            InferenceId inf =
                polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
            // in very rare cases, we may find out that the unfolding lemma
            // for a membership is equivalent to true, in spite of the RE
            // not being rewritten to true.
            if (d_im.sendInference(iexp, noExplain, conc, inf))
            {
              addedLemma = true;
              if (e == 0)
              {
                // Remember that we have unfolded a membership for x
                // notice that we only do this here, after we have definitely
                // added a lemma.
                repUnfold.insert(rep);
              }
            }
            processed.push_back(assertion);
          }
          else
          {
            // otherwise we are incomplete
            d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
          }
        }
        if (d_state.isInConflict())
        {
          break;
        }
      }
    }
  }
  if (addedLemma)
  {
    if (!d_state.isInConflict())
    {
      for (unsigned i = 0; i < processed.size(); i++)
      {
        Trace("strings-regexp")
            << "...add " << processed[i] << " to u-cache." << std::endl;
        d_regexp_ucached.insert(processed[i]);
      }
    }
  }
}

bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
{
  std::unordered_set<Node> remove;

  for (const Node& m1 : mems)
  {
    bool m1Neg = m1.getKind() == NOT;
    Node m1Lit = m1Neg ? m1[0] : m1;

    if (remove.find(m1) != remove.end())
    {
      // Skip memberships marked for removal
      continue;
    }

    for (const Node& m2 : mems)
    {
      if (m1 == m2)
      {
        continue;
      }

      bool m2Neg = m2.getKind() == NOT;
      Node m2Lit = m2Neg ? m2[0] : m2;

      // Both regular expression memberships have the same polarity
      if (m1Neg == m2Neg)
      {
        if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
        {
          if (m1Neg)
          {
            // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
            //   mark ~str.in.re(x, R2) as reduced
            d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
            remove.insert(m2);
          }
          else
          {
            // str.in.re(x, R1) includes str.in.re(x, R2) --->
            //   mark str.in.re(x, R1) as reduced
            d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
            remove.insert(m1);

            // We don't need to process m1 anymore
            break;
          }
        }
      }
      else
      {
        Node pos = m1Neg ? m2Lit : m1Lit;
        Node neg = m1Neg ? m1Lit : m2Lit;
        if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
        {
          // We have a conflict because we have a case where str.in.re(x, R1)
          // and ~str.in.re(x, R2) but R2 includes R1, so there is no
          // possible value for x that satisfies both memberships.
          std::vector<Node> vec_nodes;
          vec_nodes.push_back(pos);
          vec_nodes.push_back(neg.negate());

          if (pos[0] != neg[0])
          {
            vec_nodes.push_back(pos[0].eqNode(neg[0]));
          }

          Node conc;
          d_im.sendInference(
              vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
          return false;
        }
      }
    }
  }

  mems.erase(std::remove_if(
                 mems.begin(),
                 mems.end(),
                 [&remove](Node& n) { return remove.find(n) != remove.end(); }),
             mems.end());

  return true;
}

bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
{
  // do not compute intersections if the re intersection mode is none
  if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
  {
    return true;
  }
  if (mems.empty())
  {
    // nothing to do
    return true;
  }
  // the initial regular expression membership and its constant type
  Node mi;
  RegExpConstType rcti = RE_C_UNKNOWN;
  NodeManager* nm = NodeManager::currentNM();
  for (const Node& m : mems)
  {
    if (m.getKind() != STRING_IN_REGEXP)
    {
      // do not do negative
      Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
      continue;
    }
    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
    if (rct == RE_C_VARIABLE
        || (options::stringRegExpInterMode()
                == options::RegExpInterMode::CONSTANT
            && rct != RE_C_CONRETE_CONSTANT))
    {
      // cannot do intersection on RE with variables, or with re.allchar based
      // on option.
      continue;
    }
    if (options::stringRegExpInterMode()
        == options::RegExpInterMode::ONE_CONSTANT)
    {
      if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
      {
        // if both have re.allchar, do not do intersection if the
        // options::RegExpInterMode::ONE_CONSTANT option is set.
        continue;
      }
    }
    if (mi.isNull())
    {
      // first regular expression seen
      mi = m;
      rcti = rct;
      continue;
    }
    Node resR = d_regexp_opr.intersect(mi[1], m[1]);
    // intersection should be computable
    Assert(!resR.isNull());
    if (resR == d_emptyRegexp)
    {
      // conflict, explain
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      Node conc;
      d_im.sendInference(
          vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
      // conflict, return
      return false;
    }
    // rewrite to ensure the equality checks below are precise
    Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
    Node mresr = Rewriter::rewrite(mres);
    if (mresr == mi)
    {
      // if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
      // to x in R1, hence x in R2 can be marked redundant.
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else if (mresr == m)
    {
      // same as above, opposite direction
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else
    {
      // new conclusion
      // (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      d_im.sendInference(
          vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
      // both are reduced
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
      // do not send more than one lemma for this class
      return true;
    }
  }
  return true;
}

bool RegExpSolver::checkPDerivative(
    Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
{
  if (d_state.areEqual(x, d_emptyString))
  {
    Node exp;
    switch (d_regexp_opr.delta(r, exp))
    {
      case 0:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      case 1:
      {
        d_regexp_ccached.insert(atom);
        break;
      }
      case 2:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      default:
        // Impossible
        break;
    }
  }
  else
  {
    if (deriveRegExp(x, r, atom, nf_exp))
    {
      addedLemma = true;
      d_regexp_ccached.insert(atom);
      return false;
    }
  }
  return true;
}

cvc5::String RegExpSolver::getHeadConst(Node x)
{
  if (x.isConst())
  {
    return x.getConst<String>();
  }
  else if (x.getKind() == STRING_CONCAT)
  {
    if (x[0].isConst())
    {
      return x[0].getConst<String>();
    }
  }
  return d_emptyString.getConst<String>();
}

bool RegExpSolver::deriveRegExp(Node x,
                                Node r,
                                Node atom,
                                std::vector<Node>& ant)
{
  Assert(x != d_emptyString);
  Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
                         << ", r= " << r << std::endl;
  cvc5::String s = getHeadConst(x);
  // only allow RE_DERIVE for concrete constant regular expressions
  if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
  {
    Node conc = Node::null();
    Node dc = r;
    bool flag = true;
    for (unsigned i = 0; i < s.size(); ++i)
    {
      cvc5::String c = s.substr(i, 1);
      Node dc2;
      int rt = d_regexp_opr.derivativeS(dc, c, dc2);
      dc = dc2;
      if (rt == 2)
      {
        // CONFLICT
        flag = false;
        break;
      }
    }
    // send lemma
    if (flag)
    {
      if (x.isConst())
      {
        Assert(false)
            << "Impossible: RegExpSolver::deriveRegExp: const string in const "
               "regular expression.";
        return false;
      }
      else
      {
        Assert(x.getKind() == STRING_CONCAT);
        std::vector<Node> vec_nodes;
        for (unsigned int i = 1; i < x.getNumChildren(); ++i)
        {
          vec_nodes.push_back(x[i]);
        }
        Node left = utils::mkConcat(vec_nodes, x.getType());
        left = Rewriter::rewrite(left);
        conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
      }
    }
    std::vector<Node> iexp = ant;
    std::vector<Node> noExplain;
    noExplain.push_back(atom);
    iexp.push_back(atom);
    d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
    return true;
  }
  return false;
}

Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
{
  Node ret = r;
  switch (r.getKind())
  {
    case REGEXP_EMPTY:
    case REGEXP_SIGMA:
    case REGEXP_RANGE: break;
    case STRING_TO_REGEXP:
    {
      if (!r[0].isConst())
      {
        Node tmp = d_csolver.getNormalString(r[0], nf_exp);
        if (tmp != r[0])
        {
          ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
        }
      }
      break;
    }
    case REGEXP_CONCAT:
    case REGEXP_UNION:
    case REGEXP_INTER:
    case REGEXP_STAR:
    case REGEXP_COMPLEMENT:
    {
      std::vector<Node> vec_nodes;
      for (const Node& cr : r)
      {
        vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
      }
      ret = Rewriter::rewrite(
          NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
      break;
    }
    default:
    {
      Trace("strings-error") << "Unsupported term: " << r
                             << " in normalization SymRegExp." << std::endl;
      Assert(!utils::isRegExpKind(r.getKind()));
    }
  }
  return ret;
}

}  // namespace strings
}  // namespace theory
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Andres Noetzli, Tianyi Liang
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* Implementation of the regular expression solver for the theory of strings.
14		*/
15
16		#include "theory/strings/regexp_solver.h"
17
18		#include <cmath>
19
20		#include "options/strings_options.h"
21		#include "smt/logic_exception.h"
22		#include "theory/ext_theory.h"
23		#include "theory/strings/theory_strings_utils.h"
24		#include "theory/theory_model.h"
25		#include "util/statistics_value.h"
26
27		using namespace std;
28		using namespace cvc5::context;
29		using namespace cvc5::kind;
30
31		namespace cvc5 {
32		namespace theory {
33		namespace strings {
34
35	8954	RegExpSolver::RegExpSolver(SolverState& s,
36		InferenceManager& im,
37		SkolemCache* skc,
38		CoreSolver& cs,
39		ExtfSolver& es,
40	8954	SequencesStatistics& stats)
41		: d_state(s),
42		d_im(im),
43		d_csolver(cs),
44		d_esolver(es),
45		d_statistics(stats),
46	8954	d_regexp_ucached(s.getUserContext()),
47		d_regexp_ccached(s.getSatContext()),
48		d_processed_memberships(s.getSatContext()),
49	17908	d_regexp_opr(skc)
50		{
51	8954	d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
52	17908	std::vector<Node> nvec;
53	8954	d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY, nvec);
54	8954	d_true = NodeManager::currentNM()->mkConst(true);
55	8954	d_false = NodeManager::currentNM()->mkConst(false);
56	8954	}
57
58		Node RegExpSolver::mkAnd(Node c1, Node c2)
59		{
60		return NodeManager::currentNM()->mkNode(AND, c1, c2);
61		}
62
63	7356	void RegExpSolver::checkMemberships()
64		{
65		// add the memberships
66	14712	std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
67		// maps representatives to regular expression memberships in that class
68	14712	std::map<Node, std::vector<Node> > assertedMems;
69	7356	const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
70	7356	std::map<Node, ExtfInfoTmp>::const_iterator it;
71	8351	for (unsigned i = 0; i < mems.size(); i++)
72		{
73	1990	Node n = mems[i];
74	995	Assert(n.getKind() == STRING_IN_REGEXP);
75	995	it = einfo.find(n);
76	995	Assert(it != einfo.end());
77	995	if (!it->second.d_const.isNull())
78		{
79	973	bool pol = it->second.d_const.getConst<bool>();
80	1946	Trace("strings-process-debug")
81	973	<< " add membership : " << n << ", pol = " << pol << std::endl;
82	1946	Node r = d_state.getRepresentative(n[0]);
83	973	assertedMems[r].push_back(pol ? n : n.negate());
84		}
85		else
86		{
87	44	Trace("strings-process-debug")
88	22	<< " irrelevant (non-asserted) membership : " << n << std::endl;
89		}
90		}
91	7356	check(assertedMems);
92	7356	}
93
94	7356	void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
95		{
96	7356	bool addedLemma = false;
97	7356	bool changed = false;
98	14690	std::vector<Node> processed;
99
100	7356	Trace("regexp-process") << "Checking Memberships ... " << std::endl;
101	8015	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
102		{
103	1340	std::vector<Node> mems2 = mr.second;
104	1362	Trace("regexp-process")
105	681	<< "Memberships(" << mr.first << ") = " << mr.second << std::endl;
106	681	if (!checkEqcInclusion(mems2))
107		{
108		// conflict discovered, return
109	10	return;
110		}
111	671	if (!checkEqcIntersect(mems2))
112		{
113		// conflict discovered, return
114	12	return;
115		}
116		}
117
118	14668	Trace("regexp-debug")
119	7334	<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
120	7334	<< std::endl;
121	7334	if (!addedLemma)
122		{
123		// get all memberships
124	14668	std::map<Node, Node> allMems;
125	7993	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
126		{
127	1573	for (const Node& m : mr.second)
128		{
129	914	bool polarity = m.getKind() != NOT;
130	1268	if (polarity \|\| !options::stringIgnNegMembership())
131		{
132	908	allMems[m] = mr.first;
133		}
134		}
135		}
136
137	7334	NodeManager* nm = NodeManager::currentNM();
138		// representatives of strings that are the LHS of positive memberships that
139		// we unfolded
140	14668	std::unordered_set<Node> repUnfold;
141		// check positive (e=0), then negative (e=1) memberships
142	22002	for (unsigned e = 0; e < 2; e++)
143		{
144	16432	for (const std::pair<const Node, Node>& mp : allMems)
145		{
146	2188	Node assertion = mp.first;
147	2188	Node rep = mp.second;
148		// check regular expression membership
149	3628	Trace("regexp-debug")
150	1814	<< "Check : " << assertion << " "
151	3628	<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
152	1814	<< " "
153	3628	<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
154	1814	<< std::endl;
155	3628	if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
156	1814	\|\| d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
157		{
158	906	continue;
159		}
160	1816	Trace("strings-regexp")
161	908	<< "We have regular expression assertion : " << assertion
162	908	<< std::endl;
163	1282	Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
164	908	Assert(atom == Rewriter::rewrite(atom));
165	908	bool polarity = assertion.getKind() != NOT;
166	908	if (polarity != (e == 0))
167		{
168	448	continue;
169		}
170	460	bool flag = true;
171	834	Node x = atom[0];
172	834	Node r = atom[1];
173	460	Assert(rep == d_state.getRepresentative(x));
174		// The following code takes normal forms into account for the purposes
175		// of simplifying a regular expression membership x in R. For example,
176		// if x = "A" in the current context, then we may be interested in
177		// reasoning about ( x in R ) * { x -> "A" }. Say we update the
178		// membership to nx in R', then:
179		// - nfexp => ( x in R ) <=> nx in R'
180		// - rnfexp => R = R'
181		// We use these explanations below as assumptions on inferences when
182		// appropriate. Notice that for inferring conflicts and tautologies,
183		// we use the normal form of x always. This is because we always want to
184		// discover conflicts/tautologies whenever possible.
185		// For inferences based on regular expression unfolding, we do not use
186		// the normal form of x. The reason is that it is better to unfold
187		// regular expression memberships in a context-indepedent manner,
188		// that is, not taking into account the current normal form of x, since
189		// this ensures these lemmas are still relevant after backtracking.
190	834	std::vector<Node> nfexp;
191	834	std::vector<Node> rnfexp;
192		// The normal form of x is stored in nx, while x is left unchanged.
193	834	Node nx = x;
194	460	if (!x.isConst())
195		{
196	447	nx = d_csolver.getNormalString(x, nfexp);
197		}
198		// If r is not a constant regular expression, we update it based on
199		// normal forms, which may concretize its variables.
200	460	if (!d_regexp_opr.checkConstRegExp(r))
201		{
202	93	r = getNormalSymRegExp(r, rnfexp);
203	93	nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
204	93	changed = true;
205		}
206	920	Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
207	460	<< nx << " IN " << r << std::endl;
208	460	if (nx != x \|\| changed)
209		{
210		// We rewrite the membership nx IN r.
211	430	Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
212	243	Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
213	243	if (tmp.isConst())
214		{
215	62	if (tmp.getConst<bool>() == polarity)
216		{
217		// it is satisfied in this SAT context
218	6	d_regexp_ccached.insert(assertion);
219	6	continue;
220		}
221		else
222		{
223		// we have a conflict
224	100	std::vector<Node> iexp = nfexp;
225	100	std::vector<Node> noExplain;
226	50	iexp.push_back(assertion);
227	50	noExplain.push_back(assertion);
228	100	Node conc = Node::null();
229	50	d_im.sendInference(
230		iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
231	50	addedLemma = true;
232	50	break;
233		}
234		}
235		}
236	404	if (e == 1 && repUnfold.find(rep) != repUnfold.end())
237		{
238		// do not unfold negative memberships of strings that have new
239		// positive unfoldings. For example:
240		// x in ("A")* ^ NOT x in ("B")*
241		// We unfold x = "A" ++ x' only. The intution here is that positive
242		// unfoldings lead to stronger constraints (equalities are stronger
243		// than disequalities), and are easier to check.
244	30	continue;
245		}
246	374	if (polarity)
247		{
248	286	flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
249		}
250		else
251		{
252	88	if (!options::stringExp())
253		{
254		throw LogicException(
255		"Strings Incomplete (due to Negative Membership) by default, "
256		"try --strings-exp option.");
257		}
258		}
259	374	if (flag)
260		{
261		// check if the term is atomic
262	732	Trace("strings-regexp")
263	366	<< "Unroll/simplify membership of atomic term " << rep
264	366	<< std::endl;
265		// if so, do simple unrolling
266	366	Trace("strings-regexp") << "Simplify on " << atom << std::endl;
267	732	Node conc = d_regexp_opr.simplify(atom, polarity);
268	366	Trace("strings-regexp") << "...finished, got " << conc << std::endl;
269		// if simplifying successfully generated a lemma
270	366	if (!conc.isNull())
271		{
272	726	std::vector<Node> iexp;
273	726	std::vector<Node> noExplain;
274	363	iexp.push_back(assertion);
275	363	noExplain.push_back(assertion);
276	363	Assert(atom.getKind() == STRING_IN_REGEXP);
277	363	if (polarity)
278		{
279	276	d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
280		}
281		else
282		{
283	87	d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
284		}
285	363	InferenceId inf =
286	363	polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
287		// in very rare cases, we may find out that the unfolding lemma
288		// for a membership is equivalent to true, in spite of the RE
289		// not being rewritten to true.
290	363	if (d_im.sendInference(iexp, noExplain, conc, inf))
291		{
292	361	addedLemma = true;
293	361	if (e == 0)
294		{
295		// Remember that we have unfolded a membership for x
296		// notice that we only do this here, after we have definitely
297		// added a lemma.
298	274	repUnfold.insert(rep);
299		}
300		}
301	363	processed.push_back(assertion);
302		}
303		else
304		{
305		// otherwise we are incomplete
306	3	d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
307		}
308		}
309	374	if (d_state.isInConflict())
310		{
311		break;
312		}
313		}
314		}
315		}
316	7334	if (addedLemma)
317		{
318	295	if (!d_state.isInConflict())
319		{
320	658	for (unsigned i = 0; i < processed.size(); i++)
321		{
322	726	Trace("strings-regexp")
323	363	<< "...add " << processed[i] << " to u-cache." << std::endl;
324	363	d_regexp_ucached.insert(processed[i]);
325		}
326		}
327		}
328		}
329
330	681	bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
331		{
332	1362	std::unordered_set<Node> remove;
333
334	1626	for (const Node& m1 : mems)
335		{
336	955	bool m1Neg = m1.getKind() == NOT;
337	1897	Node m1Lit = m1Neg ? m1[0] : m1;
338
339	955	if (remove.find(m1) != remove.end())
340		{
341		// Skip memberships marked for removal
342	3	continue;
343		}
344
345	2599	for (const Node& m2 : mems)
346		{
347	1667	if (m1 == m2)
348		{
349	946	continue;
350		}
351
352	721	bool m2Neg = m2.getKind() == NOT;
353	1422	Node m2Lit = m2Neg ? m2[0] : m2;
354
355		// Both regular expression memberships have the same polarity
356	721	if (m1Neg == m2Neg)
357		{
358	423	if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
359		{
360	15	if (m1Neg)
361		{
362		// ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
363		// mark ~str.in.re(x, R2) as reduced
364	5	d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
365	5	remove.insert(m2);
366		}
367		else
368		{
369		// str.in.re(x, R1) includes str.in.re(x, R2) --->
370		// mark str.in.re(x, R1) as reduced
371	10	d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
372	10	remove.insert(m1);
373
374		// We don't need to process m1 anymore
375	10	break;
376		}
377		}
378		}
379		else
380		{
381	586	Node pos = m1Neg ? m2Lit : m1Lit;
382	586	Node neg = m1Neg ? m1Lit : m2Lit;
383	298	if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
384		{
385		// We have a conflict because we have a case where str.in.re(x, R1)
386		// and ~str.in.re(x, R2) but R2 includes R1, so there is no
387		// possible value for x that satisfies both memberships.
388	20	std::vector<Node> vec_nodes;
389	10	vec_nodes.push_back(pos);
390	10	vec_nodes.push_back(neg.negate());
391
392	10	if (pos[0] != neg[0])
393		{
394		vec_nodes.push_back(pos[0].eqNode(neg[0]));
395		}
396
397	20	Node conc;
398	10	d_im.sendInference(
399		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
400	10	return false;
401		}
402		}
403		}
404		}
405
406	2013	mems.erase(std::remove_if(
407		mems.begin(),
408		mems.end(),
409	1612	[&remove](Node& n) { return remove.find(n) != remove.end(); }),
410	2684	mems.end());
411
412	671	return true;
413		}
414
415	671	bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
416		{
417		// do not compute intersections if the re intersection mode is none
418	1966	if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
419		{
420		return true;
421		}
422	671	if (mems.empty())
423		{
424		// nothing to do
425		return true;
426		}
427		// the initial regular expression membership and its constant type
428	1342	Node mi;
429	671	RegExpConstType rcti = RE_C_UNKNOWN;
430	671	NodeManager* nm = NodeManager::currentNM();
431	1531	for (const Node& m : mems)
432		{
433	1255	if (m.getKind() != STRING_IN_REGEXP)
434		{
435		// do not do negative
436	347	Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
437	1200	continue;
438		}
439	561	RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
440	561	if (rct == RE_C_VARIABLE
441	929	\|\| (options::stringRegExpInterMode()
442		== options::RegExpInterMode::CONSTANT
443	431	&& rct != RE_C_CONRETE_CONSTANT))
444		{
445		// cannot do intersection on RE with variables, or with re.allchar based
446		// on option.
447	368	continue;
448		}
449	386	if (options::stringRegExpInterMode()
450	193	== options::RegExpInterMode::ONE_CONSTANT)
451		{
452		if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
453		{
454		// if both have re.allchar, do not do intersection if the
455		// options::RegExpInterMode::ONE_CONSTANT option is set.
456		continue;
457		}
458		}
459	331	if (mi.isNull())
460		{
461		// first regular expression seen
462	138	mi = m;
463	138	rcti = rct;
464	138	continue;
465		}
466	62	Node resR = d_regexp_opr.intersect(mi[1], m[1]);
467		// intersection should be computable
468	55	Assert(!resR.isNull());
469	55	if (resR == d_emptyRegexp)
470		{
471		// conflict, explain
472	24	std::vector<Node> vec_nodes;
473	12	vec_nodes.push_back(mi);
474	12	vec_nodes.push_back(m);
475	12	if (mi[0] != m[0])
476		{
477	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
478		}
479	24	Node conc;
480	12	d_im.sendInference(
481		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
482		// conflict, return
483	12	return false;
484		}
485		// rewrite to ensure the equality checks below are precise
486	50	Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
487	50	Node mresr = Rewriter::rewrite(mres);
488	43	if (mresr == mi)
489		{
490		// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
491		// to x in R1, hence x in R2 can be marked redundant.
492		d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
493		}
494	43	else if (mresr == m)
495		{
496		// same as above, opposite direction
497	7	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
498		}
499		else
500		{
501		// new conclusion
502		// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
503	72	std::vector<Node> vec_nodes;
504	36	vec_nodes.push_back(mi);
505	36	vec_nodes.push_back(m);
506	36	if (mi[0] != m[0])
507		{
508		vec_nodes.push_back(mi[0].eqNode(m[0]));
509		}
510	36	d_im.sendInference(
511		vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
512		// both are reduced
513	36	d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
514	36	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
515		// do not send more than one lemma for this class
516	36	return true;
517		}
518		}
519	623	return true;
520		}
521
522	286	bool RegExpSolver::checkPDerivative(
523		Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
524		{
525	286	if (d_state.areEqual(x, d_emptyString))
526		{
527	4	Node exp;
528	4	switch (d_regexp_opr.delta(r, exp))
529		{
530		case 0:
531		{
532		std::vector<Node> noExplain;
533		noExplain.push_back(atom);
534		noExplain.push_back(x.eqNode(d_emptyString));
535		std::vector<Node> iexp = nf_exp;
536		iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
537		d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
538		addedLemma = true;
539		d_regexp_ccached.insert(atom);
540		return false;
541		}
542		case 1:
543		{
544		d_regexp_ccached.insert(atom);
545		break;
546		}
547	4	case 2:
548		{
549	8	std::vector<Node> noExplain;
550	4	noExplain.push_back(atom);
551	4	noExplain.push_back(x.eqNode(d_emptyString));
552	8	std::vector<Node> iexp = nf_exp;
553	4	iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
554	4	d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
555	4	addedLemma = true;
556	4	d_regexp_ccached.insert(atom);
557	4	return false;
558		}
559		default:
560		// Impossible
561		break;
562		}
563		}
564		else
565		{
566	282	if (deriveRegExp(x, r, atom, nf_exp))
567		{
568	4	addedLemma = true;
569	4	d_regexp_ccached.insert(atom);
570	4	return false;
571		}
572		}
573	278	return true;
574		}
575
576	282	cvc5::String RegExpSolver::getHeadConst(Node x)
577		{
578	282	if (x.isConst())
579		{
580	2	return x.getConst<String>();
581		}
582	280	else if (x.getKind() == STRING_CONCAT)
583		{
584	38	if (x[0].isConst())
585		{
586	6	return x[0].getConst<String>();
587		}
588		}
589	274	return d_emptyString.getConst<String>();
590		}
591
592	282	bool RegExpSolver::deriveRegExp(Node x,
593		Node r,
594		Node atom,
595		std::vector<Node>& ant)
596		{
597	282	Assert(x != d_emptyString);
598	564	Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
599	282	<< ", r= " << r << std::endl;
600	564	cvc5::String s = getHeadConst(x);
601		// only allow RE_DERIVE for concrete constant regular expressions
602	282	if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
603		{
604	8	Node conc = Node::null();
605	8	Node dc = r;
606	4	bool flag = true;
607	4	for (unsigned i = 0; i < s.size(); ++i)
608		{
609	4	cvc5::String c = s.substr(i, 1);
610	4	Node dc2;
611	4	int rt = d_regexp_opr.derivativeS(dc, c, dc2);
612	4	dc = dc2;
613	4	if (rt == 2)
614		{
615		// CONFLICT
616	4	flag = false;
617	4	break;
618		}
619		}
620		// send lemma
621	4	if (flag)
622		{
623		if (x.isConst())
624		{
625		Assert(false)
626		<< "Impossible: RegExpSolver::deriveRegExp: const string in const "
627		"regular expression.";
628		return false;
629		}
630		else
631		{
632		Assert(x.getKind() == STRING_CONCAT);
633		std::vector<Node> vec_nodes;
634		for (unsigned int i = 1; i < x.getNumChildren(); ++i)
635		{
636		vec_nodes.push_back(x[i]);
637		}
638		Node left = utils::mkConcat(vec_nodes, x.getType());
639		left = Rewriter::rewrite(left);
640		conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
641		}
642		}
643	8	std::vector<Node> iexp = ant;
644	8	std::vector<Node> noExplain;
645	4	noExplain.push_back(atom);
646	4	iexp.push_back(atom);
647	4	d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
648	4	return true;
649		}
650	278	return false;
651		}
652
653	499	Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
654		{
655	499	Node ret = r;
656	499	switch (r.getKind())
657		{
658	72	case REGEXP_EMPTY:
659		case REGEXP_SIGMA:
660	72	case REGEXP_RANGE: break;
661	182	case STRING_TO_REGEXP:
662		{
663	182	if (!r[0].isConst())
664		{
665	188	Node tmp = d_csolver.getNormalString(r[0], nf_exp);
666	94	if (tmp != r[0])
667		{
668	84	ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
669		}
670		}
671	182	break;
672		}
673	242	case REGEXP_CONCAT:
674		case REGEXP_UNION:
675		case REGEXP_INTER:
676		case REGEXP_STAR:
677		case REGEXP_COMPLEMENT:
678		{
679	484	std::vector<Node> vec_nodes;
680	648	for (const Node& cr : r)
681		{
682	406	vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
683		}
684	242	ret = Rewriter::rewrite(
685	484	NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
686	242	break;
687		}
688	3	default:
689		{
690	6	Trace("strings-error") << "Unsupported term: " << r
691	3	<< " in normalization SymRegExp." << std::endl;
692	3	Assert(!utils::isRegExpKind(r.getKind()));
693		}
694		}
695	499	return ret;
696		}
697
698		} // namespace strings
699		} // namespace theory
700	29384	} // namespace cvc5