Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/theory/strings/regexp_solver.cpp	Lines:	296	333	88.9 %
Date:	2021-05-22	Branches:	596	1241	48.0 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Implementation of the regular expression solver for the theory of strings.
 */

#include "theory/strings/regexp_solver.h"

#include <cmath>

#include "options/strings_options.h"
#include "smt/logic_exception.h"
#include "theory/ext_theory.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/theory_model.h"
#include "util/statistics_value.h"

using namespace std;
using namespace cvc5::context;
using namespace cvc5::kind;

namespace cvc5 {
namespace theory {
namespace strings {

RegExpSolver::RegExpSolver(SolverState& s,
                           InferenceManager& im,
                           SkolemCache* skc,
                           CoreSolver& cs,
                           ExtfSolver& es,
                           SequencesStatistics& stats)
    : d_state(s),
      d_im(im),
      d_csolver(cs),
      d_esolver(es),
      d_statistics(stats),
      d_regexp_ucached(s.getUserContext()),
      d_regexp_ccached(s.getSatContext()),
      d_processed_memberships(s.getSatContext()),
      d_regexp_opr(skc)
{
  d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
  d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
  d_true = NodeManager::currentNM()->mkConst(true);
  d_false = NodeManager::currentNM()->mkConst(false);
}

Node RegExpSolver::mkAnd(Node c1, Node c2)
{
  return NodeManager::currentNM()->mkNode(AND, c1, c2);
}

void RegExpSolver::checkMemberships()
{
  // add the memberships
  std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
  // maps representatives to regular expression memberships in that class
  std::map<Node, std::vector<Node> > assertedMems;
  const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
  std::map<Node, ExtfInfoTmp>::const_iterator it;
  for (unsigned i = 0; i < mems.size(); i++)
  {
    Node n = mems[i];
    Assert(n.getKind() == STRING_IN_REGEXP);
    it = einfo.find(n);
    Assert(it != einfo.end());
    if (!it->second.d_const.isNull())
    {
      bool pol = it->second.d_const.getConst<bool>();
      Trace("strings-process-debug")
          << "  add membership : " << n << ", pol = " << pol << std::endl;
      Node r = d_state.getRepresentative(n[0]);
      assertedMems[r].push_back(pol ? n : n.negate());
    }
    else
    {
      Trace("strings-process-debug")
          << "  irrelevant (non-asserted) membership : " << n << std::endl;
    }
  }
  check(assertedMems);
}

void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
{
  bool addedLemma = false;
  bool changed = false;
  std::vector<Node> processed;

  Trace("regexp-process") << "Checking Memberships ... " << std::endl;
  for (const std::pair<const Node, std::vector<Node> >& mr : mems)
  {
    std::vector<Node> mems2 = mr.second;
    Trace("regexp-process")
        << "Memberships(" << mr.first << ") = " << mr.second << std::endl;
    if (!checkEqcInclusion(mems2))
    {
      // conflict discovered, return
      return;
    }
    if (!checkEqcIntersect(mems2))
    {
      // conflict discovered, return
      return;
    }
  }

  Trace("regexp-debug")
      << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
      << std::endl;
  if (!addedLemma)
  {
    // get all memberships
    std::map<Node, Node> allMems;
    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
      for (const Node& m : mr.second)
      {
        bool polarity = m.getKind() != NOT;
        if (polarity || !options::stringIgnNegMembership())
        {
          allMems[m] = mr.first;
        }
      }
    }

    NodeManager* nm = NodeManager::currentNM();
    // representatives of strings that are the LHS of positive memberships that
    // we unfolded
    std::unordered_set<Node> repUnfold;
    // check positive (e=0), then negative (e=1) memberships
    for (unsigned e = 0; e < 2; e++)
    {
      for (const std::pair<const Node, Node>& mp : allMems)
      {
        Node assertion = mp.first;
        Node rep = mp.second;
        // check regular expression membership
        Trace("regexp-debug")
            << "Check : " << assertion << " "
            << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
            << " "
            << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
            << std::endl;
        if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
            || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
        {
          continue;
        }
        Trace("strings-regexp")
            << "We have regular expression assertion : " << assertion
            << std::endl;
        Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
        Assert(atom == Rewriter::rewrite(atom));
        bool polarity = assertion.getKind() != NOT;
        if (polarity != (e == 0))
        {
          continue;
        }
        bool flag = true;
        Node x = atom[0];
        Node r = atom[1];
        Assert(rep == d_state.getRepresentative(x));
        // The following code takes normal forms into account for the purposes
        // of simplifying a regular expression membership x in R. For example,
        // if x = "A" in the current context, then we may be interested in
        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
        // membership to nx in R', then:
        // - nfexp => ( x in R ) <=> nx in R'
        // - rnfexp => R = R'
        // We use these explanations below as assumptions on inferences when
        // appropriate. Notice that for inferring conflicts and tautologies,
        // we use the normal form of x always. This is because we always want to
        // discover conflicts/tautologies whenever possible.
        // For inferences based on regular expression unfolding, we do not use
        // the normal form of x. The reason is that it is better to unfold
        // regular expression memberships in a context-indepedent manner,
        // that is, not taking into account the current normal form of x, since
        // this ensures these lemmas are still relevant after backtracking.
        std::vector<Node> nfexp;
        std::vector<Node> rnfexp;
        // The normal form of x is stored in nx, while x is left unchanged.
        Node nx = x;
        if (!x.isConst())
        {
          nx = d_csolver.getNormalString(x, nfexp);
        }
        // If r is not a constant regular expression, we update it based on
        // normal forms, which may concretize its variables.
        if (!d_regexp_opr.checkConstRegExp(r))
        {
          r = getNormalSymRegExp(r, rnfexp);
          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
          changed = true;
        }
        Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
                                   << nx << " IN " << r << std::endl;
        if (nx != x || changed)
        {
          // We rewrite the membership nx IN r.
          Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
          if (tmp.isConst())
          {
            if (tmp.getConst<bool>() == polarity)
            {
              // it is satisfied in this SAT context
              d_regexp_ccached.insert(assertion);
              continue;
            }
            else
            {
              // we have a conflict
              std::vector<Node> iexp = nfexp;
              std::vector<Node> noExplain;
              iexp.push_back(assertion);
              noExplain.push_back(assertion);
              Node conc = Node::null();
              d_im.sendInference(
                  iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
              addedLemma = true;
              break;
            }
          }
        }
        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
        {
          // do not unfold negative memberships of strings that have new
          // positive unfoldings. For example:
          //   x in ("A")* ^ NOT x in ("B")*
          // We unfold x = "A" ++ x' only. The intution here is that positive
          // unfoldings lead to stronger constraints (equalities are stronger
          // than disequalities), and are easier to check.
          continue;
        }
        if (polarity)
        {
          flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
        }
        else
        {
          if (!options::stringExp())
          {
            throw LogicException(
                "Strings Incomplete (due to Negative Membership) by default, "
                "try --strings-exp option.");
          }
        }
        if (flag)
        {
          // check if the term is atomic
          Trace("strings-regexp")
              << "Unroll/simplify membership of atomic term " << rep
              << std::endl;
          // if so, do simple unrolling
          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
          Node conc = d_regexp_opr.simplify(atom, polarity);
          Trace("strings-regexp") << "...finished, got " << conc << std::endl;
          // if simplifying successfully generated a lemma
          if (!conc.isNull())
          {
            std::vector<Node> iexp;
            std::vector<Node> noExplain;
            iexp.push_back(assertion);
            noExplain.push_back(assertion);
            Assert(atom.getKind() == STRING_IN_REGEXP);
            if (polarity)
            {
              d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
            }
            else
            {
              d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
            }
            InferenceId inf =
                polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
            // in very rare cases, we may find out that the unfolding lemma
            // for a membership is equivalent to true, in spite of the RE
            // not being rewritten to true.
            if (d_im.sendInference(iexp, noExplain, conc, inf))
            {
              addedLemma = true;
              if (e == 0)
              {
                // Remember that we have unfolded a membership for x
                // notice that we only do this here, after we have definitely
                // added a lemma.
                repUnfold.insert(rep);
              }
            }
            processed.push_back(assertion);
          }
          else
          {
            // otherwise we are incomplete
            d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
          }
        }
        if (d_state.isInConflict())
        {
          break;
        }
      }
    }
  }
  if (addedLemma)
  {
    if (!d_state.isInConflict())
    {
      for (unsigned i = 0; i < processed.size(); i++)
      {
        Trace("strings-regexp")
            << "...add " << processed[i] << " to u-cache." << std::endl;
        d_regexp_ucached.insert(processed[i]);
      }
    }
  }
}

bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
{
  std::unordered_set<Node> remove;

  for (const Node& m1 : mems)
  {
    bool m1Neg = m1.getKind() == NOT;
    Node m1Lit = m1Neg ? m1[0] : m1;

    if (remove.find(m1) != remove.end())
    {
      // Skip memberships marked for removal
      continue;
    }

    for (const Node& m2 : mems)
    {
      if (m1 == m2)
      {
        continue;
      }

      bool m2Neg = m2.getKind() == NOT;
      Node m2Lit = m2Neg ? m2[0] : m2;

      // Both regular expression memberships have the same polarity
      if (m1Neg == m2Neg)
      {
        if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
        {
          if (m1Neg)
          {
            // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
            //   mark ~str.in.re(x, R2) as reduced
            d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
            remove.insert(m2);
          }
          else
          {
            // str.in.re(x, R1) includes str.in.re(x, R2) --->
            //   mark str.in.re(x, R1) as reduced
            d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
            remove.insert(m1);

            // We don't need to process m1 anymore
            break;
          }
        }
      }
      else
      {
        Node pos = m1Neg ? m2Lit : m1Lit;
        Node neg = m1Neg ? m1Lit : m2Lit;
        if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
        {
          // We have a conflict because we have a case where str.in.re(x, R1)
          // and ~str.in.re(x, R2) but R2 includes R1, so there is no
          // possible value for x that satisfies both memberships.
          std::vector<Node> vec_nodes;
          vec_nodes.push_back(pos);
          vec_nodes.push_back(neg.negate());

          if (pos[0] != neg[0])
          {
            vec_nodes.push_back(pos[0].eqNode(neg[0]));
          }

          Node conc;
          d_im.sendInference(
              vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
          return false;
        }
      }
    }
  }

  mems.erase(std::remove_if(
                 mems.begin(),
                 mems.end(),
                 [&remove](Node& n) { return remove.find(n) != remove.end(); }),
             mems.end());

  return true;
}

bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
{
  // do not compute intersections if the re intersection mode is none
  if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
  {
    return true;
  }
  if (mems.empty())
  {
    // nothing to do
    return true;
  }
  // the initial regular expression membership and its constant type
  Node mi;
  RegExpConstType rcti = RE_C_UNKNOWN;
  NodeManager* nm = NodeManager::currentNM();
  for (const Node& m : mems)
  {
    if (m.getKind() != STRING_IN_REGEXP)
    {
      // do not do negative
      Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
      continue;
    }
    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
    if (rct == RE_C_VARIABLE
        || (options::stringRegExpInterMode()
                == options::RegExpInterMode::CONSTANT
            && rct != RE_C_CONRETE_CONSTANT))
    {
      // cannot do intersection on RE with variables, or with re.allchar based
      // on option.
      continue;
    }
    if (options::stringRegExpInterMode()
        == options::RegExpInterMode::ONE_CONSTANT)
    {
      if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
      {
        // if both have re.allchar, do not do intersection if the
        // options::RegExpInterMode::ONE_CONSTANT option is set.
        continue;
      }
    }
    if (mi.isNull())
    {
      // first regular expression seen
      mi = m;
      rcti = rct;
      continue;
    }
    Node resR = d_regexp_opr.intersect(mi[1], m[1]);
    // intersection should be computable
    Assert(!resR.isNull());
    if (resR == d_emptyRegexp)
    {
      // conflict, explain
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      Node conc;
      d_im.sendInference(
          vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
      // conflict, return
      return false;
    }
    // rewrite to ensure the equality checks below are precise
    Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
    Node mresr = Rewriter::rewrite(mres);
    if (mresr == mi)
    {
      // if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
      // to x in R1, hence x in R2 can be marked redundant.
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else if (mresr == m)
    {
      // same as above, opposite direction
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else
    {
      // new conclusion
      // (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      d_im.sendInference(
          vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
      // both are reduced
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
      // do not send more than one lemma for this class
      return true;
    }
  }
  return true;
}

bool RegExpSolver::checkPDerivative(
    Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
{
  if (d_state.areEqual(x, d_emptyString))
  {
    Node exp;
    switch (d_regexp_opr.delta(r, exp))
    {
      case 0:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      case 1:
      {
        d_regexp_ccached.insert(atom);
        break;
      }
      case 2:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      default:
        // Impossible
        break;
    }
  }
  else
  {
    if (deriveRegExp(x, r, atom, nf_exp))
    {
      addedLemma = true;
      d_regexp_ccached.insert(atom);
      return false;
    }
  }
  return true;
}

cvc5::String RegExpSolver::getHeadConst(Node x)
{
  if (x.isConst())
  {
    return x.getConst<String>();
  }
  else if (x.getKind() == STRING_CONCAT)
  {
    if (x[0].isConst())
    {
      return x[0].getConst<String>();
    }
  }
  return d_emptyString.getConst<String>();
}

bool RegExpSolver::deriveRegExp(Node x,
                                Node r,
                                Node atom,
                                std::vector<Node>& ant)
{
  Assert(x != d_emptyString);
  Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
                         << ", r= " << r << std::endl;
  cvc5::String s = getHeadConst(x);
  // only allow RE_DERIVE for concrete constant regular expressions
  if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
  {
    Node conc = Node::null();
    Node dc = r;
    bool flag = true;
    for (unsigned i = 0; i < s.size(); ++i)
    {
      cvc5::String c = s.substr(i, 1);
      Node dc2;
      int rt = d_regexp_opr.derivativeS(dc, c, dc2);
      dc = dc2;
      if (rt == 2)
      {
        // CONFLICT
        flag = false;
        break;
      }
    }
    // send lemma
    if (flag)
    {
      if (x.isConst())
      {
        Assert(false)
            << "Impossible: RegExpSolver::deriveRegExp: const string in const "
               "regular expression.";
        return false;
      }
      else
      {
        Assert(x.getKind() == STRING_CONCAT);
        std::vector<Node> vec_nodes;
        for (unsigned int i = 1; i < x.getNumChildren(); ++i)
        {
          vec_nodes.push_back(x[i]);
        }
        Node left = utils::mkConcat(vec_nodes, x.getType());
        left = Rewriter::rewrite(left);
        conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
      }
    }
    std::vector<Node> iexp = ant;
    std::vector<Node> noExplain;
    noExplain.push_back(atom);
    iexp.push_back(atom);
    d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
    return true;
  }
  return false;
}

Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
{
  Node ret = r;
  switch (r.getKind())
  {
    case REGEXP_EMPTY:
    case REGEXP_SIGMA:
    case REGEXP_RANGE: break;
    case STRING_TO_REGEXP:
    {
      if (!r[0].isConst())
      {
        Node tmp = d_csolver.getNormalString(r[0], nf_exp);
        if (tmp != r[0])
        {
          ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
        }
      }
      break;
    }
    case REGEXP_CONCAT:
    case REGEXP_UNION:
    case REGEXP_INTER:
    case REGEXP_STAR:
    case REGEXP_COMPLEMENT:
    {
      std::vector<Node> vec_nodes;
      for (const Node& cr : r)
      {
        vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
      }
      ret = Rewriter::rewrite(
          NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
      break;
    }
    default:
    {
      Trace("strings-error") << "Unsupported term: " << r
                             << " in normalization SymRegExp." << std::endl;
      Assert(!utils::isRegExpKind(r.getKind()));
    }
  }
  return ret;
}

}  // namespace strings
}  // namespace theory
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Andres Noetzli, Tianyi Liang
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* Implementation of the regular expression solver for the theory of strings.
14		*/
15
16		#include "theory/strings/regexp_solver.h"
17
18		#include <cmath>
19
20		#include "options/strings_options.h"
21		#include "smt/logic_exception.h"
22		#include "theory/ext_theory.h"
23		#include "theory/strings/theory_strings_utils.h"
24		#include "theory/theory_model.h"
25		#include "util/statistics_value.h"
26
27		using namespace std;
28		using namespace cvc5::context;
29		using namespace cvc5::kind;
30
31		namespace cvc5 {
32		namespace theory {
33		namespace strings {
34
35	9460	RegExpSolver::RegExpSolver(SolverState& s,
36		InferenceManager& im,
37		SkolemCache* skc,
38		CoreSolver& cs,
39		ExtfSolver& es,
40	9460	SequencesStatistics& stats)
41		: d_state(s),
42		d_im(im),
43		d_csolver(cs),
44		d_esolver(es),
45		d_statistics(stats),
46	9460	d_regexp_ucached(s.getUserContext()),
47		d_regexp_ccached(s.getSatContext()),
48		d_processed_memberships(s.getSatContext()),
49	18920	d_regexp_opr(skc)
50		{
51	9460	d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
52	9460	d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
53	9460	d_true = NodeManager::currentNM()->mkConst(true);
54	9460	d_false = NodeManager::currentNM()->mkConst(false);
55	9460	}
56
57		Node RegExpSolver::mkAnd(Node c1, Node c2)
58		{
59		return NodeManager::currentNM()->mkNode(AND, c1, c2);
60		}
61
62	7526	void RegExpSolver::checkMemberships()
63		{
64		// add the memberships
65	15052	std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
66		// maps representatives to regular expression memberships in that class
67	15052	std::map<Node, std::vector<Node> > assertedMems;
68	7526	const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
69	7526	std::map<Node, ExtfInfoTmp>::const_iterator it;
70	8551	for (unsigned i = 0; i < mems.size(); i++)
71		{
72	2050	Node n = mems[i];
73	1025	Assert(n.getKind() == STRING_IN_REGEXP);
74	1025	it = einfo.find(n);
75	1025	Assert(it != einfo.end());
76	1025	if (!it->second.d_const.isNull())
77		{
78	1003	bool pol = it->second.d_const.getConst<bool>();
79	2006	Trace("strings-process-debug")
80	1003	<< " add membership : " << n << ", pol = " << pol << std::endl;
81	2006	Node r = d_state.getRepresentative(n[0]);
82	1003	assertedMems[r].push_back(pol ? n : n.negate());
83		}
84		else
85		{
86	44	Trace("strings-process-debug")
87	22	<< " irrelevant (non-asserted) membership : " << n << std::endl;
88		}
89		}
90	7526	check(assertedMems);
91	7526	}
92
93	7526	void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
94		{
95	7526	bool addedLemma = false;
96	7526	bool changed = false;
97	15030	std::vector<Node> processed;
98
99	7526	Trace("regexp-process") << "Checking Memberships ... " << std::endl;
100	8214	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
101		{
102	1398	std::vector<Node> mems2 = mr.second;
103	1420	Trace("regexp-process")
104	710	<< "Memberships(" << mr.first << ") = " << mr.second << std::endl;
105	710	if (!checkEqcInclusion(mems2))
106		{
107		// conflict discovered, return
108	10	return;
109		}
110	700	if (!checkEqcIntersect(mems2))
111		{
112		// conflict discovered, return
113	12	return;
114		}
115		}
116
117	15008	Trace("regexp-debug")
118	7504	<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
119	7504	<< std::endl;
120	7504	if (!addedLemma)
121		{
122		// get all memberships
123	15008	std::map<Node, Node> allMems;
124	8192	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
125		{
126	1632	for (const Node& m : mr.second)
127		{
128	944	bool polarity = m.getKind() != NOT;
129	1310	if (polarity \|\| !options::stringIgnNegMembership())
130		{
131	938	allMems[m] = mr.first;
132		}
133		}
134		}
135
136	7504	NodeManager* nm = NodeManager::currentNM();
137		// representatives of strings that are the LHS of positive memberships that
138		// we unfolded
139	15008	std::unordered_set<Node> repUnfold;
140		// check positive (e=0), then negative (e=1) memberships
141	22512	for (unsigned e = 0; e < 2; e++)
142		{
143	16832	for (const std::pair<const Node, Node>& mp : allMems)
144		{
145	2256	Node assertion = mp.first;
146	2256	Node rep = mp.second;
147		// check regular expression membership
148	3748	Trace("regexp-debug")
149	1874	<< "Check : " << assertion << " "
150	3748	<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
151	1874	<< " "
152	3748	<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
153	1874	<< std::endl;
154	3748	if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
155	1874	\|\| d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
156		{
157	950	continue;
158		}
159	1848	Trace("strings-regexp")
160	924	<< "We have regular expression assertion : " << assertion
161	924	<< std::endl;
162	1306	Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
163	924	Assert(atom == Rewriter::rewrite(atom));
164	924	bool polarity = assertion.getKind() != NOT;
165	924	if (polarity != (e == 0))
166		{
167	456	continue;
168		}
169	468	bool flag = true;
170	850	Node x = atom[0];
171	850	Node r = atom[1];
172	468	Assert(rep == d_state.getRepresentative(x));
173		// The following code takes normal forms into account for the purposes
174		// of simplifying a regular expression membership x in R. For example,
175		// if x = "A" in the current context, then we may be interested in
176		// reasoning about ( x in R ) * { x -> "A" }. Say we update the
177		// membership to nx in R', then:
178		// - nfexp => ( x in R ) <=> nx in R'
179		// - rnfexp => R = R'
180		// We use these explanations below as assumptions on inferences when
181		// appropriate. Notice that for inferring conflicts and tautologies,
182		// we use the normal form of x always. This is because we always want to
183		// discover conflicts/tautologies whenever possible.
184		// For inferences based on regular expression unfolding, we do not use
185		// the normal form of x. The reason is that it is better to unfold
186		// regular expression memberships in a context-indepedent manner,
187		// that is, not taking into account the current normal form of x, since
188		// this ensures these lemmas are still relevant after backtracking.
189	850	std::vector<Node> nfexp;
190	850	std::vector<Node> rnfexp;
191		// The normal form of x is stored in nx, while x is left unchanged.
192	850	Node nx = x;
193	468	if (!x.isConst())
194		{
195	455	nx = d_csolver.getNormalString(x, nfexp);
196		}
197		// If r is not a constant regular expression, we update it based on
198		// normal forms, which may concretize its variables.
199	468	if (!d_regexp_opr.checkConstRegExp(r))
200		{
201	93	r = getNormalSymRegExp(r, rnfexp);
202	93	nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
203	93	changed = true;
204		}
205	936	Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
206	468	<< nx << " IN " << r << std::endl;
207	468	if (nx != x \|\| changed)
208		{
209		// We rewrite the membership nx IN r.
210	442	Node tmp = Rewriter::rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
211	249	Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
212	249	if (tmp.isConst())
213		{
214	62	if (tmp.getConst<bool>() == polarity)
215		{
216		// it is satisfied in this SAT context
217	6	d_regexp_ccached.insert(assertion);
218	6	continue;
219		}
220		else
221		{
222		// we have a conflict
223	100	std::vector<Node> iexp = nfexp;
224	100	std::vector<Node> noExplain;
225	50	iexp.push_back(assertion);
226	50	noExplain.push_back(assertion);
227	100	Node conc = Node::null();
228	50	d_im.sendInference(
229		iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
230	50	addedLemma = true;
231	50	break;
232		}
233		}
234		}
235	412	if (e == 1 && repUnfold.find(rep) != repUnfold.end())
236		{
237		// do not unfold negative memberships of strings that have new
238		// positive unfoldings. For example:
239		// x in ("A")* ^ NOT x in ("B")*
240		// We unfold x = "A" ++ x' only. The intution here is that positive
241		// unfoldings lead to stronger constraints (equalities are stronger
242		// than disequalities), and are easier to check.
243	30	continue;
244		}
245	382	if (polarity)
246		{
247	291	flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
248		}
249		else
250		{
251	91	if (!options::stringExp())
252		{
253		throw LogicException(
254		"Strings Incomplete (due to Negative Membership) by default, "
255		"try --strings-exp option.");
256		}
257		}
258	382	if (flag)
259		{
260		// check if the term is atomic
261	748	Trace("strings-regexp")
262	374	<< "Unroll/simplify membership of atomic term " << rep
263	374	<< std::endl;
264		// if so, do simple unrolling
265	374	Trace("strings-regexp") << "Simplify on " << atom << std::endl;
266	748	Node conc = d_regexp_opr.simplify(atom, polarity);
267	374	Trace("strings-regexp") << "...finished, got " << conc << std::endl;
268		// if simplifying successfully generated a lemma
269	374	if (!conc.isNull())
270		{
271	742	std::vector<Node> iexp;
272	742	std::vector<Node> noExplain;
273	371	iexp.push_back(assertion);
274	371	noExplain.push_back(assertion);
275	371	Assert(atom.getKind() == STRING_IN_REGEXP);
276	371	if (polarity)
277		{
278	281	d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
279		}
280		else
281		{
282	90	d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
283		}
284	371	InferenceId inf =
285	371	polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
286		// in very rare cases, we may find out that the unfolding lemma
287		// for a membership is equivalent to true, in spite of the RE
288		// not being rewritten to true.
289	371	if (d_im.sendInference(iexp, noExplain, conc, inf))
290		{
291	369	addedLemma = true;
292	369	if (e == 0)
293		{
294		// Remember that we have unfolded a membership for x
295		// notice that we only do this here, after we have definitely
296		// added a lemma.
297	279	repUnfold.insert(rep);
298		}
299		}
300	371	processed.push_back(assertion);
301		}
302		else
303		{
304		// otherwise we are incomplete
305	3	d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
306		}
307		}
308	382	if (d_state.isInConflict())
309		{
310		break;
311		}
312		}
313		}
314		}
315	7504	if (addedLemma)
316		{
317	302	if (!d_state.isInConflict())
318		{
319	673	for (unsigned i = 0; i < processed.size(); i++)
320		{
321	742	Trace("strings-regexp")
322	371	<< "...add " << processed[i] << " to u-cache." << std::endl;
323	371	d_regexp_ucached.insert(processed[i]);
324		}
325		}
326		}
327		}
328
329	710	bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
330		{
331	1420	std::unordered_set<Node> remove;
332
333	1685	for (const Node& m1 : mems)
334		{
335	985	bool m1Neg = m1.getKind() == NOT;
336	1957	Node m1Lit = m1Neg ? m1[0] : m1;
337
338	985	if (remove.find(m1) != remove.end())
339		{
340		// Skip memberships marked for removal
341	3	continue;
342		}
343
344	2661	for (const Node& m2 : mems)
345		{
346	1699	if (m1 == m2)
347		{
348	976	continue;
349		}
350
351	723	bool m2Neg = m2.getKind() == NOT;
352	1426	Node m2Lit = m2Neg ? m2[0] : m2;
353
354		// Both regular expression memberships have the same polarity
355	723	if (m1Neg == m2Neg)
356		{
357	425	if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
358		{
359	15	if (m1Neg)
360		{
361		// ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
362		// mark ~str.in.re(x, R2) as reduced
363	5	d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
364	5	remove.insert(m2);
365		}
366		else
367		{
368		// str.in.re(x, R1) includes str.in.re(x, R2) --->
369		// mark str.in.re(x, R1) as reduced
370	10	d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
371	10	remove.insert(m1);
372
373		// We don't need to process m1 anymore
374	10	break;
375		}
376		}
377		}
378		else
379		{
380	586	Node pos = m1Neg ? m2Lit : m1Lit;
381	586	Node neg = m1Neg ? m1Lit : m2Lit;
382	298	if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
383		{
384		// We have a conflict because we have a case where str.in.re(x, R1)
385		// and ~str.in.re(x, R2) but R2 includes R1, so there is no
386		// possible value for x that satisfies both memberships.
387	20	std::vector<Node> vec_nodes;
388	10	vec_nodes.push_back(pos);
389	10	vec_nodes.push_back(neg.negate());
390
391	10	if (pos[0] != neg[0])
392		{
393		vec_nodes.push_back(pos[0].eqNode(neg[0]));
394		}
395
396	20	Node conc;
397	10	d_im.sendInference(
398		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
399	10	return false;
400		}
401		}
402		}
403		}
404
405	2100	mems.erase(std::remove_if(
406		mems.begin(),
407		mems.end(),
408	1671	[&remove](Node& n) { return remove.find(n) != remove.end(); }),
409	2800	mems.end());
410
411	700	return true;
412		}
413
414	700	bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
415		{
416		// do not compute intersections if the re intersection mode is none
417	2042	if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
418		{
419		return true;
420		}
421	700	if (mems.empty())
422		{
423		// nothing to do
424		return true;
425		}
426		// the initial regular expression membership and its constant type
427	1400	Node mi;
428	700	RegExpConstType rcti = RE_C_UNKNOWN;
429	700	NodeManager* nm = NodeManager::currentNM();
430	1590	for (const Node& m : mems)
431		{
432	1297	if (m.getKind() != STRING_IN_REGEXP)
433		{
434		// do not do negative
435	359	Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
436	1242	continue;
437		}
438	579	RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
439	579	if (rct == RE_C_VARIABLE
440	965	\|\| (options::stringRegExpInterMode()
441		== options::RegExpInterMode::CONSTANT
442	449	&& rct != RE_C_CONRETE_CONSTANT))
443		{
444		// cannot do intersection on RE with variables, or with re.allchar based
445		// on option.
446	386	continue;
447		}
448	386	if (options::stringRegExpInterMode()
449	193	== options::RegExpInterMode::ONE_CONSTANT)
450		{
451		if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
452		{
453		// if both have re.allchar, do not do intersection if the
454		// options::RegExpInterMode::ONE_CONSTANT option is set.
455		continue;
456		}
457		}
458	331	if (mi.isNull())
459		{
460		// first regular expression seen
461	138	mi = m;
462	138	rcti = rct;
463	138	continue;
464		}
465	62	Node resR = d_regexp_opr.intersect(mi[1], m[1]);
466		// intersection should be computable
467	55	Assert(!resR.isNull());
468	55	if (resR == d_emptyRegexp)
469		{
470		// conflict, explain
471	24	std::vector<Node> vec_nodes;
472	12	vec_nodes.push_back(mi);
473	12	vec_nodes.push_back(m);
474	12	if (mi[0] != m[0])
475		{
476	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
477		}
478	24	Node conc;
479	12	d_im.sendInference(
480		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
481		// conflict, return
482	12	return false;
483		}
484		// rewrite to ensure the equality checks below are precise
485	50	Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
486	50	Node mresr = Rewriter::rewrite(mres);
487	43	if (mresr == mi)
488		{
489		// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
490		// to x in R1, hence x in R2 can be marked redundant.
491		d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
492		}
493	43	else if (mresr == m)
494		{
495		// same as above, opposite direction
496	7	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
497		}
498		else
499		{
500		// new conclusion
501		// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
502	72	std::vector<Node> vec_nodes;
503	36	vec_nodes.push_back(mi);
504	36	vec_nodes.push_back(m);
505	36	if (mi[0] != m[0])
506		{
507		vec_nodes.push_back(mi[0].eqNode(m[0]));
508		}
509	36	d_im.sendInference(
510		vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
511		// both are reduced
512	36	d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
513	36	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
514		// do not send more than one lemma for this class
515	36	return true;
516		}
517		}
518	652	return true;
519		}
520
521	291	bool RegExpSolver::checkPDerivative(
522		Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
523		{
524	291	if (d_state.areEqual(x, d_emptyString))
525		{
526	4	Node exp;
527	4	switch (d_regexp_opr.delta(r, exp))
528		{
529		case 0:
530		{
531		std::vector<Node> noExplain;
532		noExplain.push_back(atom);
533		noExplain.push_back(x.eqNode(d_emptyString));
534		std::vector<Node> iexp = nf_exp;
535		iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
536		d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
537		addedLemma = true;
538		d_regexp_ccached.insert(atom);
539		return false;
540		}
541		case 1:
542		{
543		d_regexp_ccached.insert(atom);
544		break;
545		}
546	4	case 2:
547		{
548	8	std::vector<Node> noExplain;
549	4	noExplain.push_back(atom);
550	4	noExplain.push_back(x.eqNode(d_emptyString));
551	8	std::vector<Node> iexp = nf_exp;
552	4	iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
553	4	d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
554	4	addedLemma = true;
555	4	d_regexp_ccached.insert(atom);
556	4	return false;
557		}
558		default:
559		// Impossible
560		break;
561		}
562		}
563		else
564		{
565	287	if (deriveRegExp(x, r, atom, nf_exp))
566		{
567	4	addedLemma = true;
568	4	d_regexp_ccached.insert(atom);
569	4	return false;
570		}
571		}
572	283	return true;
573		}
574
575	287	cvc5::String RegExpSolver::getHeadConst(Node x)
576		{
577	287	if (x.isConst())
578		{
579	2	return x.getConst<String>();
580		}
581	285	else if (x.getKind() == STRING_CONCAT)
582		{
583	39	if (x[0].isConst())
584		{
585	7	return x[0].getConst<String>();
586		}
587		}
588	278	return d_emptyString.getConst<String>();
589		}
590
591	287	bool RegExpSolver::deriveRegExp(Node x,
592		Node r,
593		Node atom,
594		std::vector<Node>& ant)
595		{
596	287	Assert(x != d_emptyString);
597	574	Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
598	287	<< ", r= " << r << std::endl;
599	574	cvc5::String s = getHeadConst(x);
600		// only allow RE_DERIVE for concrete constant regular expressions
601	287	if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
602		{
603	8	Node conc = Node::null();
604	8	Node dc = r;
605	4	bool flag = true;
606	4	for (unsigned i = 0; i < s.size(); ++i)
607		{
608	4	cvc5::String c = s.substr(i, 1);
609	4	Node dc2;
610	4	int rt = d_regexp_opr.derivativeS(dc, c, dc2);
611	4	dc = dc2;
612	4	if (rt == 2)
613		{
614		// CONFLICT
615	4	flag = false;
616	4	break;
617		}
618		}
619		// send lemma
620	4	if (flag)
621		{
622		if (x.isConst())
623		{
624		Assert(false)
625		<< "Impossible: RegExpSolver::deriveRegExp: const string in const "
626		"regular expression.";
627		return false;
628		}
629		else
630		{
631		Assert(x.getKind() == STRING_CONCAT);
632		std::vector<Node> vec_nodes;
633		for (unsigned int i = 1; i < x.getNumChildren(); ++i)
634		{
635		vec_nodes.push_back(x[i]);
636		}
637		Node left = utils::mkConcat(vec_nodes, x.getType());
638		left = Rewriter::rewrite(left);
639		conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
640		}
641		}
642	8	std::vector<Node> iexp = ant;
643	8	std::vector<Node> noExplain;
644	4	noExplain.push_back(atom);
645	4	iexp.push_back(atom);
646	4	d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
647	4	return true;
648		}
649	283	return false;
650		}
651
652	499	Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
653		{
654	499	Node ret = r;
655	499	switch (r.getKind())
656		{
657	72	case REGEXP_EMPTY:
658		case REGEXP_SIGMA:
659	72	case REGEXP_RANGE: break;
660	182	case STRING_TO_REGEXP:
661		{
662	182	if (!r[0].isConst())
663		{
664	188	Node tmp = d_csolver.getNormalString(r[0], nf_exp);
665	94	if (tmp != r[0])
666		{
667	84	ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
668		}
669		}
670	182	break;
671		}
672	242	case REGEXP_CONCAT:
673		case REGEXP_UNION:
674		case REGEXP_INTER:
675		case REGEXP_STAR:
676		case REGEXP_COMPLEMENT:
677		{
678	484	std::vector<Node> vec_nodes;
679	648	for (const Node& cr : r)
680		{
681	406	vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
682		}
683	242	ret = Rewriter::rewrite(
684	484	NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
685	242	break;
686		}
687	3	default:
688		{
689	6	Trace("strings-error") << "Unsupported term: " << r
690	3	<< " in normalization SymRegExp." << std::endl;
691	3	Assert(!utils::isRegExpKind(r.getKind()));
692		}
693		}
694	499	return ret;
695		}
696
697		} // namespace strings
698		} // namespace theory
699	29902	} // namespace cvc5