Head

GCC Code Coverage Report

Directory:	.		Exec	Total	Coverage
File:	src/theory/strings/regexp_solver.cpp	Lines:	295	331	89.1 %
Date:	2021-11-06	Branches:	589	1219	48.3 %


/******************************************************************************
 * Top contributors (to current version):
 *   Andrew Reynolds, Andres Noetzli, Tianyi Liang
 *
 * This file is part of the cvc5 project.
 *
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
 * in the top-level source directory and their institutional affiliations.
 * All rights reserved.  See the file COPYING in the top-level source
 * directory for licensing information.
 * ****************************************************************************
 *
 * Implementation of the regular expression solver for the theory of strings.
 */

#include "theory/strings/regexp_solver.h"

#include <cmath>

#include "options/strings_options.h"
#include "smt/logic_exception.h"
#include "theory/ext_theory.h"
#include "theory/strings/term_registry.h"
#include "theory/strings/theory_strings_utils.h"
#include "theory/theory_model.h"
#include "util/statistics_value.h"

using namespace std;
using namespace cvc5::context;
using namespace cvc5::kind;

namespace cvc5 {
namespace theory {
namespace strings {

RegExpSolver::RegExpSolver(Env& env,
                           SolverState& s,
                           InferenceManager& im,
                           TermRegistry& tr,
                           CoreSolver& cs,
                           ExtfSolver& es,
                           SequencesStatistics& stats)
    : EnvObj(env),
      d_state(s),
      d_im(im),
      d_csolver(cs),
      d_esolver(es),
      d_statistics(stats),
      d_regexp_ucached(userContext()),
      d_regexp_ccached(context()),
      d_processed_memberships(context()),
      d_regexp_opr(env, tr.getSkolemCache())
{
  d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
  d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
  d_true = NodeManager::currentNM()->mkConst(true);
  d_false = NodeManager::currentNM()->mkConst(false);
}

Node RegExpSolver::mkAnd(Node c1, Node c2)
{
  return NodeManager::currentNM()->mkNode(AND, c1, c2);
}

void RegExpSolver::checkMemberships()
{
  // add the memberships
  std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
  // maps representatives to regular expression memberships in that class
  std::map<Node, std::vector<Node> > assertedMems;
  const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
  std::map<Node, ExtfInfoTmp>::const_iterator it;
  for (unsigned i = 0; i < mems.size(); i++)
  {
    Node n = mems[i];
    Assert(n.getKind() == STRING_IN_REGEXP);
    it = einfo.find(n);
    Assert(it != einfo.end());
    if (!it->second.d_const.isNull())
    {
      bool pol = it->second.d_const.getConst<bool>();
      Trace("strings-process-debug")
          << "  add membership : " << n << ", pol = " << pol << std::endl;
      Node r = d_state.getRepresentative(n[0]);
      assertedMems[r].push_back(pol ? n : n.negate());
    }
    else
    {
      Trace("strings-process-debug")
          << "  irrelevant (non-asserted) membership : " << n << std::endl;
    }
  }
  check(assertedMems);
}

void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
{
  bool addedLemma = false;
  bool changed = false;
  std::vector<Node> processed;

  Trace("regexp-process") << "Checking Memberships ... " << std::endl;
  for (const std::pair<const Node, std::vector<Node> >& mr : mems)
  {
    std::vector<Node> mems2 = mr.second;
    Trace("regexp-process")
        << "Memberships(" << mr.first << ") = " << mr.second << std::endl;
    if (!checkEqcInclusion(mems2))
    {
      // conflict discovered, return
      return;
    }
    if (!checkEqcIntersect(mems2))
    {
      // conflict discovered, return
      return;
    }
  }

  Trace("regexp-debug")
      << "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
      << std::endl;
  if (!addedLemma)
  {
    // get all memberships
    std::map<Node, Node> allMems;
    for (const std::pair<const Node, std::vector<Node> >& mr : mems)
    {
      for (const Node& m : mr.second)
      {
        allMems[m] = mr.first;
      }
    }

    NodeManager* nm = NodeManager::currentNM();
    // representatives of strings that are the LHS of positive memberships that
    // we unfolded
    std::unordered_set<Node> repUnfold;
    // check positive (e=0), then negative (e=1) memberships
    for (unsigned e = 0; e < 2; e++)
    {
      for (const std::pair<const Node, Node>& mp : allMems)
      {
        Node assertion = mp.first;
        Node rep = mp.second;
        // check regular expression membership
        Trace("regexp-debug")
            << "Check : " << assertion << " "
            << (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
            << " "
            << (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
            << std::endl;
        if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
            || d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
        {
          continue;
        }
        Trace("strings-regexp")
            << "We have regular expression assertion : " << assertion
            << std::endl;
        Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
        Assert(atom == rewrite(atom));
        bool polarity = assertion.getKind() != NOT;
        if (polarity != (e == 0))
        {
          continue;
        }
        bool flag = true;
        Node x = atom[0];
        Node r = atom[1];
        Assert(rep == d_state.getRepresentative(x));
        // The following code takes normal forms into account for the purposes
        // of simplifying a regular expression membership x in R. For example,
        // if x = "A" in the current context, then we may be interested in
        // reasoning about ( x in R ) * { x -> "A" }. Say we update the
        // membership to nx in R', then:
        // - nfexp => ( x in R ) <=> nx in R'
        // - rnfexp => R = R'
        // We use these explanations below as assumptions on inferences when
        // appropriate. Notice that for inferring conflicts and tautologies,
        // we use the normal form of x always. This is because we always want to
        // discover conflicts/tautologies whenever possible.
        // For inferences based on regular expression unfolding, we do not use
        // the normal form of x. The reason is that it is better to unfold
        // regular expression memberships in a context-indepedent manner,
        // that is, not taking into account the current normal form of x, since
        // this ensures these lemmas are still relevant after backtracking.
        std::vector<Node> nfexp;
        std::vector<Node> rnfexp;
        // The normal form of x is stored in nx, while x is left unchanged.
        Node nx = x;
        if (!x.isConst())
        {
          nx = d_csolver.getNormalString(x, nfexp);
        }
        // If r is not a constant regular expression, we update it based on
        // normal forms, which may concretize its variables.
        if (!d_regexp_opr.checkConstRegExp(r))
        {
          r = getNormalSymRegExp(r, rnfexp);
          nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
          changed = true;
        }
        Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
                                   << nx << " IN " << r << std::endl;
        if (nx != x || changed)
        {
          // We rewrite the membership nx IN r.
          Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
          Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
          if (tmp.isConst())
          {
            if (tmp.getConst<bool>() == polarity)
            {
              // it is satisfied in this SAT context
              d_regexp_ccached.insert(assertion);
              continue;
            }
            else
            {
              // we have a conflict
              std::vector<Node> iexp = nfexp;
              std::vector<Node> noExplain;
              iexp.push_back(assertion);
              noExplain.push_back(assertion);
              Node conc = Node::null();
              d_im.sendInference(
                  iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
              addedLemma = true;
              break;
            }
          }
        }
        if (e == 1 && repUnfold.find(rep) != repUnfold.end())
        {
          // do not unfold negative memberships of strings that have new
          // positive unfoldings. For example:
          //   x in ("A")* ^ NOT x in ("B")*
          // We unfold x = "A" ++ x' only. The intution here is that positive
          // unfoldings lead to stronger constraints (equalities are stronger
          // than disequalities), and are easier to check.
          continue;
        }
        if (polarity)
        {
          flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
        }
        else
        {
          if (!options::stringExp())
          {
            throw LogicException(
                "Strings Incomplete (due to Negative Membership) by default, "
                "try --strings-exp option.");
          }
        }
        if (flag)
        {
          // check if the term is atomic
          Trace("strings-regexp")
              << "Unroll/simplify membership of atomic term " << rep
              << std::endl;
          // if so, do simple unrolling
          Trace("strings-regexp") << "Simplify on " << atom << std::endl;
          Node conc = d_regexp_opr.simplify(atom, polarity);
          Trace("strings-regexp") << "...finished, got " << conc << std::endl;
          // if simplifying successfully generated a lemma
          if (!conc.isNull())
          {
            std::vector<Node> iexp;
            std::vector<Node> noExplain;
            iexp.push_back(assertion);
            noExplain.push_back(assertion);
            Assert(atom.getKind() == STRING_IN_REGEXP);
            if (polarity)
            {
              d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
            }
            else
            {
              d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
            }
            InferenceId inf =
                polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
            // in very rare cases, we may find out that the unfolding lemma
            // for a membership is equivalent to true, in spite of the RE
            // not being rewritten to true.
            if (d_im.sendInference(iexp, noExplain, conc, inf))
            {
              addedLemma = true;
              if (e == 0)
              {
                // Remember that we have unfolded a membership for x
                // notice that we only do this here, after we have definitely
                // added a lemma.
                repUnfold.insert(rep);
              }
            }
            processed.push_back(assertion);
          }
          else
          {
            // otherwise we are incomplete
            d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
          }
        }
        if (d_state.isInConflict())
        {
          break;
        }
      }
    }
  }
  if (addedLemma)
  {
    if (!d_state.isInConflict())
    {
      for (unsigned i = 0; i < processed.size(); i++)
      {
        Trace("strings-regexp")
            << "...add " << processed[i] << " to u-cache." << std::endl;
        d_regexp_ucached.insert(processed[i]);
      }
    }
  }
}

bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
{
  std::unordered_set<Node> remove;

  for (const Node& m1 : mems)
  {
    bool m1Neg = m1.getKind() == NOT;
    Node m1Lit = m1Neg ? m1[0] : m1;

    if (remove.find(m1) != remove.end())
    {
      // Skip memberships marked for removal
      continue;
    }

    for (const Node& m2 : mems)
    {
      if (m1 == m2)
      {
        continue;
      }

      bool m2Neg = m2.getKind() == NOT;
      Node m2Lit = m2Neg ? m2[0] : m2;

      // Both regular expression memberships have the same polarity
      if (m1Neg == m2Neg)
      {
        if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
        {
          if (m1Neg)
          {
            // ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
            //   mark ~str.in.re(x, R2) as reduced
            d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
            remove.insert(m2);
          }
          else
          {
            // str.in.re(x, R1) includes str.in.re(x, R2) --->
            //   mark str.in.re(x, R1) as reduced
            d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
            remove.insert(m1);

            // We don't need to process m1 anymore
            break;
          }
        }
      }
      else
      {
        Node pos = m1Neg ? m2Lit : m1Lit;
        Node neg = m1Neg ? m1Lit : m2Lit;
        if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
        {
          // We have a conflict because we have a case where str.in.re(x, R1)
          // and ~str.in.re(x, R2) but R2 includes R1, so there is no
          // possible value for x that satisfies both memberships.
          std::vector<Node> vec_nodes;
          vec_nodes.push_back(pos);
          vec_nodes.push_back(neg.negate());

          if (pos[0] != neg[0])
          {
            vec_nodes.push_back(pos[0].eqNode(neg[0]));
          }

          Node conc;
          d_im.sendInference(
              vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
          return false;
        }
      }
    }
  }

  mems.erase(std::remove_if(
                 mems.begin(),
                 mems.end(),
                 [&remove](Node& n) { return remove.find(n) != remove.end(); }),
             mems.end());

  return true;
}

bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
{
  // do not compute intersections if the re intersection mode is none
  if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
  {
    return true;
  }
  if (mems.empty())
  {
    // nothing to do
    return true;
  }
  // the initial regular expression membership and its constant type
  Node mi;
  RegExpConstType rcti = RE_C_UNKNOWN;
  NodeManager* nm = NodeManager::currentNM();
  for (const Node& m : mems)
  {
    if (m.getKind() != STRING_IN_REGEXP)
    {
      // do not do negative
      Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
      continue;
    }
    RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
    if (rct == RE_C_VARIABLE
        || (options::stringRegExpInterMode()
                == options::RegExpInterMode::CONSTANT
            && rct != RE_C_CONRETE_CONSTANT))
    {
      // cannot do intersection on RE with variables, or with re.allchar based
      // on option.
      continue;
    }
    if (options::stringRegExpInterMode()
        == options::RegExpInterMode::ONE_CONSTANT)
    {
      if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
      {
        // if both have re.allchar, do not do intersection if the
        // options::RegExpInterMode::ONE_CONSTANT option is set.
        continue;
      }
    }
    if (mi.isNull())
    {
      // first regular expression seen
      mi = m;
      rcti = rct;
      continue;
    }
    Node resR = d_regexp_opr.intersect(mi[1], m[1]);
    // intersection should be computable
    Assert(!resR.isNull());
    if (resR == d_emptyRegexp)
    {
      // conflict, explain
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      Node conc;
      d_im.sendInference(
          vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
      // conflict, return
      return false;
    }
    // rewrite to ensure the equality checks below are precise
    Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
    Node mresr = rewrite(mres);
    if (mresr == mi)
    {
      // if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
      // to x in R1, hence x in R2 can be marked redundant.
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else if (mresr == m)
    {
      // same as above, opposite direction
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
    }
    else
    {
      // new conclusion
      // (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
      std::vector<Node> vec_nodes;
      vec_nodes.push_back(mi);
      vec_nodes.push_back(m);
      if (mi[0] != m[0])
      {
        vec_nodes.push_back(mi[0].eqNode(m[0]));
      }
      d_im.sendInference(
          vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
      // both are reduced
      d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
      d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
      // do not send more than one lemma for this class
      return true;
    }
  }
  return true;
}

bool RegExpSolver::checkPDerivative(
    Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
{
  if (d_state.areEqual(x, d_emptyString))
  {
    Node exp;
    switch (d_regexp_opr.delta(r, exp))
    {
      case 0:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        noExplain.push_back(x.eqNode(d_emptyString));
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      case 1:
      {
        d_regexp_ccached.insert(atom);
        break;
      }
      case 2:
      {
        std::vector<Node> noExplain;
        noExplain.push_back(atom);
        if (x != d_emptyString)
        {
          noExplain.push_back(x.eqNode(d_emptyString));
        }
        std::vector<Node> iexp = nf_exp;
        iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
        d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
        addedLemma = true;
        d_regexp_ccached.insert(atom);
        return false;
      }
      default:
        // Impossible
        break;
    }
  }
  else
  {
    if (deriveRegExp(x, r, atom, nf_exp))
    {
      addedLemma = true;
      d_regexp_ccached.insert(atom);
      return false;
    }
  }
  return true;
}

cvc5::String RegExpSolver::getHeadConst(Node x)
{
  if (x.isConst())
  {
    return x.getConst<String>();
  }
  else if (x.getKind() == STRING_CONCAT)
  {
    if (x[0].isConst())
    {
      return x[0].getConst<String>();
    }
  }
  return d_emptyString.getConst<String>();
}

bool RegExpSolver::deriveRegExp(Node x,
                                Node r,
                                Node atom,
                                std::vector<Node>& ant)
{
  Assert(x != d_emptyString);
  Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
                         << ", r= " << r << std::endl;
  cvc5::String s = getHeadConst(x);
  // only allow RE_DERIVE for concrete constant regular expressions
  if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
  {
    Node conc = Node::null();
    Node dc = r;
    bool flag = true;
    for (unsigned i = 0; i < s.size(); ++i)
    {
      cvc5::String c = s.substr(i, 1);
      Node dc2;
      int rt = d_regexp_opr.derivativeS(dc, c, dc2);
      dc = dc2;
      if (rt == 2)
      {
        // CONFLICT
        flag = false;
        break;
      }
    }
    // send lemma
    if (flag)
    {
      if (x.isConst())
      {
        Assert(false)
            << "Impossible: RegExpSolver::deriveRegExp: const string in const "
               "regular expression.";
        return false;
      }
      else
      {
        Assert(x.getKind() == STRING_CONCAT);
        std::vector<Node> vec_nodes;
        for (unsigned int i = 1; i < x.getNumChildren(); ++i)
        {
          vec_nodes.push_back(x[i]);
        }
        Node left = utils::mkConcat(vec_nodes, x.getType());
        left = rewrite(left);
        conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
      }
    }
    std::vector<Node> iexp = ant;
    std::vector<Node> noExplain;
    noExplain.push_back(atom);
    iexp.push_back(atom);
    d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
    return true;
  }
  return false;
}

Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
{
  Node ret = r;
  switch (r.getKind())
  {
    case REGEXP_EMPTY:
    case REGEXP_SIGMA:
    case REGEXP_RANGE: break;
    case STRING_TO_REGEXP:
    {
      if (!r[0].isConst())
      {
        Node tmp = d_csolver.getNormalString(r[0], nf_exp);
        if (tmp != r[0])
        {
          ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
        }
      }
      break;
    }
    case REGEXP_CONCAT:
    case REGEXP_UNION:
    case REGEXP_INTER:
    case REGEXP_STAR:
    case REGEXP_COMPLEMENT:
    {
      std::vector<Node> vec_nodes;
      for (const Node& cr : r)
      {
        vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
      }
      ret = rewrite(NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
      break;
    }
    default:
    {
      Trace("strings-error") << "Unsupported term: " << r
                             << " in normalization SymRegExp." << std::endl;
      Assert(!utils::isRegExpKind(r.getKind()));
    }
  }
  return ret;
}

}  // namespace strings
}  // namespace theory
}  // namespace cvc5


Generated by: GCOVR (Version 3.2)

Line	Exec	Source
1		/******************************************************************************
2		* Top contributors (to current version):
3		* Andrew Reynolds, Andres Noetzli, Tianyi Liang
4		*
5		* This file is part of the cvc5 project.
6		*
7		* Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8		* in the top-level source directory and their institutional affiliations.
9		* All rights reserved. See the file COPYING in the top-level source
10		* directory for licensing information.
11		* ****************************************************************************
12		*
13		* Implementation of the regular expression solver for the theory of strings.
14		*/
15
16		#include "theory/strings/regexp_solver.h"
17
18		#include <cmath>
19
20		#include "options/strings_options.h"
21		#include "smt/logic_exception.h"
22		#include "theory/ext_theory.h"
23		#include "theory/strings/term_registry.h"
24		#include "theory/strings/theory_strings_utils.h"
25		#include "theory/theory_model.h"
26		#include "util/statistics_value.h"
27
28		using namespace std;
29		using namespace cvc5::context;
30		using namespace cvc5::kind;
31
32		namespace cvc5 {
33		namespace theory {
34		namespace strings {
35
36	15272	RegExpSolver::RegExpSolver(Env& env,
37		SolverState& s,
38		InferenceManager& im,
39		TermRegistry& tr,
40		CoreSolver& cs,
41		ExtfSolver& es,
42	15272	SequencesStatistics& stats)
43		: EnvObj(env),
44		d_state(s),
45		d_im(im),
46		d_csolver(cs),
47		d_esolver(es),
48		d_statistics(stats),
49	15272	d_regexp_ucached(userContext()),
50		d_regexp_ccached(context()),
51		d_processed_memberships(context()),
52	30544	d_regexp_opr(env, tr.getSkolemCache())
53		{
54	15272	d_emptyString = NodeManager::currentNM()->mkConst(::cvc5::String(""));
55	15272	d_emptyRegexp = NodeManager::currentNM()->mkNode(REGEXP_EMPTY);
56	15272	d_true = NodeManager::currentNM()->mkConst(true);
57	15272	d_false = NodeManager::currentNM()->mkConst(false);
58	15272	}
59
60		Node RegExpSolver::mkAnd(Node c1, Node c2)
61		{
62		return NodeManager::currentNM()->mkNode(AND, c1, c2);
63		}
64
65	10650	void RegExpSolver::checkMemberships()
66		{
67		// add the memberships
68	21300	std::vector<Node> mems = d_esolver.getActive(STRING_IN_REGEXP);
69		// maps representatives to regular expression memberships in that class
70	21300	std::map<Node, std::vector<Node> > assertedMems;
71	10650	const std::map<Node, ExtfInfoTmp>& einfo = d_esolver.getInfo();
72	10650	std::map<Node, ExtfInfoTmp>::const_iterator it;
73	12185	for (unsigned i = 0; i < mems.size(); i++)
74		{
75	3070	Node n = mems[i];
76	1535	Assert(n.getKind() == STRING_IN_REGEXP);
77	1535	it = einfo.find(n);
78	1535	Assert(it != einfo.end());
79	1535	if (!it->second.d_const.isNull())
80		{
81	1503	bool pol = it->second.d_const.getConst<bool>();
82	3006	Trace("strings-process-debug")
83	1503	<< " add membership : " << n << ", pol = " << pol << std::endl;
84	3006	Node r = d_state.getRepresentative(n[0]);
85	1503	assertedMems[r].push_back(pol ? n : n.negate());
86		}
87		else
88		{
89	64	Trace("strings-process-debug")
90	32	<< " irrelevant (non-asserted) membership : " << n << std::endl;
91		}
92		}
93	10650	check(assertedMems);
94	10650	}
95
96	10650	void RegExpSolver::check(const std::map<Node, std::vector<Node> >& mems)
97		{
98	10650	bool addedLemma = false;
99	10650	bool changed = false;
100	21278	std::vector<Node> processed;
101
102	10650	Trace("regexp-process") << "Checking Memberships ... " << std::endl;
103	11653	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
104		{
105	2028	std::vector<Node> mems2 = mr.second;
106	2050	Trace("regexp-process")
107	1025	<< "Memberships(" << mr.first << ") = " << mr.second << std::endl;
108	1025	if (!checkEqcInclusion(mems2))
109		{
110		// conflict discovered, return
111	10	return;
112		}
113	1015	if (!checkEqcIntersect(mems2))
114		{
115		// conflict discovered, return
116	12	return;
117		}
118		}
119
120	21256	Trace("regexp-debug")
121	10628	<< "... No Intersect Conflict in Memberships, addedLemma: " << addedLemma
122	10628	<< std::endl;
123	10628	if (!addedLemma)
124		{
125		// get all memberships
126	21256	std::map<Node, Node> allMems;
127	11631	for (const std::pair<const Node, std::vector<Node> >& mr : mems)
128		{
129	2447	for (const Node& m : mr.second)
130		{
131	1444	allMems[m] = mr.first;
132		}
133		}
134
135	10628	NodeManager* nm = NodeManager::currentNM();
136		// representatives of strings that are the LHS of positive memberships that
137		// we unfolded
138	21256	std::unordered_set<Node> repUnfold;
139		// check positive (e=0), then negative (e=1) memberships
140	31884	for (unsigned e = 0; e < 2; e++)
141		{
142	24066	for (const std::pair<const Node, Node>& mp : allMems)
143		{
144	3450	Node assertion = mp.first;
145	3450	Node rep = mp.second;
146		// check regular expression membership
147	5740	Trace("regexp-debug")
148	2870	<< "Check : " << assertion << " "
149	5740	<< (d_regexp_ucached.find(assertion) == d_regexp_ucached.end())
150	2870	<< " "
151	5740	<< (d_regexp_ccached.find(assertion) == d_regexp_ccached.end())
152	2870	<< std::endl;
153	5740	if (d_regexp_ucached.find(assertion) != d_regexp_ucached.end()
154	2870	\|\| d_regexp_ccached.find(assertion) != d_regexp_ccached.end())
155		{
156	1470	continue;
157		}
158	2800	Trace("strings-regexp")
159	1400	<< "We have regular expression assertion : " << assertion
160	1400	<< std::endl;
161	1980	Node atom = assertion.getKind() == NOT ? assertion[0] : assertion;
162	1400	Assert(atom == rewrite(atom));
163	1400	bool polarity = assertion.getKind() != NOT;
164	1400	if (polarity != (e == 0))
165		{
166	686	continue;
167		}
168	714	bool flag = true;
169	1294	Node x = atom[0];
170	1294	Node r = atom[1];
171	714	Assert(rep == d_state.getRepresentative(x));
172		// The following code takes normal forms into account for the purposes
173		// of simplifying a regular expression membership x in R. For example,
174		// if x = "A" in the current context, then we may be interested in
175		// reasoning about ( x in R ) * { x -> "A" }. Say we update the
176		// membership to nx in R', then:
177		// - nfexp => ( x in R ) <=> nx in R'
178		// - rnfexp => R = R'
179		// We use these explanations below as assumptions on inferences when
180		// appropriate. Notice that for inferring conflicts and tautologies,
181		// we use the normal form of x always. This is because we always want to
182		// discover conflicts/tautologies whenever possible.
183		// For inferences based on regular expression unfolding, we do not use
184		// the normal form of x. The reason is that it is better to unfold
185		// regular expression memberships in a context-indepedent manner,
186		// that is, not taking into account the current normal form of x, since
187		// this ensures these lemmas are still relevant after backtracking.
188	1294	std::vector<Node> nfexp;
189	1294	std::vector<Node> rnfexp;
190		// The normal form of x is stored in nx, while x is left unchanged.
191	1294	Node nx = x;
192	714	if (!x.isConst())
193		{
194	664	nx = d_csolver.getNormalString(x, nfexp);
195		}
196		// If r is not a constant regular expression, we update it based on
197		// normal forms, which may concretize its variables.
198	714	if (!d_regexp_opr.checkConstRegExp(r))
199		{
200	142	r = getNormalSymRegExp(r, rnfexp);
201	142	nfexp.insert(nfexp.end(), rnfexp.begin(), rnfexp.end());
202	142	changed = true;
203		}
204	1428	Trace("strings-regexp-nf") << "Term " << atom << " is normalized to "
205	714	<< nx << " IN " << r << std::endl;
206	714	if (nx != x \|\| changed)
207		{
208		// We rewrite the membership nx IN r.
209	726	Node tmp = rewrite(nm->mkNode(STRING_IN_REGEXP, nx, r));
210	399	Trace("strings-regexp-nf") << "Simplifies to " << tmp << std::endl;
211	399	if (tmp.isConst())
212		{
213	84	if (tmp.getConst<bool>() == polarity)
214		{
215		// it is satisfied in this SAT context
216	12	d_regexp_ccached.insert(assertion);
217	12	continue;
218		}
219		else
220		{
221		// we have a conflict
222	120	std::vector<Node> iexp = nfexp;
223	120	std::vector<Node> noExplain;
224	60	iexp.push_back(assertion);
225	60	noExplain.push_back(assertion);
226	120	Node conc = Node::null();
227	60	d_im.sendInference(
228		iexp, noExplain, conc, InferenceId::STRINGS_RE_NF_CONFLICT);
229	60	addedLemma = true;
230	60	break;
231		}
232		}
233		}
234	642	if (e == 1 && repUnfold.find(rep) != repUnfold.end())
235		{
236		// do not unfold negative memberships of strings that have new
237		// positive unfoldings. For example:
238		// x in ("A")* ^ NOT x in ("B")*
239		// We unfold x = "A" ++ x' only. The intution here is that positive
240		// unfoldings lead to stronger constraints (equalities are stronger
241		// than disequalities), and are easier to check.
242	62	continue;
243		}
244	580	if (polarity)
245		{
246	436	flag = checkPDerivative(x, r, atom, addedLemma, rnfexp);
247		}
248		else
249		{
250	144	if (!options::stringExp())
251		{
252		throw LogicException(
253		"Strings Incomplete (due to Negative Membership) by default, "
254		"try --strings-exp option.");
255		}
256		}
257	580	if (flag)
258		{
259		// check if the term is atomic
260	1120	Trace("strings-regexp")
261	560	<< "Unroll/simplify membership of atomic term " << rep
262	560	<< std::endl;
263		// if so, do simple unrolling
264	560	Trace("strings-regexp") << "Simplify on " << atom << std::endl;
265	1120	Node conc = d_regexp_opr.simplify(atom, polarity);
266	560	Trace("strings-regexp") << "...finished, got " << conc << std::endl;
267		// if simplifying successfully generated a lemma
268	560	if (!conc.isNull())
269		{
270	1108	std::vector<Node> iexp;
271	1108	std::vector<Node> noExplain;
272	554	iexp.push_back(assertion);
273	554	noExplain.push_back(assertion);
274	554	Assert(atom.getKind() == STRING_IN_REGEXP);
275	554	if (polarity)
276		{
277	412	d_statistics.d_regexpUnfoldingsPos << atom[1].getKind();
278		}
279		else
280		{
281	142	d_statistics.d_regexpUnfoldingsNeg << atom[1].getKind();
282		}
283	554	InferenceId inf =
284	554	polarity ? InferenceId::STRINGS_RE_UNFOLD_POS : InferenceId::STRINGS_RE_UNFOLD_NEG;
285		// in very rare cases, we may find out that the unfolding lemma
286		// for a membership is equivalent to true, in spite of the RE
287		// not being rewritten to true.
288	554	if (d_im.sendInference(iexp, noExplain, conc, inf))
289		{
290	548	addedLemma = true;
291	548	if (e == 0)
292		{
293		// Remember that we have unfolded a membership for x
294		// notice that we only do this here, after we have definitely
295		// added a lemma.
296	410	repUnfold.insert(rep);
297		}
298		}
299	554	processed.push_back(assertion);
300		}
301		else
302		{
303		// otherwise we are incomplete
304	6	d_im.setIncomplete(IncompleteId::STRINGS_REGEXP_NO_SIMPLIFY);
305		}
306		}
307	580	if (d_state.isInConflict())
308		{
309		break;
310		}
311		}
312		}
313		}
314	10628	if (addedLemma)
315		{
316	412	if (!d_state.isInConflict())
317		{
318	962	for (unsigned i = 0; i < processed.size(); i++)
319		{
320	1100	Trace("strings-regexp")
321	550	<< "...add " << processed[i] << " to u-cache." << std::endl;
322	550	d_regexp_ucached.insert(processed[i]);
323		}
324		}
325		}
326		}
327
328	1025	bool RegExpSolver::checkEqcInclusion(std::vector<Node>& mems)
329		{
330	2050	std::unordered_set<Node> remove;
331
332	2500	for (const Node& m1 : mems)
333		{
334	1485	bool m1Neg = m1.getKind() == NOT;
335	2922	Node m1Lit = m1Neg ? m1[0] : m1;
336
337	1485	if (remove.find(m1) != remove.end())
338		{
339		// Skip memberships marked for removal
340	38	continue;
341		}
342
343	4112	for (const Node& m2 : mems)
344		{
345	2729	if (m1 == m2)
346		{
347	1419	continue;
348		}
349
350	1310	bool m2Neg = m2.getKind() == NOT;
351	2556	Node m2Lit = m2Neg ? m2[0] : m2;
352
353		// Both regular expression memberships have the same polarity
354	1310	if (m1Neg == m2Neg)
355		{
356	864	if (d_regexp_opr.regExpIncludes(m1Lit[1], m2Lit[1]))
357		{
358	94	if (m1Neg)
359		{
360		// ~str.in.re(x, R1) includes ~str.in.re(x, R2) --->
361		// mark ~str.in.re(x, R2) as reduced
362	40	d_im.markReduced(m2Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE_NEG);
363	40	remove.insert(m2);
364		}
365		else
366		{
367		// str.in.re(x, R1) includes str.in.re(x, R2) --->
368		// mark str.in.re(x, R1) as reduced
369	54	d_im.markReduced(m1Lit, ExtReducedId::STRINGS_REGEXP_INCLUDE);
370	54	remove.insert(m1);
371
372		// We don't need to process m1 anymore
373	54	break;
374		}
375		}
376		}
377		else
378		{
379	882	Node pos = m1Neg ? m2Lit : m1Lit;
380	882	Node neg = m1Neg ? m1Lit : m2Lit;
381	446	if (d_regexp_opr.regExpIncludes(neg[1], pos[1]))
382		{
383		// We have a conflict because we have a case where str.in.re(x, R1)
384		// and ~str.in.re(x, R2) but R2 includes R1, so there is no
385		// possible value for x that satisfies both memberships.
386	20	std::vector<Node> vec_nodes;
387	10	vec_nodes.push_back(pos);
388	10	vec_nodes.push_back(neg.negate());
389
390	10	if (pos[0] != neg[0])
391		{
392		vec_nodes.push_back(pos[0].eqNode(neg[0]));
393		}
394
395	20	Node conc;
396	10	d_im.sendInference(
397		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_INCLUDE, false, true);
398	10	return false;
399		}
400		}
401		}
402		}
403
404	3045	mems.erase(std::remove_if(
405		mems.begin(),
406		mems.end(),
407	2486	[&remove](Node& n) { return remove.find(n) != remove.end(); }),
408	4060	mems.end());
409
410	1015	return true;
411		}
412
413	1015	bool RegExpSolver::checkEqcIntersect(const std::vector<Node>& mems)
414		{
415		// do not compute intersections if the re intersection mode is none
416	1015	if (options::stringRegExpInterMode() == options::RegExpInterMode::NONE)
417		{
418		return true;
419		}
420	1015	if (mems.empty())
421		{
422		// nothing to do
423	22	return true;
424		}
425		// the initial regular expression membership and its constant type
426	1986	Node mi;
427	993	RegExpConstType rcti = RE_C_UNKNOWN;
428	993	NodeManager* nm = NodeManager::currentNM();
429	2262	for (const Node& m : mems)
430		{
431	1835	if (m.getKind() != STRING_IN_REGEXP)
432		{
433		// do not do negative
434	506	Assert(m.getKind() == NOT && m[0].getKind() == STRING_IN_REGEXP);
435	1768	continue;
436		}
437	823	RegExpConstType rct = d_regexp_opr.getRegExpConstType(m[1]);
438	823	if (rct == RE_C_VARIABLE
439	1415	\|\| (options::stringRegExpInterMode()
440		== options::RegExpInterMode::CONSTANT
441	645	&& rct != RE_C_CONRETE_CONSTANT))
442		{
443		// cannot do intersection on RE with variables, or with re.allchar based
444		// on option.
445	592	continue;
446		}
447	462	if (options::stringRegExpInterMode()
448	231	== options::RegExpInterMode::ONE_CONSTANT)
449		{
450		if (!mi.isNull() && rcti >= RE_C_CONSTANT && rct >= RE_C_CONSTANT)
451		{
452		// if both have re.allchar, do not do intersection if the
453		// options::RegExpInterMode::ONE_CONSTANT option is set.
454		continue;
455		}
456		}
457	395	if (mi.isNull())
458		{
459		// first regular expression seen
460	164	mi = m;
461	164	rcti = rct;
462	164	continue;
463		}
464	74	Node resR = d_regexp_opr.intersect(mi[1], m[1]);
465		// intersection should be computable
466	67	Assert(!resR.isNull());
467	67	if (resR == d_emptyRegexp)
468		{
469		// conflict, explain
470	24	std::vector<Node> vec_nodes;
471	12	vec_nodes.push_back(mi);
472	12	vec_nodes.push_back(m);
473	12	if (mi[0] != m[0])
474		{
475	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
476		}
477	24	Node conc;
478	12	d_im.sendInference(
479		vec_nodes, conc, InferenceId::STRINGS_RE_INTER_CONF, false, true);
480		// conflict, return
481	12	return false;
482		}
483		// rewrite to ensure the equality checks below are precise
484	62	Node mres = nm->mkNode(STRING_IN_REGEXP, mi[0], resR);
485	62	Node mresr = rewrite(mres);
486	55	if (mresr == mi)
487		{
488		// if R1 = intersect( R1, R2 ), then x in R1 ^ x in R2 is equivalent
489		// to x in R1, hence x in R2 can be marked redundant.
490		d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
491		}
492	55	else if (mresr == m)
493		{
494		// same as above, opposite direction
495	7	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER_SUBSUME);
496		}
497		else
498		{
499		// new conclusion
500		// (x in R1 ^ y in R2 ^ x = y) => (x in intersect(R1,R2))
501	96	std::vector<Node> vec_nodes;
502	48	vec_nodes.push_back(mi);
503	48	vec_nodes.push_back(m);
504	48	if (mi[0] != m[0])
505		{
506	8	vec_nodes.push_back(mi[0].eqNode(m[0]));
507		}
508	48	d_im.sendInference(
509		vec_nodes, mres, InferenceId::STRINGS_RE_INTER_INFER, false, true);
510		// both are reduced
511	48	d_im.markReduced(m, ExtReducedId::STRINGS_REGEXP_INTER);
512	48	d_im.markReduced(mi, ExtReducedId::STRINGS_REGEXP_INTER);
513		// do not send more than one lemma for this class
514	48	return true;
515		}
516		}
517	933	return true;
518		}
519
520	436	bool RegExpSolver::checkPDerivative(
521		Node x, Node r, Node atom, bool& addedLemma, std::vector<Node>& nf_exp)
522		{
523	436	if (d_state.areEqual(x, d_emptyString))
524		{
525	16	Node exp;
526	16	switch (d_regexp_opr.delta(r, exp))
527		{
528		case 0:
529		{
530		std::vector<Node> noExplain;
531		noExplain.push_back(atom);
532		noExplain.push_back(x.eqNode(d_emptyString));
533		std::vector<Node> iexp = nf_exp;
534		iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
535		d_im.sendInference(iexp, noExplain, exp, InferenceId::STRINGS_RE_DELTA);
536		addedLemma = true;
537		d_regexp_ccached.insert(atom);
538		return false;
539		}
540		case 1:
541		{
542		d_regexp_ccached.insert(atom);
543		break;
544		}
545	16	case 2:
546		{
547	32	std::vector<Node> noExplain;
548	16	noExplain.push_back(atom);
549	16	if (x != d_emptyString)
550		{
551		noExplain.push_back(x.eqNode(d_emptyString));
552		}
553	32	std::vector<Node> iexp = nf_exp;
554	16	iexp.insert(iexp.end(), noExplain.begin(), noExplain.end());
555	16	d_im.sendInference(iexp, noExplain, d_false, InferenceId::STRINGS_RE_DELTA_CONF);
556	16	addedLemma = true;
557	16	d_regexp_ccached.insert(atom);
558	16	return false;
559		}
560		default:
561		// Impossible
562		break;
563		}
564		}
565		else
566		{
567	420	if (deriveRegExp(x, r, atom, nf_exp))
568		{
569	4	addedLemma = true;
570	4	d_regexp_ccached.insert(atom);
571	4	return false;
572		}
573		}
574	416	return true;
575		}
576
577	420	cvc5::String RegExpSolver::getHeadConst(Node x)
578		{
579	420	if (x.isConst())
580		{
581	4	return x.getConst<String>();
582		}
583	416	else if (x.getKind() == STRING_CONCAT)
584		{
585	50	if (x[0].isConst())
586		{
587	6	return x[0].getConst<String>();
588		}
589		}
590	410	return d_emptyString.getConst<String>();
591		}
592
593	420	bool RegExpSolver::deriveRegExp(Node x,
594		Node r,
595		Node atom,
596		std::vector<Node>& ant)
597		{
598	420	Assert(x != d_emptyString);
599	840	Trace("regexp-derive") << "RegExpSolver::deriveRegExp: x=" << x
600	420	<< ", r= " << r << std::endl;
601	840	cvc5::String s = getHeadConst(x);
602		// only allow RE_DERIVE for concrete constant regular expressions
603	420	if (!s.empty() && d_regexp_opr.getRegExpConstType(r) == RE_C_CONRETE_CONSTANT)
604		{
605	8	Node conc = Node::null();
606	8	Node dc = r;
607	4	bool flag = true;
608	4	for (unsigned i = 0; i < s.size(); ++i)
609		{
610	4	cvc5::String c = s.substr(i, 1);
611	4	Node dc2;
612	4	int rt = d_regexp_opr.derivativeS(dc, c, dc2);
613	4	dc = dc2;
614	4	if (rt == 2)
615		{
616		// CONFLICT
617	4	flag = false;
618	4	break;
619		}
620		}
621		// send lemma
622	4	if (flag)
623		{
624		if (x.isConst())
625		{
626		Assert(false)
627		<< "Impossible: RegExpSolver::deriveRegExp: const string in const "
628		"regular expression.";
629		return false;
630		}
631		else
632		{
633		Assert(x.getKind() == STRING_CONCAT);
634		std::vector<Node> vec_nodes;
635		for (unsigned int i = 1; i < x.getNumChildren(); ++i)
636		{
637		vec_nodes.push_back(x[i]);
638		}
639		Node left = utils::mkConcat(vec_nodes, x.getType());
640		left = rewrite(left);
641		conc = NodeManager::currentNM()->mkNode(STRING_IN_REGEXP, left, dc);
642		}
643		}
644	8	std::vector<Node> iexp = ant;
645	8	std::vector<Node> noExplain;
646	4	noExplain.push_back(atom);
647	4	iexp.push_back(atom);
648	4	d_im.sendInference(iexp, noExplain, conc, InferenceId::STRINGS_RE_DERIVE);
649	4	return true;
650		}
651	416	return false;
652		}
653
654	808	Node RegExpSolver::getNormalSymRegExp(Node r, std::vector<Node>& nf_exp)
655		{
656	808	Node ret = r;
657	808	switch (r.getKind())
658		{
659	108	case REGEXP_EMPTY:
660		case REGEXP_SIGMA:
661	108	case REGEXP_RANGE: break;
662	272	case STRING_TO_REGEXP:
663		{
664	272	if (!r[0].isConst())
665		{
666	332	Node tmp = d_csolver.getNormalString(r[0], nf_exp);
667	166	if (tmp != r[0])
668		{
669	120	ret = NodeManager::currentNM()->mkNode(STRING_TO_REGEXP, tmp);
670		}
671		}
672	272	break;
673		}
674	422	case REGEXP_CONCAT:
675		case REGEXP_UNION:
676		case REGEXP_INTER:
677		case REGEXP_STAR:
678		case REGEXP_COMPLEMENT:
679		{
680	844	std::vector<Node> vec_nodes;
681	1088	for (const Node& cr : r)
682		{
683	666	vec_nodes.push_back(getNormalSymRegExp(cr, nf_exp));
684		}
685	422	ret = rewrite(NodeManager::currentNM()->mkNode(r.getKind(), vec_nodes));
686	422	break;
687		}
688	6	default:
689		{
690	12	Trace("strings-error") << "Unsupported term: " << r
691	6	<< " in normalization SymRegExp." << std::endl;
692	6	Assert(!utils::isRegExpKind(r.getKind()));
693		}
694		}
695	808	return ret;
696		}
697
698		} // namespace strings
699		} // namespace theory
700	31137	} // namespace cvc5