GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/didyoumean.cpp Lines: 64 65 98.5 %
Date: 2021-11-07 Branches: 57 82 69.5 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Kshitij Bansal, Tim King, Clark Barrett
4
 *
5
 * This file is part of the cvc5 project.
6
 *
7
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
8
 * in the top-level source directory and their institutional affiliations.
9
 * All rights reserved.  See the file COPYING in the top-level source
10
 * directory for licensing information.
11
 * ****************************************************************************
12
 *
13
 * Did-you-mean style suggestions.
14
 *
15
 * ``What do you mean? I don't understand.'' An attempt to be more
16
 * helpful than that. Similar to one in git.
17
 *
18
 * There are no dependencies on cvc5 (except namespace).
19
 */
20
21
#include "util/didyoumean.h"
22
23
#include <algorithm>
24
#include <array>
25
#include <sstream>
26
#include <string>
27
#include <vector>
28
29
namespace cvc5 {
30
31
namespace {
32
33
36
uint64_t editDistance(const std::string& a, const std::string& b) {
34
  // input string: a
35
  // desired string: b
36
37
36
  constexpr uint64_t swapCost = 0;
38
36
  constexpr uint64_t substituteCost = 2;
39
36
  constexpr uint64_t addCost = 1;
40
36
  constexpr uint64_t deleteCost = 3;
41
36
  constexpr uint64_t switchCaseCost = 0;
42
43
36
  uint64_t len1 = a.size();
44
36
  uint64_t len2 = b.size();
45
46
72
  std::array<std::vector<uint64_t>, 3> C;
47
144
  for (auto& c: C)
48
  {
49
108
    c.resize(len2 + 1);
50
  }
51
  //  int C[3][len2+1];             // cost
52
53
276
  for (uint64_t j = 0; j <= len2; ++j)
54
  {
55
240
    C[0][j] = j * addCost;
56
  }
57
58
228
  for (uint64_t i = 1; i <= len1; ++i)
59
  {
60
192
    uint64_t cur = i % 3;
61
192
    uint64_t prv = (i + 2) % 3;
62
192
    uint64_t pr2 = (i + 1) % 3;
63
64
192
    C[cur][0] = i * deleteCost;
65
66
1280
    for (uint64_t j = 1; j <= len2; ++j)
67
    {
68
1088
      C[cur][j] = 100000000;  // INF
69
70
1088
      if (a[i - 1] == b[j - 1]) {
71
        // match
72
96
        C[cur][j] = std::min(C[cur][j], C[prv][j - 1]);
73
992
      } else if (tolower(a[i - 1]) == tolower(b[j - 1])) {
74
        // switch case
75
        C[cur][j] = std::min(C[cur][j], C[prv][j - 1] + switchCaseCost);
76
      } else {
77
        // substitute
78
992
        C[cur][j] = std::min(C[cur][j], C[prv][j - 1] + substituteCost);
79
      }
80
81
      // swap
82
1088
      if (i >= 2 && j >= 2 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1]) {
83
8
        C[cur][j] = std::min(C[cur][j], C[pr2][j - 2] + swapCost);
84
      }
85
86
      // add
87
1088
      C[cur][j] = std::min(C[cur][j], C[cur][j - 1] + addCost);
88
89
      // delete
90
1088
      C[cur][j] = std::min(C[cur][j], C[prv][j] + deleteCost);
91
    }
92
  }
93
72
  return C[len1 % 3][len2];
94
}
95
96
}
97
98
14
std::vector<std::string> DidYouMean::getMatch(const std::string& input)
99
{
100
  {
101
14
    std::sort(d_words.begin(), d_words.end());
102
14
    auto it = std::unique(d_words.begin(), d_words.end());
103
14
    d_words.erase(it, d_words.end());
104
  }
105
106
  /** Magic numbers */
107
14
  constexpr uint64_t similarityThreshold = 7;
108
14
  constexpr uint64_t numMatchesThreshold = 10;
109
110
28
  std::vector<std::pair<uint64_t,std::string>> scores;
111
14
  std::vector<std::string> ret;
112
50
  for (const auto& s: d_words) {
113
38
    if (s == input) {
114
      // if input matches AS-IS just return that
115
2
      ret.emplace_back(s);
116
2
      return ret;
117
    }
118
36
    uint64_t score = 0;
119
36
    if (s.compare(0, input.size(), input) != 0) {
120
36
      score = editDistance(input, s) + 1;
121
    }
122
36
    scores.emplace_back(std::make_pair(score, s));
123
  }
124
12
  std::sort(scores.begin(), scores.end());
125
12
  const uint64_t min_score = scores.begin()->first;
126
24
  for (const auto& score: scores) {
127
    // from here on, matches are not similar enough
128
24
    if (score.first > similarityThreshold) break;
129
    // from here on, matches are way worse than the best one
130
12
    if (score.first > min_score + 2) break;
131
    // we already have enough matches
132
12
    if (ret.size() >= numMatchesThreshold) break;
133
12
    ret.push_back(score.second);
134
  }
135
12
  return ret;
136
}
137
138
6
std::string DidYouMean::getMatchAsString(const std::string& input)
139
{
140
12
  std::vector<std::string> matches = getMatch(input);
141
12
  std::ostringstream oss;
142
6
  if (matches.size() > 0) {
143
4
    oss << std::endl << std::endl;
144
4
    if (matches.size() == 1) {
145
2
      oss << "Did you mean this?";
146
    } else {
147
2
      oss << "Did you mean any of these?";
148
    }
149
10
    for (size_t i = 0; i < matches.size(); ++i)
150
    {
151
6
      oss << "\n        " << matches[i];
152
    }
153
  }
154
12
  return oss.str();
155
}
156
157
}  // namespace cvc5