xref: /aosp_15_r20/external/pytorch/torch/csrc/jit/frontend/edit_distance.cpp (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include <torch/csrc/jit/frontend/edit_distance.h>
2 #include <algorithm>
3 #include <cstring>
4 #include <memory>
5 
6 namespace torch::jit {
7 
8 // computes levenshtein edit distance between two words
9 // returns maxEditDistance + 1 if the edit distance exceeds MaxEditDistance
10 // reference: http://llvm.org/doxygen/edit__distance_8h_source.html
ComputeEditDistance(const char * word1,const char * word2,size_t maxEditDistance)11 size_t ComputeEditDistance(
12     const char* word1,
13     const char* word2,
14     size_t maxEditDistance) {
15   size_t m = strlen(word1);
16   size_t n = strlen(word2);
17 
18   const unsigned small_buffer_size = 64;
19   // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
20   unsigned small_buffer[small_buffer_size];
21   // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
22   std::unique_ptr<unsigned[]> allocated;
23   unsigned* row = small_buffer;
24   if (n + 1 > small_buffer_size) {
25     row = new unsigned[n + 1];
26     allocated.reset(row);
27   }
28 
29   for (unsigned i = 1; i <= n; ++i)
30     row[i] = i;
31 
32   for (size_t y = 1; y <= m; ++y) {
33     row[0] = y;
34     unsigned best_this_row = row[0];
35 
36     unsigned previous = y - 1;
37     for (size_t x = 1; x <= n; ++x) {
38       const auto old_row = row[x];
39       row[x] = std::min(
40           previous + (word1[y - 1] == word2[x - 1] ? 0u : 1u),
41           std::min(row[x - 1], row[x]) + 1);
42       previous = old_row;
43       best_this_row = std::min(best_this_row, row[x]);
44     }
45 
46     if (maxEditDistance && best_this_row > maxEditDistance)
47       return maxEditDistance + 1;
48   }
49 
50   // NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign)
51   unsigned result = row[n];
52   return result;
53 }
54 
55 } // namespace torch::jit
56