1 #include <torch/csrc/jit/frontend/edit_distance.h> 2 #include <algorithm> 3 #include <cstring> 4 #include <memory> 5 6 namespace torch::jit { 7 8 // computes levenshtein edit distance between two words 9 // returns maxEditDistance + 1 if the edit distance exceeds MaxEditDistance 10 // reference: http://llvm.org/doxygen/edit__distance_8h_source.html ComputeEditDistance(const char * word1,const char * word2,size_t maxEditDistance)11size_t ComputeEditDistance( 12 const char* word1, 13 const char* word2, 14 size_t maxEditDistance) { 15 size_t m = strlen(word1); 16 size_t n = strlen(word2); 17 18 const unsigned small_buffer_size = 64; 19 // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) 20 unsigned small_buffer[small_buffer_size]; 21 // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) 22 std::unique_ptr<unsigned[]> allocated; 23 unsigned* row = small_buffer; 24 if (n + 1 > small_buffer_size) { 25 row = new unsigned[n + 1]; 26 allocated.reset(row); 27 } 28 29 for (unsigned i = 1; i <= n; ++i) 30 row[i] = i; 31 32 for (size_t y = 1; y <= m; ++y) { 33 row[0] = y; 34 unsigned best_this_row = row[0]; 35 36 unsigned previous = y - 1; 37 for (size_t x = 1; x <= n; ++x) { 38 const auto old_row = row[x]; 39 row[x] = std::min( 40 previous + (word1[y - 1] == word2[x - 1] ? 0u : 1u), 41 std::min(row[x - 1], row[x]) + 1); 42 previous = old_row; 43 best_this_row = std::min(best_this_row, row[x]); 44 } 45 46 if (maxEditDistance && best_this_row > maxEditDistance) 47 return maxEditDistance + 1; 48 } 49 50 // NOLINTNEXTLINE(clang-analyzer-core.uninitialized.Assign) 51 unsigned result = row[n]; 52 return result; 53 } 54 55 } // namespace torch::jit 56