xref: /aosp_15_r20/external/pytorch/caffe2/utils/string_utils.cc (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #include "caffe2/utils/string_utils.h"
2 
3 #include <algorithm>
4 #include <sstream>
5 #include <vector>
6 #include <cstdint>
7 
8 namespace caffe2 {
9 
10 std::vector<std::string>
split(char separator,const std::string & string,bool ignore_empty)11 split(char separator, const std::string& string, bool ignore_empty) {
12   std::vector<std::string> pieces;
13   std::stringstream ss(string);
14   std::string item;
15   while (getline(ss, item, separator)) {
16     if (!ignore_empty || !item.empty()) {
17       pieces.push_back(std::move(item));
18     }
19   }
20   return pieces;
21 }
22 
trim(const std::string & str)23 std::string trim(const std::string& str) {
24   size_t left = str.find_first_not_of(' ');
25   if (left == std::string::npos) {
26     return str;
27   }
28   size_t right = str.find_last_not_of(' ');
29   return str.substr(left, (right - left + 1));
30 }
31 
editDistance(const std::string & s1,const std::string & s2,size_t max_distance)32 size_t editDistance(
33   const std::string& s1, const std::string& s2, size_t max_distance)
34   {
35     std::vector<size_t> current(s1.length() + 1);
36     std::vector<size_t> previous(s1.length() + 1);
37     std::vector<size_t> previous1(s1.length() + 1);
38 
39     return editDistanceHelper(
40         s1.c_str(),
41         s1.length(),
42         s2.c_str(),
43         s2.length(),
44         current,
45         previous,
46         previous1,
47         max_distance
48     );
49   }
50   #define NEXT_UNSAFE(s, i, c) { \
51       (c)=(uint8_t)(s)[(i)++]; \
52   }
53 
editDistanceHelper(const char * s1,size_t s1_len,const char * s2,size_t s2_len,std::vector<size_t> & current,std::vector<size_t> & previous,std::vector<size_t> & previous1,size_t max_distance)54 int32_t editDistanceHelper(const char* s1,
55   size_t s1_len,
56   const char* s2,
57   size_t s2_len,
58   std::vector<size_t> &current,
59   std::vector<size_t> &previous,
60   std::vector<size_t> &previous1,
61   size_t max_distance) {
62     if (max_distance) {
63       if (std::max(s1_len, s2_len) - std::min(s1_len, s2_len) > max_distance) {
64         return max_distance+1;
65       }
66     }
67 
68     for (size_t j = 0; j <= s1_len; ++j) {
69       current[j] = j;
70     }
71 
72     int32_t str2_offset = 0;
73     char prev2 = 0;
74     for (size_t i = 1; i <= s2_len; ++i) {
75       swap(previous1, previous);
76       swap(current, previous);
77       current[0] = i;
78 
79       // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
80       char c2 = s2[str2_offset];
81       char prev1 = 0;
82       int32_t str1_offset = 0;
83 
84       NEXT_UNSAFE(s2, str2_offset, c2);
85 
86       size_t current_min = s1_len;
87       for (size_t j = 1; j <= s1_len; ++j) {
88         size_t insertion = previous[j] + 1;
89         size_t deletion = current[j - 1] + 1;
90         size_t substitution = previous[j - 1];
91         size_t transposition = insertion;
92         // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
93         char c1 = s1[str1_offset];
94 
95         NEXT_UNSAFE(s1, str1_offset, c1);
96 
97         if (c1 != c2) {
98           substitution += 1;
99         }
100 
101 
102         if (prev1 == c2 && prev2 == c1 && j > 1 && i > 1) {
103           transposition = previous1[j - 2] + 1;
104         }
105         prev1 = c1;
106 
107         current[j] = std::min(std::min(insertion, deletion),
108                          std::min(substitution, transposition));
109         current_min = std::min(current_min, current[j]);
110       }
111 
112 
113       if (max_distance != 0 && current_min > max_distance) {
114         return max_distance+1;
115       }
116 
117       prev2 = c2;
118     }
119 
120     return current[s1_len];
121   }
122 } // namespace caffe2
123