xref: /aosp_15_r20/external/pytorch/caffe2/utils/string_utils.cc (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1*da0073e9SAndroid Build Coastguard Worker #include "caffe2/utils/string_utils.h"
2*da0073e9SAndroid Build Coastguard Worker 
3*da0073e9SAndroid Build Coastguard Worker #include <algorithm>
4*da0073e9SAndroid Build Coastguard Worker #include <sstream>
5*da0073e9SAndroid Build Coastguard Worker #include <vector>
6*da0073e9SAndroid Build Coastguard Worker #include <cstdint>
7*da0073e9SAndroid Build Coastguard Worker 
8*da0073e9SAndroid Build Coastguard Worker namespace caffe2 {
9*da0073e9SAndroid Build Coastguard Worker 
10*da0073e9SAndroid Build Coastguard Worker std::vector<std::string>
split(char separator,const std::string & string,bool ignore_empty)11*da0073e9SAndroid Build Coastguard Worker split(char separator, const std::string& string, bool ignore_empty) {
12*da0073e9SAndroid Build Coastguard Worker   std::vector<std::string> pieces;
13*da0073e9SAndroid Build Coastguard Worker   std::stringstream ss(string);
14*da0073e9SAndroid Build Coastguard Worker   std::string item;
15*da0073e9SAndroid Build Coastguard Worker   while (getline(ss, item, separator)) {
16*da0073e9SAndroid Build Coastguard Worker     if (!ignore_empty || !item.empty()) {
17*da0073e9SAndroid Build Coastguard Worker       pieces.push_back(std::move(item));
18*da0073e9SAndroid Build Coastguard Worker     }
19*da0073e9SAndroid Build Coastguard Worker   }
20*da0073e9SAndroid Build Coastguard Worker   return pieces;
21*da0073e9SAndroid Build Coastguard Worker }
22*da0073e9SAndroid Build Coastguard Worker 
trim(const std::string & str)23*da0073e9SAndroid Build Coastguard Worker std::string trim(const std::string& str) {
24*da0073e9SAndroid Build Coastguard Worker   size_t left = str.find_first_not_of(' ');
25*da0073e9SAndroid Build Coastguard Worker   if (left == std::string::npos) {
26*da0073e9SAndroid Build Coastguard Worker     return str;
27*da0073e9SAndroid Build Coastguard Worker   }
28*da0073e9SAndroid Build Coastguard Worker   size_t right = str.find_last_not_of(' ');
29*da0073e9SAndroid Build Coastguard Worker   return str.substr(left, (right - left + 1));
30*da0073e9SAndroid Build Coastguard Worker }
31*da0073e9SAndroid Build Coastguard Worker 
editDistance(const std::string & s1,const std::string & s2,size_t max_distance)32*da0073e9SAndroid Build Coastguard Worker size_t editDistance(
33*da0073e9SAndroid Build Coastguard Worker   const std::string& s1, const std::string& s2, size_t max_distance)
34*da0073e9SAndroid Build Coastguard Worker   {
35*da0073e9SAndroid Build Coastguard Worker     std::vector<size_t> current(s1.length() + 1);
36*da0073e9SAndroid Build Coastguard Worker     std::vector<size_t> previous(s1.length() + 1);
37*da0073e9SAndroid Build Coastguard Worker     std::vector<size_t> previous1(s1.length() + 1);
38*da0073e9SAndroid Build Coastguard Worker 
39*da0073e9SAndroid Build Coastguard Worker     return editDistanceHelper(
40*da0073e9SAndroid Build Coastguard Worker         s1.c_str(),
41*da0073e9SAndroid Build Coastguard Worker         s1.length(),
42*da0073e9SAndroid Build Coastguard Worker         s2.c_str(),
43*da0073e9SAndroid Build Coastguard Worker         s2.length(),
44*da0073e9SAndroid Build Coastguard Worker         current,
45*da0073e9SAndroid Build Coastguard Worker         previous,
46*da0073e9SAndroid Build Coastguard Worker         previous1,
47*da0073e9SAndroid Build Coastguard Worker         max_distance
48*da0073e9SAndroid Build Coastguard Worker     );
49*da0073e9SAndroid Build Coastguard Worker   }
50*da0073e9SAndroid Build Coastguard Worker   #define NEXT_UNSAFE(s, i, c) { \
51*da0073e9SAndroid Build Coastguard Worker       (c)=(uint8_t)(s)[(i)++]; \
52*da0073e9SAndroid Build Coastguard Worker   }
53*da0073e9SAndroid Build Coastguard Worker 
editDistanceHelper(const char * s1,size_t s1_len,const char * s2,size_t s2_len,std::vector<size_t> & current,std::vector<size_t> & previous,std::vector<size_t> & previous1,size_t max_distance)54*da0073e9SAndroid Build Coastguard Worker int32_t editDistanceHelper(const char* s1,
55*da0073e9SAndroid Build Coastguard Worker   size_t s1_len,
56*da0073e9SAndroid Build Coastguard Worker   const char* s2,
57*da0073e9SAndroid Build Coastguard Worker   size_t s2_len,
58*da0073e9SAndroid Build Coastguard Worker   std::vector<size_t> &current,
59*da0073e9SAndroid Build Coastguard Worker   std::vector<size_t> &previous,
60*da0073e9SAndroid Build Coastguard Worker   std::vector<size_t> &previous1,
61*da0073e9SAndroid Build Coastguard Worker   size_t max_distance) {
62*da0073e9SAndroid Build Coastguard Worker     if (max_distance) {
63*da0073e9SAndroid Build Coastguard Worker       if (std::max(s1_len, s2_len) - std::min(s1_len, s2_len) > max_distance) {
64*da0073e9SAndroid Build Coastguard Worker         return max_distance+1;
65*da0073e9SAndroid Build Coastguard Worker       }
66*da0073e9SAndroid Build Coastguard Worker     }
67*da0073e9SAndroid Build Coastguard Worker 
68*da0073e9SAndroid Build Coastguard Worker     for (size_t j = 0; j <= s1_len; ++j) {
69*da0073e9SAndroid Build Coastguard Worker       current[j] = j;
70*da0073e9SAndroid Build Coastguard Worker     }
71*da0073e9SAndroid Build Coastguard Worker 
72*da0073e9SAndroid Build Coastguard Worker     int32_t str2_offset = 0;
73*da0073e9SAndroid Build Coastguard Worker     char prev2 = 0;
74*da0073e9SAndroid Build Coastguard Worker     for (size_t i = 1; i <= s2_len; ++i) {
75*da0073e9SAndroid Build Coastguard Worker       swap(previous1, previous);
76*da0073e9SAndroid Build Coastguard Worker       swap(current, previous);
77*da0073e9SAndroid Build Coastguard Worker       current[0] = i;
78*da0073e9SAndroid Build Coastguard Worker 
79*da0073e9SAndroid Build Coastguard Worker       // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
80*da0073e9SAndroid Build Coastguard Worker       char c2 = s2[str2_offset];
81*da0073e9SAndroid Build Coastguard Worker       char prev1 = 0;
82*da0073e9SAndroid Build Coastguard Worker       int32_t str1_offset = 0;
83*da0073e9SAndroid Build Coastguard Worker 
84*da0073e9SAndroid Build Coastguard Worker       NEXT_UNSAFE(s2, str2_offset, c2);
85*da0073e9SAndroid Build Coastguard Worker 
86*da0073e9SAndroid Build Coastguard Worker       size_t current_min = s1_len;
87*da0073e9SAndroid Build Coastguard Worker       for (size_t j = 1; j <= s1_len; ++j) {
88*da0073e9SAndroid Build Coastguard Worker         size_t insertion = previous[j] + 1;
89*da0073e9SAndroid Build Coastguard Worker         size_t deletion = current[j - 1] + 1;
90*da0073e9SAndroid Build Coastguard Worker         size_t substitution = previous[j - 1];
91*da0073e9SAndroid Build Coastguard Worker         size_t transposition = insertion;
92*da0073e9SAndroid Build Coastguard Worker         // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
93*da0073e9SAndroid Build Coastguard Worker         char c1 = s1[str1_offset];
94*da0073e9SAndroid Build Coastguard Worker 
95*da0073e9SAndroid Build Coastguard Worker         NEXT_UNSAFE(s1, str1_offset, c1);
96*da0073e9SAndroid Build Coastguard Worker 
97*da0073e9SAndroid Build Coastguard Worker         if (c1 != c2) {
98*da0073e9SAndroid Build Coastguard Worker           substitution += 1;
99*da0073e9SAndroid Build Coastguard Worker         }
100*da0073e9SAndroid Build Coastguard Worker 
101*da0073e9SAndroid Build Coastguard Worker 
102*da0073e9SAndroid Build Coastguard Worker         if (prev1 == c2 && prev2 == c1 && j > 1 && i > 1) {
103*da0073e9SAndroid Build Coastguard Worker           transposition = previous1[j - 2] + 1;
104*da0073e9SAndroid Build Coastguard Worker         }
105*da0073e9SAndroid Build Coastguard Worker         prev1 = c1;
106*da0073e9SAndroid Build Coastguard Worker 
107*da0073e9SAndroid Build Coastguard Worker         current[j] = std::min(std::min(insertion, deletion),
108*da0073e9SAndroid Build Coastguard Worker                          std::min(substitution, transposition));
109*da0073e9SAndroid Build Coastguard Worker         current_min = std::min(current_min, current[j]);
110*da0073e9SAndroid Build Coastguard Worker       }
111*da0073e9SAndroid Build Coastguard Worker 
112*da0073e9SAndroid Build Coastguard Worker 
113*da0073e9SAndroid Build Coastguard Worker       if (max_distance != 0 && current_min > max_distance) {
114*da0073e9SAndroid Build Coastguard Worker         return max_distance+1;
115*da0073e9SAndroid Build Coastguard Worker       }
116*da0073e9SAndroid Build Coastguard Worker 
117*da0073e9SAndroid Build Coastguard Worker       prev2 = c2;
118*da0073e9SAndroid Build Coastguard Worker     }
119*da0073e9SAndroid Build Coastguard Worker 
120*da0073e9SAndroid Build Coastguard Worker     return current[s1_len];
121*da0073e9SAndroid Build Coastguard Worker   }
122*da0073e9SAndroid Build Coastguard Worker } // namespace caffe2
123