1 #ifndef MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_ 2 #define MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_ 3 4 #include "marisa/grimoire/vector/vector.h" 5 6 namespace marisa { 7 namespace grimoire { 8 namespace vector { 9 10 class FlatVector { 11 public: 12 #if MARISA_WORD_SIZE == 64 13 typedef UInt64 Unit; 14 #else // MARISA_WORD_SIZE == 64 15 typedef UInt32 Unit; 16 #endif // MARISA_WORD_SIZE == 64 17 FlatVector()18 FlatVector() : units_(), value_size_(0), mask_(0), size_(0) {} 19 build(const Vector<UInt32> & values)20 void build(const Vector<UInt32> &values) { 21 FlatVector temp; 22 temp.build_(values); 23 swap(temp); 24 } 25 map(Mapper & mapper)26 void map(Mapper &mapper) { 27 FlatVector temp; 28 temp.map_(mapper); 29 swap(temp); 30 } read(Reader & reader)31 void read(Reader &reader) { 32 FlatVector temp; 33 temp.read_(reader); 34 swap(temp); 35 } write(Writer & writer)36 void write(Writer &writer) const { 37 write_(writer); 38 } 39 40 UInt32 operator[](std::size_t i) const { 41 MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 42 43 const std::size_t pos = i * value_size_; 44 const std::size_t unit_id = pos / MARISA_WORD_SIZE; 45 const std::size_t unit_offset = pos % MARISA_WORD_SIZE; 46 47 if ((unit_offset + value_size_) <= MARISA_WORD_SIZE) { 48 return (UInt32)(units_[unit_id] >> unit_offset) & mask_; 49 } else { 50 return (UInt32)((units_[unit_id] >> unit_offset) 51 | (units_[unit_id + 1] << (MARISA_WORD_SIZE - unit_offset))) & mask_; 52 } 53 } 54 value_size()55 std::size_t value_size() const { 56 return value_size_; 57 } mask()58 UInt32 mask() const { 59 return mask_; 60 } 61 empty()62 bool empty() const { 63 return size_ == 0; 64 } size()65 std::size_t size() const { 66 return size_; 67 } total_size()68 std::size_t total_size() const { 69 return units_.total_size(); 70 } io_size()71 std::size_t io_size() const { 72 return units_.io_size() + (sizeof(UInt32) * 2) + sizeof(UInt64); 73 } 74 clear()75 void clear() { 76 FlatVector().swap(*this); 77 } swap(FlatVector & rhs)78 void swap(FlatVector &rhs) { 79 units_.swap(rhs.units_); 80 marisa::swap(value_size_, rhs.value_size_); 81 marisa::swap(mask_, rhs.mask_); 82 marisa::swap(size_, rhs.size_); 83 } 84 85 private: 86 Vector<Unit> units_; 87 std::size_t value_size_; 88 UInt32 mask_; 89 std::size_t size_; 90 build_(const Vector<UInt32> & values)91 void build_(const Vector<UInt32> &values) { 92 UInt32 max_value = 0; 93 for (std::size_t i = 0; i < values.size(); ++i) { 94 if (values[i] > max_value) { 95 max_value = values[i]; 96 } 97 } 98 99 std::size_t value_size = 0; 100 while (max_value != 0) { 101 ++value_size; 102 max_value >>= 1; 103 } 104 105 std::size_t num_units = values.empty() ? 0 : (64 / MARISA_WORD_SIZE); 106 if (value_size != 0) { 107 num_units = (std::size_t)( 108 (((UInt64)value_size * values.size()) + (MARISA_WORD_SIZE - 1)) 109 / MARISA_WORD_SIZE); 110 num_units += num_units % (64 / MARISA_WORD_SIZE); 111 } 112 113 units_.resize(num_units); 114 if (num_units > 0) { 115 units_.back() = 0; 116 } 117 118 value_size_ = value_size; 119 if (value_size != 0) { 120 mask_ = MARISA_UINT32_MAX >> (32 - value_size); 121 } 122 size_ = values.size(); 123 124 for (std::size_t i = 0; i < values.size(); ++i) { 125 set(i, values[i]); 126 } 127 } 128 map_(Mapper & mapper)129 void map_(Mapper &mapper) { 130 units_.map(mapper); 131 { 132 UInt32 temp_value_size; 133 mapper.map(&temp_value_size); 134 MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR); 135 value_size_ = temp_value_size; 136 } 137 { 138 UInt32 temp_mask; 139 mapper.map(&temp_mask); 140 mask_ = temp_mask; 141 } 142 { 143 UInt64 temp_size; 144 mapper.map(&temp_size); 145 MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR); 146 size_ = (std::size_t)temp_size; 147 } 148 } 149 read_(Reader & reader)150 void read_(Reader &reader) { 151 units_.read(reader); 152 { 153 UInt32 temp_value_size; 154 reader.read(&temp_value_size); 155 MARISA_THROW_IF(temp_value_size > 32, MARISA_FORMAT_ERROR); 156 value_size_ = temp_value_size; 157 } 158 { 159 UInt32 temp_mask; 160 reader.read(&temp_mask); 161 mask_ = temp_mask; 162 } 163 { 164 UInt64 temp_size; 165 reader.read(&temp_size); 166 MARISA_THROW_IF(temp_size > MARISA_SIZE_MAX, MARISA_SIZE_ERROR); 167 size_ = (std::size_t)temp_size; 168 } 169 } 170 write_(Writer & writer)171 void write_(Writer &writer) const { 172 units_.write(writer); 173 writer.write((UInt32)value_size_); 174 writer.write((UInt32)mask_); 175 writer.write((UInt64)size_); 176 } 177 set(std::size_t i,UInt32 value)178 void set(std::size_t i, UInt32 value) { 179 MARISA_DEBUG_IF(i >= size_, MARISA_BOUND_ERROR); 180 MARISA_DEBUG_IF(value > mask_, MARISA_RANGE_ERROR); 181 182 const std::size_t pos = i * value_size_; 183 const std::size_t unit_id = pos / MARISA_WORD_SIZE; 184 const std::size_t unit_offset = pos % MARISA_WORD_SIZE; 185 186 units_[unit_id] &= ~((Unit)mask_ << unit_offset); 187 units_[unit_id] |= (Unit)(value & mask_) << unit_offset; 188 if ((unit_offset + value_size_) > MARISA_WORD_SIZE) { 189 units_[unit_id + 1] &= 190 ~((Unit)mask_ >> (MARISA_WORD_SIZE - unit_offset)); 191 units_[unit_id + 1] |= 192 (Unit)(value & mask_) >> (MARISA_WORD_SIZE - unit_offset); 193 } 194 } 195 196 // Disallows copy and assignment. 197 FlatVector(const FlatVector &); 198 FlatVector &operator=(const FlatVector &); 199 }; 200 201 } // namespace vector 202 } // namespace grimoire 203 } // namespace marisa 204 205 #endif // MARISA_GRIMOIRE_VECTOR_FLAT_VECTOR_H_ 206