xref: /aosp_15_r20/external/leveldb/table/table.cc (revision 9507f98c5f32dee4b5f9e4a38cd499f3ff5c4490)
1*9507f98cSAndroid Build Coastguard Worker // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2*9507f98cSAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*9507f98cSAndroid Build Coastguard Worker // found in the LICENSE file. See the AUTHORS file for names of contributors.
4*9507f98cSAndroid Build Coastguard Worker 
5*9507f98cSAndroid Build Coastguard Worker #include "leveldb/table.h"
6*9507f98cSAndroid Build Coastguard Worker 
7*9507f98cSAndroid Build Coastguard Worker #include "leveldb/cache.h"
8*9507f98cSAndroid Build Coastguard Worker #include "leveldb/comparator.h"
9*9507f98cSAndroid Build Coastguard Worker #include "leveldb/env.h"
10*9507f98cSAndroid Build Coastguard Worker #include "leveldb/filter_policy.h"
11*9507f98cSAndroid Build Coastguard Worker #include "leveldb/options.h"
12*9507f98cSAndroid Build Coastguard Worker #include "table/block.h"
13*9507f98cSAndroid Build Coastguard Worker #include "table/filter_block.h"
14*9507f98cSAndroid Build Coastguard Worker #include "table/format.h"
15*9507f98cSAndroid Build Coastguard Worker #include "table/two_level_iterator.h"
16*9507f98cSAndroid Build Coastguard Worker #include "util/coding.h"
17*9507f98cSAndroid Build Coastguard Worker 
18*9507f98cSAndroid Build Coastguard Worker namespace leveldb {
19*9507f98cSAndroid Build Coastguard Worker 
20*9507f98cSAndroid Build Coastguard Worker struct Table::Rep {
~Repleveldb::Table::Rep21*9507f98cSAndroid Build Coastguard Worker   ~Rep() {
22*9507f98cSAndroid Build Coastguard Worker     delete filter;
23*9507f98cSAndroid Build Coastguard Worker     delete[] filter_data;
24*9507f98cSAndroid Build Coastguard Worker     delete index_block;
25*9507f98cSAndroid Build Coastguard Worker   }
26*9507f98cSAndroid Build Coastguard Worker 
27*9507f98cSAndroid Build Coastguard Worker   Options options;
28*9507f98cSAndroid Build Coastguard Worker   Status status;
29*9507f98cSAndroid Build Coastguard Worker   RandomAccessFile* file;
30*9507f98cSAndroid Build Coastguard Worker   uint64_t cache_id;
31*9507f98cSAndroid Build Coastguard Worker   FilterBlockReader* filter;
32*9507f98cSAndroid Build Coastguard Worker   const char* filter_data;
33*9507f98cSAndroid Build Coastguard Worker 
34*9507f98cSAndroid Build Coastguard Worker   BlockHandle metaindex_handle;  // Handle to metaindex_block: saved from footer
35*9507f98cSAndroid Build Coastguard Worker   Block* index_block;
36*9507f98cSAndroid Build Coastguard Worker };
37*9507f98cSAndroid Build Coastguard Worker 
Open(const Options & options,RandomAccessFile * file,uint64_t size,Table ** table)38*9507f98cSAndroid Build Coastguard Worker Status Table::Open(const Options& options, RandomAccessFile* file,
39*9507f98cSAndroid Build Coastguard Worker                    uint64_t size, Table** table) {
40*9507f98cSAndroid Build Coastguard Worker   *table = nullptr;
41*9507f98cSAndroid Build Coastguard Worker   if (size < Footer::kEncodedLength) {
42*9507f98cSAndroid Build Coastguard Worker     return Status::Corruption("file is too short to be an sstable");
43*9507f98cSAndroid Build Coastguard Worker   }
44*9507f98cSAndroid Build Coastguard Worker 
45*9507f98cSAndroid Build Coastguard Worker   char footer_space[Footer::kEncodedLength];
46*9507f98cSAndroid Build Coastguard Worker   Slice footer_input;
47*9507f98cSAndroid Build Coastguard Worker   Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
48*9507f98cSAndroid Build Coastguard Worker                         &footer_input, footer_space);
49*9507f98cSAndroid Build Coastguard Worker   if (!s.ok()) return s;
50*9507f98cSAndroid Build Coastguard Worker 
51*9507f98cSAndroid Build Coastguard Worker   Footer footer;
52*9507f98cSAndroid Build Coastguard Worker   s = footer.DecodeFrom(&footer_input);
53*9507f98cSAndroid Build Coastguard Worker   if (!s.ok()) return s;
54*9507f98cSAndroid Build Coastguard Worker 
55*9507f98cSAndroid Build Coastguard Worker   // Read the index block
56*9507f98cSAndroid Build Coastguard Worker   BlockContents index_block_contents;
57*9507f98cSAndroid Build Coastguard Worker   ReadOptions opt;
58*9507f98cSAndroid Build Coastguard Worker   if (options.paranoid_checks) {
59*9507f98cSAndroid Build Coastguard Worker     opt.verify_checksums = true;
60*9507f98cSAndroid Build Coastguard Worker   }
61*9507f98cSAndroid Build Coastguard Worker   s = ReadBlock(file, opt, footer.index_handle(), &index_block_contents);
62*9507f98cSAndroid Build Coastguard Worker 
63*9507f98cSAndroid Build Coastguard Worker   if (s.ok()) {
64*9507f98cSAndroid Build Coastguard Worker     // We've successfully read the footer and the index block: we're
65*9507f98cSAndroid Build Coastguard Worker     // ready to serve requests.
66*9507f98cSAndroid Build Coastguard Worker     Block* index_block = new Block(index_block_contents);
67*9507f98cSAndroid Build Coastguard Worker     Rep* rep = new Table::Rep;
68*9507f98cSAndroid Build Coastguard Worker     rep->options = options;
69*9507f98cSAndroid Build Coastguard Worker     rep->file = file;
70*9507f98cSAndroid Build Coastguard Worker     rep->metaindex_handle = footer.metaindex_handle();
71*9507f98cSAndroid Build Coastguard Worker     rep->index_block = index_block;
72*9507f98cSAndroid Build Coastguard Worker     rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
73*9507f98cSAndroid Build Coastguard Worker     rep->filter_data = nullptr;
74*9507f98cSAndroid Build Coastguard Worker     rep->filter = nullptr;
75*9507f98cSAndroid Build Coastguard Worker     *table = new Table(rep);
76*9507f98cSAndroid Build Coastguard Worker     (*table)->ReadMeta(footer);
77*9507f98cSAndroid Build Coastguard Worker   }
78*9507f98cSAndroid Build Coastguard Worker 
79*9507f98cSAndroid Build Coastguard Worker   return s;
80*9507f98cSAndroid Build Coastguard Worker }
81*9507f98cSAndroid Build Coastguard Worker 
ReadMeta(const Footer & footer)82*9507f98cSAndroid Build Coastguard Worker void Table::ReadMeta(const Footer& footer) {
83*9507f98cSAndroid Build Coastguard Worker   if (rep_->options.filter_policy == nullptr) {
84*9507f98cSAndroid Build Coastguard Worker     return;  // Do not need any metadata
85*9507f98cSAndroid Build Coastguard Worker   }
86*9507f98cSAndroid Build Coastguard Worker 
87*9507f98cSAndroid Build Coastguard Worker   // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
88*9507f98cSAndroid Build Coastguard Worker   // it is an empty block.
89*9507f98cSAndroid Build Coastguard Worker   ReadOptions opt;
90*9507f98cSAndroid Build Coastguard Worker   if (rep_->options.paranoid_checks) {
91*9507f98cSAndroid Build Coastguard Worker     opt.verify_checksums = true;
92*9507f98cSAndroid Build Coastguard Worker   }
93*9507f98cSAndroid Build Coastguard Worker   BlockContents contents;
94*9507f98cSAndroid Build Coastguard Worker   if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
95*9507f98cSAndroid Build Coastguard Worker     // Do not propagate errors since meta info is not needed for operation
96*9507f98cSAndroid Build Coastguard Worker     return;
97*9507f98cSAndroid Build Coastguard Worker   }
98*9507f98cSAndroid Build Coastguard Worker   Block* meta = new Block(contents);
99*9507f98cSAndroid Build Coastguard Worker 
100*9507f98cSAndroid Build Coastguard Worker   Iterator* iter = meta->NewIterator(BytewiseComparator());
101*9507f98cSAndroid Build Coastguard Worker   std::string key = "filter.";
102*9507f98cSAndroid Build Coastguard Worker   key.append(rep_->options.filter_policy->Name());
103*9507f98cSAndroid Build Coastguard Worker   iter->Seek(key);
104*9507f98cSAndroid Build Coastguard Worker   if (iter->Valid() && iter->key() == Slice(key)) {
105*9507f98cSAndroid Build Coastguard Worker     ReadFilter(iter->value());
106*9507f98cSAndroid Build Coastguard Worker   }
107*9507f98cSAndroid Build Coastguard Worker   delete iter;
108*9507f98cSAndroid Build Coastguard Worker   delete meta;
109*9507f98cSAndroid Build Coastguard Worker }
110*9507f98cSAndroid Build Coastguard Worker 
ReadFilter(const Slice & filter_handle_value)111*9507f98cSAndroid Build Coastguard Worker void Table::ReadFilter(const Slice& filter_handle_value) {
112*9507f98cSAndroid Build Coastguard Worker   Slice v = filter_handle_value;
113*9507f98cSAndroid Build Coastguard Worker   BlockHandle filter_handle;
114*9507f98cSAndroid Build Coastguard Worker   if (!filter_handle.DecodeFrom(&v).ok()) {
115*9507f98cSAndroid Build Coastguard Worker     return;
116*9507f98cSAndroid Build Coastguard Worker   }
117*9507f98cSAndroid Build Coastguard Worker 
118*9507f98cSAndroid Build Coastguard Worker   // We might want to unify with ReadBlock() if we start
119*9507f98cSAndroid Build Coastguard Worker   // requiring checksum verification in Table::Open.
120*9507f98cSAndroid Build Coastguard Worker   ReadOptions opt;
121*9507f98cSAndroid Build Coastguard Worker   if (rep_->options.paranoid_checks) {
122*9507f98cSAndroid Build Coastguard Worker     opt.verify_checksums = true;
123*9507f98cSAndroid Build Coastguard Worker   }
124*9507f98cSAndroid Build Coastguard Worker   BlockContents block;
125*9507f98cSAndroid Build Coastguard Worker   if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
126*9507f98cSAndroid Build Coastguard Worker     return;
127*9507f98cSAndroid Build Coastguard Worker   }
128*9507f98cSAndroid Build Coastguard Worker   if (block.heap_allocated) {
129*9507f98cSAndroid Build Coastguard Worker     rep_->filter_data = block.data.data();  // Will need to delete later
130*9507f98cSAndroid Build Coastguard Worker   }
131*9507f98cSAndroid Build Coastguard Worker   rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
132*9507f98cSAndroid Build Coastguard Worker }
133*9507f98cSAndroid Build Coastguard Worker 
~Table()134*9507f98cSAndroid Build Coastguard Worker Table::~Table() { delete rep_; }
135*9507f98cSAndroid Build Coastguard Worker 
DeleteBlock(void * arg,void * ignored)136*9507f98cSAndroid Build Coastguard Worker static void DeleteBlock(void* arg, void* ignored) {
137*9507f98cSAndroid Build Coastguard Worker   delete reinterpret_cast<Block*>(arg);
138*9507f98cSAndroid Build Coastguard Worker }
139*9507f98cSAndroid Build Coastguard Worker 
DeleteCachedBlock(const Slice & key,void * value)140*9507f98cSAndroid Build Coastguard Worker static void DeleteCachedBlock(const Slice& key, void* value) {
141*9507f98cSAndroid Build Coastguard Worker   Block* block = reinterpret_cast<Block*>(value);
142*9507f98cSAndroid Build Coastguard Worker   delete block;
143*9507f98cSAndroid Build Coastguard Worker }
144*9507f98cSAndroid Build Coastguard Worker 
ReleaseBlock(void * arg,void * h)145*9507f98cSAndroid Build Coastguard Worker static void ReleaseBlock(void* arg, void* h) {
146*9507f98cSAndroid Build Coastguard Worker   Cache* cache = reinterpret_cast<Cache*>(arg);
147*9507f98cSAndroid Build Coastguard Worker   Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
148*9507f98cSAndroid Build Coastguard Worker   cache->Release(handle);
149*9507f98cSAndroid Build Coastguard Worker }
150*9507f98cSAndroid Build Coastguard Worker 
151*9507f98cSAndroid Build Coastguard Worker // Convert an index iterator value (i.e., an encoded BlockHandle)
152*9507f98cSAndroid Build Coastguard Worker // into an iterator over the contents of the corresponding block.
BlockReader(void * arg,const ReadOptions & options,const Slice & index_value)153*9507f98cSAndroid Build Coastguard Worker Iterator* Table::BlockReader(void* arg, const ReadOptions& options,
154*9507f98cSAndroid Build Coastguard Worker                              const Slice& index_value) {
155*9507f98cSAndroid Build Coastguard Worker   Table* table = reinterpret_cast<Table*>(arg);
156*9507f98cSAndroid Build Coastguard Worker   Cache* block_cache = table->rep_->options.block_cache;
157*9507f98cSAndroid Build Coastguard Worker   Block* block = nullptr;
158*9507f98cSAndroid Build Coastguard Worker   Cache::Handle* cache_handle = nullptr;
159*9507f98cSAndroid Build Coastguard Worker 
160*9507f98cSAndroid Build Coastguard Worker   BlockHandle handle;
161*9507f98cSAndroid Build Coastguard Worker   Slice input = index_value;
162*9507f98cSAndroid Build Coastguard Worker   Status s = handle.DecodeFrom(&input);
163*9507f98cSAndroid Build Coastguard Worker   // We intentionally allow extra stuff in index_value so that we
164*9507f98cSAndroid Build Coastguard Worker   // can add more features in the future.
165*9507f98cSAndroid Build Coastguard Worker 
166*9507f98cSAndroid Build Coastguard Worker   if (s.ok()) {
167*9507f98cSAndroid Build Coastguard Worker     BlockContents contents;
168*9507f98cSAndroid Build Coastguard Worker     if (block_cache != nullptr) {
169*9507f98cSAndroid Build Coastguard Worker       char cache_key_buffer[16];
170*9507f98cSAndroid Build Coastguard Worker       EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
171*9507f98cSAndroid Build Coastguard Worker       EncodeFixed64(cache_key_buffer + 8, handle.offset());
172*9507f98cSAndroid Build Coastguard Worker       Slice key(cache_key_buffer, sizeof(cache_key_buffer));
173*9507f98cSAndroid Build Coastguard Worker       cache_handle = block_cache->Lookup(key);
174*9507f98cSAndroid Build Coastguard Worker       if (cache_handle != nullptr) {
175*9507f98cSAndroid Build Coastguard Worker         block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
176*9507f98cSAndroid Build Coastguard Worker       } else {
177*9507f98cSAndroid Build Coastguard Worker         s = ReadBlock(table->rep_->file, options, handle, &contents);
178*9507f98cSAndroid Build Coastguard Worker         if (s.ok()) {
179*9507f98cSAndroid Build Coastguard Worker           block = new Block(contents);
180*9507f98cSAndroid Build Coastguard Worker           if (contents.cachable && options.fill_cache) {
181*9507f98cSAndroid Build Coastguard Worker             cache_handle = block_cache->Insert(key, block, block->size(),
182*9507f98cSAndroid Build Coastguard Worker                                                &DeleteCachedBlock);
183*9507f98cSAndroid Build Coastguard Worker           }
184*9507f98cSAndroid Build Coastguard Worker         }
185*9507f98cSAndroid Build Coastguard Worker       }
186*9507f98cSAndroid Build Coastguard Worker     } else {
187*9507f98cSAndroid Build Coastguard Worker       s = ReadBlock(table->rep_->file, options, handle, &contents);
188*9507f98cSAndroid Build Coastguard Worker       if (s.ok()) {
189*9507f98cSAndroid Build Coastguard Worker         block = new Block(contents);
190*9507f98cSAndroid Build Coastguard Worker       }
191*9507f98cSAndroid Build Coastguard Worker     }
192*9507f98cSAndroid Build Coastguard Worker   }
193*9507f98cSAndroid Build Coastguard Worker 
194*9507f98cSAndroid Build Coastguard Worker   Iterator* iter;
195*9507f98cSAndroid Build Coastguard Worker   if (block != nullptr) {
196*9507f98cSAndroid Build Coastguard Worker     iter = block->NewIterator(table->rep_->options.comparator);
197*9507f98cSAndroid Build Coastguard Worker     if (cache_handle == nullptr) {
198*9507f98cSAndroid Build Coastguard Worker       iter->RegisterCleanup(&DeleteBlock, block, nullptr);
199*9507f98cSAndroid Build Coastguard Worker     } else {
200*9507f98cSAndroid Build Coastguard Worker       iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
201*9507f98cSAndroid Build Coastguard Worker     }
202*9507f98cSAndroid Build Coastguard Worker   } else {
203*9507f98cSAndroid Build Coastguard Worker     iter = NewErrorIterator(s);
204*9507f98cSAndroid Build Coastguard Worker   }
205*9507f98cSAndroid Build Coastguard Worker   return iter;
206*9507f98cSAndroid Build Coastguard Worker }
207*9507f98cSAndroid Build Coastguard Worker 
NewIterator(const ReadOptions & options) const208*9507f98cSAndroid Build Coastguard Worker Iterator* Table::NewIterator(const ReadOptions& options) const {
209*9507f98cSAndroid Build Coastguard Worker   return NewTwoLevelIterator(
210*9507f98cSAndroid Build Coastguard Worker       rep_->index_block->NewIterator(rep_->options.comparator),
211*9507f98cSAndroid Build Coastguard Worker       &Table::BlockReader, const_cast<Table*>(this), options);
212*9507f98cSAndroid Build Coastguard Worker }
213*9507f98cSAndroid Build Coastguard Worker 
InternalGet(const ReadOptions & options,const Slice & k,void * arg,void (* handle_result)(void *,const Slice &,const Slice &))214*9507f98cSAndroid Build Coastguard Worker Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg,
215*9507f98cSAndroid Build Coastguard Worker                           void (*handle_result)(void*, const Slice&,
216*9507f98cSAndroid Build Coastguard Worker                                                 const Slice&)) {
217*9507f98cSAndroid Build Coastguard Worker   Status s;
218*9507f98cSAndroid Build Coastguard Worker   Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
219*9507f98cSAndroid Build Coastguard Worker   iiter->Seek(k);
220*9507f98cSAndroid Build Coastguard Worker   if (iiter->Valid()) {
221*9507f98cSAndroid Build Coastguard Worker     Slice handle_value = iiter->value();
222*9507f98cSAndroid Build Coastguard Worker     FilterBlockReader* filter = rep_->filter;
223*9507f98cSAndroid Build Coastguard Worker     BlockHandle handle;
224*9507f98cSAndroid Build Coastguard Worker     if (filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
225*9507f98cSAndroid Build Coastguard Worker         !filter->KeyMayMatch(handle.offset(), k)) {
226*9507f98cSAndroid Build Coastguard Worker       // Not found
227*9507f98cSAndroid Build Coastguard Worker     } else {
228*9507f98cSAndroid Build Coastguard Worker       Iterator* block_iter = BlockReader(this, options, iiter->value());
229*9507f98cSAndroid Build Coastguard Worker       block_iter->Seek(k);
230*9507f98cSAndroid Build Coastguard Worker       if (block_iter->Valid()) {
231*9507f98cSAndroid Build Coastguard Worker         (*handle_result)(arg, block_iter->key(), block_iter->value());
232*9507f98cSAndroid Build Coastguard Worker       }
233*9507f98cSAndroid Build Coastguard Worker       s = block_iter->status();
234*9507f98cSAndroid Build Coastguard Worker       delete block_iter;
235*9507f98cSAndroid Build Coastguard Worker     }
236*9507f98cSAndroid Build Coastguard Worker   }
237*9507f98cSAndroid Build Coastguard Worker   if (s.ok()) {
238*9507f98cSAndroid Build Coastguard Worker     s = iiter->status();
239*9507f98cSAndroid Build Coastguard Worker   }
240*9507f98cSAndroid Build Coastguard Worker   delete iiter;
241*9507f98cSAndroid Build Coastguard Worker   return s;
242*9507f98cSAndroid Build Coastguard Worker }
243*9507f98cSAndroid Build Coastguard Worker 
ApproximateOffsetOf(const Slice & key) const244*9507f98cSAndroid Build Coastguard Worker uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
245*9507f98cSAndroid Build Coastguard Worker   Iterator* index_iter =
246*9507f98cSAndroid Build Coastguard Worker       rep_->index_block->NewIterator(rep_->options.comparator);
247*9507f98cSAndroid Build Coastguard Worker   index_iter->Seek(key);
248*9507f98cSAndroid Build Coastguard Worker   uint64_t result;
249*9507f98cSAndroid Build Coastguard Worker   if (index_iter->Valid()) {
250*9507f98cSAndroid Build Coastguard Worker     BlockHandle handle;
251*9507f98cSAndroid Build Coastguard Worker     Slice input = index_iter->value();
252*9507f98cSAndroid Build Coastguard Worker     Status s = handle.DecodeFrom(&input);
253*9507f98cSAndroid Build Coastguard Worker     if (s.ok()) {
254*9507f98cSAndroid Build Coastguard Worker       result = handle.offset();
255*9507f98cSAndroid Build Coastguard Worker     } else {
256*9507f98cSAndroid Build Coastguard Worker       // Strange: we can't decode the block handle in the index block.
257*9507f98cSAndroid Build Coastguard Worker       // We'll just return the offset of the metaindex block, which is
258*9507f98cSAndroid Build Coastguard Worker       // close to the whole file size for this case.
259*9507f98cSAndroid Build Coastguard Worker       result = rep_->metaindex_handle.offset();
260*9507f98cSAndroid Build Coastguard Worker     }
261*9507f98cSAndroid Build Coastguard Worker   } else {
262*9507f98cSAndroid Build Coastguard Worker     // key is past the last key in the file.  Approximate the offset
263*9507f98cSAndroid Build Coastguard Worker     // by returning the offset of the metaindex block (which is
264*9507f98cSAndroid Build Coastguard Worker     // right near the end of the file).
265*9507f98cSAndroid Build Coastguard Worker     result = rep_->metaindex_handle.offset();
266*9507f98cSAndroid Build Coastguard Worker   }
267*9507f98cSAndroid Build Coastguard Worker   delete index_iter;
268*9507f98cSAndroid Build Coastguard Worker   return result;
269*9507f98cSAndroid Build Coastguard Worker }
270*9507f98cSAndroid Build Coastguard Worker 
271*9507f98cSAndroid Build Coastguard Worker }  // namespace leveldb
272