1*9507f98cSAndroid Build Coastguard Worker // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2*9507f98cSAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*9507f98cSAndroid Build Coastguard Worker // found in the LICENSE file. See the AUTHORS file for names of contributors.
4*9507f98cSAndroid Build Coastguard Worker
5*9507f98cSAndroid Build Coastguard Worker #include "leveldb/table.h"
6*9507f98cSAndroid Build Coastguard Worker
7*9507f98cSAndroid Build Coastguard Worker #include "leveldb/cache.h"
8*9507f98cSAndroid Build Coastguard Worker #include "leveldb/comparator.h"
9*9507f98cSAndroid Build Coastguard Worker #include "leveldb/env.h"
10*9507f98cSAndroid Build Coastguard Worker #include "leveldb/filter_policy.h"
11*9507f98cSAndroid Build Coastguard Worker #include "leveldb/options.h"
12*9507f98cSAndroid Build Coastguard Worker #include "table/block.h"
13*9507f98cSAndroid Build Coastguard Worker #include "table/filter_block.h"
14*9507f98cSAndroid Build Coastguard Worker #include "table/format.h"
15*9507f98cSAndroid Build Coastguard Worker #include "table/two_level_iterator.h"
16*9507f98cSAndroid Build Coastguard Worker #include "util/coding.h"
17*9507f98cSAndroid Build Coastguard Worker
18*9507f98cSAndroid Build Coastguard Worker namespace leveldb {
19*9507f98cSAndroid Build Coastguard Worker
20*9507f98cSAndroid Build Coastguard Worker struct Table::Rep {
~Repleveldb::Table::Rep21*9507f98cSAndroid Build Coastguard Worker ~Rep() {
22*9507f98cSAndroid Build Coastguard Worker delete filter;
23*9507f98cSAndroid Build Coastguard Worker delete[] filter_data;
24*9507f98cSAndroid Build Coastguard Worker delete index_block;
25*9507f98cSAndroid Build Coastguard Worker }
26*9507f98cSAndroid Build Coastguard Worker
27*9507f98cSAndroid Build Coastguard Worker Options options;
28*9507f98cSAndroid Build Coastguard Worker Status status;
29*9507f98cSAndroid Build Coastguard Worker RandomAccessFile* file;
30*9507f98cSAndroid Build Coastguard Worker uint64_t cache_id;
31*9507f98cSAndroid Build Coastguard Worker FilterBlockReader* filter;
32*9507f98cSAndroid Build Coastguard Worker const char* filter_data;
33*9507f98cSAndroid Build Coastguard Worker
34*9507f98cSAndroid Build Coastguard Worker BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
35*9507f98cSAndroid Build Coastguard Worker Block* index_block;
36*9507f98cSAndroid Build Coastguard Worker };
37*9507f98cSAndroid Build Coastguard Worker
Open(const Options & options,RandomAccessFile * file,uint64_t size,Table ** table)38*9507f98cSAndroid Build Coastguard Worker Status Table::Open(const Options& options, RandomAccessFile* file,
39*9507f98cSAndroid Build Coastguard Worker uint64_t size, Table** table) {
40*9507f98cSAndroid Build Coastguard Worker *table = nullptr;
41*9507f98cSAndroid Build Coastguard Worker if (size < Footer::kEncodedLength) {
42*9507f98cSAndroid Build Coastguard Worker return Status::Corruption("file is too short to be an sstable");
43*9507f98cSAndroid Build Coastguard Worker }
44*9507f98cSAndroid Build Coastguard Worker
45*9507f98cSAndroid Build Coastguard Worker char footer_space[Footer::kEncodedLength];
46*9507f98cSAndroid Build Coastguard Worker Slice footer_input;
47*9507f98cSAndroid Build Coastguard Worker Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
48*9507f98cSAndroid Build Coastguard Worker &footer_input, footer_space);
49*9507f98cSAndroid Build Coastguard Worker if (!s.ok()) return s;
50*9507f98cSAndroid Build Coastguard Worker
51*9507f98cSAndroid Build Coastguard Worker Footer footer;
52*9507f98cSAndroid Build Coastguard Worker s = footer.DecodeFrom(&footer_input);
53*9507f98cSAndroid Build Coastguard Worker if (!s.ok()) return s;
54*9507f98cSAndroid Build Coastguard Worker
55*9507f98cSAndroid Build Coastguard Worker // Read the index block
56*9507f98cSAndroid Build Coastguard Worker BlockContents index_block_contents;
57*9507f98cSAndroid Build Coastguard Worker ReadOptions opt;
58*9507f98cSAndroid Build Coastguard Worker if (options.paranoid_checks) {
59*9507f98cSAndroid Build Coastguard Worker opt.verify_checksums = true;
60*9507f98cSAndroid Build Coastguard Worker }
61*9507f98cSAndroid Build Coastguard Worker s = ReadBlock(file, opt, footer.index_handle(), &index_block_contents);
62*9507f98cSAndroid Build Coastguard Worker
63*9507f98cSAndroid Build Coastguard Worker if (s.ok()) {
64*9507f98cSAndroid Build Coastguard Worker // We've successfully read the footer and the index block: we're
65*9507f98cSAndroid Build Coastguard Worker // ready to serve requests.
66*9507f98cSAndroid Build Coastguard Worker Block* index_block = new Block(index_block_contents);
67*9507f98cSAndroid Build Coastguard Worker Rep* rep = new Table::Rep;
68*9507f98cSAndroid Build Coastguard Worker rep->options = options;
69*9507f98cSAndroid Build Coastguard Worker rep->file = file;
70*9507f98cSAndroid Build Coastguard Worker rep->metaindex_handle = footer.metaindex_handle();
71*9507f98cSAndroid Build Coastguard Worker rep->index_block = index_block;
72*9507f98cSAndroid Build Coastguard Worker rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
73*9507f98cSAndroid Build Coastguard Worker rep->filter_data = nullptr;
74*9507f98cSAndroid Build Coastguard Worker rep->filter = nullptr;
75*9507f98cSAndroid Build Coastguard Worker *table = new Table(rep);
76*9507f98cSAndroid Build Coastguard Worker (*table)->ReadMeta(footer);
77*9507f98cSAndroid Build Coastguard Worker }
78*9507f98cSAndroid Build Coastguard Worker
79*9507f98cSAndroid Build Coastguard Worker return s;
80*9507f98cSAndroid Build Coastguard Worker }
81*9507f98cSAndroid Build Coastguard Worker
ReadMeta(const Footer & footer)82*9507f98cSAndroid Build Coastguard Worker void Table::ReadMeta(const Footer& footer) {
83*9507f98cSAndroid Build Coastguard Worker if (rep_->options.filter_policy == nullptr) {
84*9507f98cSAndroid Build Coastguard Worker return; // Do not need any metadata
85*9507f98cSAndroid Build Coastguard Worker }
86*9507f98cSAndroid Build Coastguard Worker
87*9507f98cSAndroid Build Coastguard Worker // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
88*9507f98cSAndroid Build Coastguard Worker // it is an empty block.
89*9507f98cSAndroid Build Coastguard Worker ReadOptions opt;
90*9507f98cSAndroid Build Coastguard Worker if (rep_->options.paranoid_checks) {
91*9507f98cSAndroid Build Coastguard Worker opt.verify_checksums = true;
92*9507f98cSAndroid Build Coastguard Worker }
93*9507f98cSAndroid Build Coastguard Worker BlockContents contents;
94*9507f98cSAndroid Build Coastguard Worker if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
95*9507f98cSAndroid Build Coastguard Worker // Do not propagate errors since meta info is not needed for operation
96*9507f98cSAndroid Build Coastguard Worker return;
97*9507f98cSAndroid Build Coastguard Worker }
98*9507f98cSAndroid Build Coastguard Worker Block* meta = new Block(contents);
99*9507f98cSAndroid Build Coastguard Worker
100*9507f98cSAndroid Build Coastguard Worker Iterator* iter = meta->NewIterator(BytewiseComparator());
101*9507f98cSAndroid Build Coastguard Worker std::string key = "filter.";
102*9507f98cSAndroid Build Coastguard Worker key.append(rep_->options.filter_policy->Name());
103*9507f98cSAndroid Build Coastguard Worker iter->Seek(key);
104*9507f98cSAndroid Build Coastguard Worker if (iter->Valid() && iter->key() == Slice(key)) {
105*9507f98cSAndroid Build Coastguard Worker ReadFilter(iter->value());
106*9507f98cSAndroid Build Coastguard Worker }
107*9507f98cSAndroid Build Coastguard Worker delete iter;
108*9507f98cSAndroid Build Coastguard Worker delete meta;
109*9507f98cSAndroid Build Coastguard Worker }
110*9507f98cSAndroid Build Coastguard Worker
ReadFilter(const Slice & filter_handle_value)111*9507f98cSAndroid Build Coastguard Worker void Table::ReadFilter(const Slice& filter_handle_value) {
112*9507f98cSAndroid Build Coastguard Worker Slice v = filter_handle_value;
113*9507f98cSAndroid Build Coastguard Worker BlockHandle filter_handle;
114*9507f98cSAndroid Build Coastguard Worker if (!filter_handle.DecodeFrom(&v).ok()) {
115*9507f98cSAndroid Build Coastguard Worker return;
116*9507f98cSAndroid Build Coastguard Worker }
117*9507f98cSAndroid Build Coastguard Worker
118*9507f98cSAndroid Build Coastguard Worker // We might want to unify with ReadBlock() if we start
119*9507f98cSAndroid Build Coastguard Worker // requiring checksum verification in Table::Open.
120*9507f98cSAndroid Build Coastguard Worker ReadOptions opt;
121*9507f98cSAndroid Build Coastguard Worker if (rep_->options.paranoid_checks) {
122*9507f98cSAndroid Build Coastguard Worker opt.verify_checksums = true;
123*9507f98cSAndroid Build Coastguard Worker }
124*9507f98cSAndroid Build Coastguard Worker BlockContents block;
125*9507f98cSAndroid Build Coastguard Worker if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
126*9507f98cSAndroid Build Coastguard Worker return;
127*9507f98cSAndroid Build Coastguard Worker }
128*9507f98cSAndroid Build Coastguard Worker if (block.heap_allocated) {
129*9507f98cSAndroid Build Coastguard Worker rep_->filter_data = block.data.data(); // Will need to delete later
130*9507f98cSAndroid Build Coastguard Worker }
131*9507f98cSAndroid Build Coastguard Worker rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
132*9507f98cSAndroid Build Coastguard Worker }
133*9507f98cSAndroid Build Coastguard Worker
~Table()134*9507f98cSAndroid Build Coastguard Worker Table::~Table() { delete rep_; }
135*9507f98cSAndroid Build Coastguard Worker
DeleteBlock(void * arg,void * ignored)136*9507f98cSAndroid Build Coastguard Worker static void DeleteBlock(void* arg, void* ignored) {
137*9507f98cSAndroid Build Coastguard Worker delete reinterpret_cast<Block*>(arg);
138*9507f98cSAndroid Build Coastguard Worker }
139*9507f98cSAndroid Build Coastguard Worker
DeleteCachedBlock(const Slice & key,void * value)140*9507f98cSAndroid Build Coastguard Worker static void DeleteCachedBlock(const Slice& key, void* value) {
141*9507f98cSAndroid Build Coastguard Worker Block* block = reinterpret_cast<Block*>(value);
142*9507f98cSAndroid Build Coastguard Worker delete block;
143*9507f98cSAndroid Build Coastguard Worker }
144*9507f98cSAndroid Build Coastguard Worker
ReleaseBlock(void * arg,void * h)145*9507f98cSAndroid Build Coastguard Worker static void ReleaseBlock(void* arg, void* h) {
146*9507f98cSAndroid Build Coastguard Worker Cache* cache = reinterpret_cast<Cache*>(arg);
147*9507f98cSAndroid Build Coastguard Worker Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
148*9507f98cSAndroid Build Coastguard Worker cache->Release(handle);
149*9507f98cSAndroid Build Coastguard Worker }
150*9507f98cSAndroid Build Coastguard Worker
151*9507f98cSAndroid Build Coastguard Worker // Convert an index iterator value (i.e., an encoded BlockHandle)
152*9507f98cSAndroid Build Coastguard Worker // into an iterator over the contents of the corresponding block.
BlockReader(void * arg,const ReadOptions & options,const Slice & index_value)153*9507f98cSAndroid Build Coastguard Worker Iterator* Table::BlockReader(void* arg, const ReadOptions& options,
154*9507f98cSAndroid Build Coastguard Worker const Slice& index_value) {
155*9507f98cSAndroid Build Coastguard Worker Table* table = reinterpret_cast<Table*>(arg);
156*9507f98cSAndroid Build Coastguard Worker Cache* block_cache = table->rep_->options.block_cache;
157*9507f98cSAndroid Build Coastguard Worker Block* block = nullptr;
158*9507f98cSAndroid Build Coastguard Worker Cache::Handle* cache_handle = nullptr;
159*9507f98cSAndroid Build Coastguard Worker
160*9507f98cSAndroid Build Coastguard Worker BlockHandle handle;
161*9507f98cSAndroid Build Coastguard Worker Slice input = index_value;
162*9507f98cSAndroid Build Coastguard Worker Status s = handle.DecodeFrom(&input);
163*9507f98cSAndroid Build Coastguard Worker // We intentionally allow extra stuff in index_value so that we
164*9507f98cSAndroid Build Coastguard Worker // can add more features in the future.
165*9507f98cSAndroid Build Coastguard Worker
166*9507f98cSAndroid Build Coastguard Worker if (s.ok()) {
167*9507f98cSAndroid Build Coastguard Worker BlockContents contents;
168*9507f98cSAndroid Build Coastguard Worker if (block_cache != nullptr) {
169*9507f98cSAndroid Build Coastguard Worker char cache_key_buffer[16];
170*9507f98cSAndroid Build Coastguard Worker EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
171*9507f98cSAndroid Build Coastguard Worker EncodeFixed64(cache_key_buffer + 8, handle.offset());
172*9507f98cSAndroid Build Coastguard Worker Slice key(cache_key_buffer, sizeof(cache_key_buffer));
173*9507f98cSAndroid Build Coastguard Worker cache_handle = block_cache->Lookup(key);
174*9507f98cSAndroid Build Coastguard Worker if (cache_handle != nullptr) {
175*9507f98cSAndroid Build Coastguard Worker block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
176*9507f98cSAndroid Build Coastguard Worker } else {
177*9507f98cSAndroid Build Coastguard Worker s = ReadBlock(table->rep_->file, options, handle, &contents);
178*9507f98cSAndroid Build Coastguard Worker if (s.ok()) {
179*9507f98cSAndroid Build Coastguard Worker block = new Block(contents);
180*9507f98cSAndroid Build Coastguard Worker if (contents.cachable && options.fill_cache) {
181*9507f98cSAndroid Build Coastguard Worker cache_handle = block_cache->Insert(key, block, block->size(),
182*9507f98cSAndroid Build Coastguard Worker &DeleteCachedBlock);
183*9507f98cSAndroid Build Coastguard Worker }
184*9507f98cSAndroid Build Coastguard Worker }
185*9507f98cSAndroid Build Coastguard Worker }
186*9507f98cSAndroid Build Coastguard Worker } else {
187*9507f98cSAndroid Build Coastguard Worker s = ReadBlock(table->rep_->file, options, handle, &contents);
188*9507f98cSAndroid Build Coastguard Worker if (s.ok()) {
189*9507f98cSAndroid Build Coastguard Worker block = new Block(contents);
190*9507f98cSAndroid Build Coastguard Worker }
191*9507f98cSAndroid Build Coastguard Worker }
192*9507f98cSAndroid Build Coastguard Worker }
193*9507f98cSAndroid Build Coastguard Worker
194*9507f98cSAndroid Build Coastguard Worker Iterator* iter;
195*9507f98cSAndroid Build Coastguard Worker if (block != nullptr) {
196*9507f98cSAndroid Build Coastguard Worker iter = block->NewIterator(table->rep_->options.comparator);
197*9507f98cSAndroid Build Coastguard Worker if (cache_handle == nullptr) {
198*9507f98cSAndroid Build Coastguard Worker iter->RegisterCleanup(&DeleteBlock, block, nullptr);
199*9507f98cSAndroid Build Coastguard Worker } else {
200*9507f98cSAndroid Build Coastguard Worker iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
201*9507f98cSAndroid Build Coastguard Worker }
202*9507f98cSAndroid Build Coastguard Worker } else {
203*9507f98cSAndroid Build Coastguard Worker iter = NewErrorIterator(s);
204*9507f98cSAndroid Build Coastguard Worker }
205*9507f98cSAndroid Build Coastguard Worker return iter;
206*9507f98cSAndroid Build Coastguard Worker }
207*9507f98cSAndroid Build Coastguard Worker
NewIterator(const ReadOptions & options) const208*9507f98cSAndroid Build Coastguard Worker Iterator* Table::NewIterator(const ReadOptions& options) const {
209*9507f98cSAndroid Build Coastguard Worker return NewTwoLevelIterator(
210*9507f98cSAndroid Build Coastguard Worker rep_->index_block->NewIterator(rep_->options.comparator),
211*9507f98cSAndroid Build Coastguard Worker &Table::BlockReader, const_cast<Table*>(this), options);
212*9507f98cSAndroid Build Coastguard Worker }
213*9507f98cSAndroid Build Coastguard Worker
InternalGet(const ReadOptions & options,const Slice & k,void * arg,void (* handle_result)(void *,const Slice &,const Slice &))214*9507f98cSAndroid Build Coastguard Worker Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg,
215*9507f98cSAndroid Build Coastguard Worker void (*handle_result)(void*, const Slice&,
216*9507f98cSAndroid Build Coastguard Worker const Slice&)) {
217*9507f98cSAndroid Build Coastguard Worker Status s;
218*9507f98cSAndroid Build Coastguard Worker Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
219*9507f98cSAndroid Build Coastguard Worker iiter->Seek(k);
220*9507f98cSAndroid Build Coastguard Worker if (iiter->Valid()) {
221*9507f98cSAndroid Build Coastguard Worker Slice handle_value = iiter->value();
222*9507f98cSAndroid Build Coastguard Worker FilterBlockReader* filter = rep_->filter;
223*9507f98cSAndroid Build Coastguard Worker BlockHandle handle;
224*9507f98cSAndroid Build Coastguard Worker if (filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
225*9507f98cSAndroid Build Coastguard Worker !filter->KeyMayMatch(handle.offset(), k)) {
226*9507f98cSAndroid Build Coastguard Worker // Not found
227*9507f98cSAndroid Build Coastguard Worker } else {
228*9507f98cSAndroid Build Coastguard Worker Iterator* block_iter = BlockReader(this, options, iiter->value());
229*9507f98cSAndroid Build Coastguard Worker block_iter->Seek(k);
230*9507f98cSAndroid Build Coastguard Worker if (block_iter->Valid()) {
231*9507f98cSAndroid Build Coastguard Worker (*handle_result)(arg, block_iter->key(), block_iter->value());
232*9507f98cSAndroid Build Coastguard Worker }
233*9507f98cSAndroid Build Coastguard Worker s = block_iter->status();
234*9507f98cSAndroid Build Coastguard Worker delete block_iter;
235*9507f98cSAndroid Build Coastguard Worker }
236*9507f98cSAndroid Build Coastguard Worker }
237*9507f98cSAndroid Build Coastguard Worker if (s.ok()) {
238*9507f98cSAndroid Build Coastguard Worker s = iiter->status();
239*9507f98cSAndroid Build Coastguard Worker }
240*9507f98cSAndroid Build Coastguard Worker delete iiter;
241*9507f98cSAndroid Build Coastguard Worker return s;
242*9507f98cSAndroid Build Coastguard Worker }
243*9507f98cSAndroid Build Coastguard Worker
ApproximateOffsetOf(const Slice & key) const244*9507f98cSAndroid Build Coastguard Worker uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
245*9507f98cSAndroid Build Coastguard Worker Iterator* index_iter =
246*9507f98cSAndroid Build Coastguard Worker rep_->index_block->NewIterator(rep_->options.comparator);
247*9507f98cSAndroid Build Coastguard Worker index_iter->Seek(key);
248*9507f98cSAndroid Build Coastguard Worker uint64_t result;
249*9507f98cSAndroid Build Coastguard Worker if (index_iter->Valid()) {
250*9507f98cSAndroid Build Coastguard Worker BlockHandle handle;
251*9507f98cSAndroid Build Coastguard Worker Slice input = index_iter->value();
252*9507f98cSAndroid Build Coastguard Worker Status s = handle.DecodeFrom(&input);
253*9507f98cSAndroid Build Coastguard Worker if (s.ok()) {
254*9507f98cSAndroid Build Coastguard Worker result = handle.offset();
255*9507f98cSAndroid Build Coastguard Worker } else {
256*9507f98cSAndroid Build Coastguard Worker // Strange: we can't decode the block handle in the index block.
257*9507f98cSAndroid Build Coastguard Worker // We'll just return the offset of the metaindex block, which is
258*9507f98cSAndroid Build Coastguard Worker // close to the whole file size for this case.
259*9507f98cSAndroid Build Coastguard Worker result = rep_->metaindex_handle.offset();
260*9507f98cSAndroid Build Coastguard Worker }
261*9507f98cSAndroid Build Coastguard Worker } else {
262*9507f98cSAndroid Build Coastguard Worker // key is past the last key in the file. Approximate the offset
263*9507f98cSAndroid Build Coastguard Worker // by returning the offset of the metaindex block (which is
264*9507f98cSAndroid Build Coastguard Worker // right near the end of the file).
265*9507f98cSAndroid Build Coastguard Worker result = rep_->metaindex_handle.offset();
266*9507f98cSAndroid Build Coastguard Worker }
267*9507f98cSAndroid Build Coastguard Worker delete index_iter;
268*9507f98cSAndroid Build Coastguard Worker return result;
269*9507f98cSAndroid Build Coastguard Worker }
270*9507f98cSAndroid Build Coastguard Worker
271*9507f98cSAndroid Build Coastguard Worker } // namespace leveldb
272