Branch data Line data Source code
1 : : /* chert_table.h: Btree implementation
2 : : *
3 : : * Copyright 1999,2000,2001 BrightStation PLC
4 : : * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
5 : : * Copyright 2008 Lemur Consulting Ltd
6 : : *
7 : : * This program is free software; you can redistribute it and/or
8 : : * modify it under the terms of the GNU General Public License as
9 : : * published by the Free Software Foundation; either version 2 of the
10 : : * License, or (at your option) any later version.
11 : : *
12 : : * This program is distributed in the hope that it will be useful,
13 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : * GNU General Public License for more details.
16 : : *
17 : : * You should have received a copy of the GNU General Public License
18 : : * along with this program; if not, write to the Free Software
19 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 : : * USA
21 : : */
22 : :
23 : : #ifndef OM_HGUARD_CHERT_TABLE_H
24 : : #define OM_HGUARD_CHERT_TABLE_H
25 : :
26 : : #include <xapian/error.h>
27 : : #include <xapian/visibility.h>
28 : :
29 : : #include "chert_types.h"
30 : : #include "chert_btreebase.h"
31 : : #include "chert_cursor.h"
32 : :
33 : : #include "noreturn.h"
34 : : #include "omassert.h"
35 : : #include "str.h"
36 : : #include "stringutils.h"
37 : : #include "unaligned.h"
38 : :
39 : : #include <algorithm>
40 : : #include <string>
41 : :
42 : : #include <zlib.h>
43 : :
44 : : #define DONT_COMPRESS -1
45 : :
46 : : /** The largest possible value of a key_len.
47 : : *
48 : : * This gives the upper limit of the size of a key that may be stored in the
49 : : * B-tree (252 bytes with the present implementation).
50 : : */
51 : : #define CHERT_BTREE_MAX_KEY_LEN 252
52 : :
53 : : /** Even for items of at maximum size, it must be possible to get this number of
54 : : * items in a block */
55 : : #define BLOCK_CAPACITY 4
56 : :
57 : : // FIXME: This named constant probably isn't used everywhere it should be...
58 : : #define BYTES_PER_BLOCK_NUMBER 4
59 : :
60 : : /* The B-tree blocks have a number of internal lengths and offsets held in 1, 2
61 : : or 4 bytes. To make the coding a little clearer,
62 : : we use for
63 : : ------ ---
64 : : K1 the 1 byte length of key
65 : : I2 the 2 byte length of an item (key-tag pair)
66 : : D2 the 2 byte offset to the item from the directory
67 : : C2 the 2 byte counter that ends each key and begins each tag
68 : : */
69 : :
70 : : #define K1 1
71 : : #define I2 2
72 : : #define D2 2
73 : : #define C2 2
74 : :
75 : : /* and when getting K1 or setting D2, we use getK, setD defined as: */
76 : :
77 : : #define getK(p, c) getint1(p, c)
78 : : #define setD(p, c, x) setint2(p, c, x)
79 : :
80 : : /* if you've been reading the comments from the top, the next four procedures
81 : : will not cause any headaches.
82 : :
83 : : Recall that item has this form:
84 : :
85 : : i k
86 : : | |
87 : : I K key x C tag
88 : : <--K-->
89 : : <------I------>
90 : :
91 : :
92 : : item_of(p, c) returns i, the address of the item at block address p,
93 : : directory offset c,
94 : :
95 : : component_of(p, c) returns the number marked 'x' above,
96 : :
97 : : components_of(p, c) returns the number marked 'C' above,
98 : : */
99 : :
100 : : #define REVISION(b) static_cast<unsigned int>(getint4(b, 0))
101 : : #define GET_LEVEL(b) getint1(b, 4)
102 : : #define MAX_FREE(b) getint2(b, 5)
103 : : #define TOTAL_FREE(b) getint2(b, 7)
104 : : #define DIR_END(b) getint2(b, 9)
105 : : #define DIR_START 11
106 : :
107 : : #define SET_REVISION(b, x) setint4(b, 0, x)
108 : : #define SET_LEVEL(b, x) setint1(b, 4, x)
109 : : #define SET_MAX_FREE(b, x) setint2(b, 5, x)
110 : : #define SET_TOTAL_FREE(b, x) setint2(b, 7, x)
111 : : #define SET_DIR_END(b, x) setint2(b, 9, x)
112 : :
113 : : class XAPIAN_VISIBILITY_DEFAULT Key {
114 : : const byte *p;
115 : : public:
116 : 51181534 : explicit Key(const byte * p_) : p(p_) { }
117 : 33698 : const byte * get_address() const { return p; }
118 : 2124502 : void read(std::string * key) const {
119 : 2124502 : key->assign(reinterpret_cast<const char *>(p + K1), length());
120 : 2124502 : }
121 : : bool operator==(Key key2) const;
122 : : bool operator!=(Key key2) const { return !(*this == key2); }
123 : : bool operator<(Key key2) const;
124 : 1 : bool operator>=(Key key2) const { return !(*this < key2); }
125 : : bool operator>(Key key2) const { return key2 < *this; }
126 : 5705316 : bool operator<=(Key key2) const { return !(key2 < *this); }
127 : 89637144 : int length() const {
128 : : AssertRel(getK(p, 0),>=,3);
129 : 89637144 : return getK(p, 0) - C2 - K1;
130 : : }
131 : 172724 : char operator[](size_t i) const {
132 : : AssertRel(i,<,(size_t)length());
133 : 172724 : return p[i + K1];
134 : : }
135 : : };
136 : :
137 : : // Item_wr wants to be "Item with non-const p and more methods" - we can't
138 : : // achieve that nicely with inheritance, so we use a template base class.
139 : : template <class T> class Item_base {
140 : : protected:
141 : : T p;
142 : : public:
143 : : /* Item from block address and offset to item pointer */
144 : 69806851 : Item_base(T p_, int c) : p(p_ + getint2(p_, c)) { }
145 : 71629 : Item_base(T p_) : p(p_) { }
146 : 3141610 : T get_address() const { return p; }
147 : : /** I in diagram above. */
148 : 21328279 : int size() const {
149 : 21328279 : int item_size = getint2(p, 0) & 0x7fff;
150 : : AssertRel(item_size,>=,5);
151 : 21328279 : return item_size;
152 : : }
153 : 2698122 : bool get_compressed() const { return *p & 0x80; }
154 : 5937266 : int component_of() const {
155 : 5937266 : return getint2(p, getK(p, I2) + I2 - C2);
156 : : }
157 : 2783206 : int components_of() const {
158 : 2783206 : return getint2(p, getK(p, I2) + I2);
159 : : }
160 : 51181534 : Key key() const { return Key(p + I2); }
161 : 6965416 : void append_chunk(std::string * tag) const {
162 : : /* number of bytes to extract from current component */
163 : 6965416 : int cd = getK(p, I2) + I2 + C2;
164 : 6965416 : int l = size() - cd;
165 : 6965416 : tag->append(reinterpret_cast<const char *>(p + cd), l);
166 : 6965416 : }
167 : : /** Get this item's tag as a block number (this block should not be at
168 : : * level 0).
169 : : */
170 : 9514946 : uint4 block_given_by() const {
171 : : AssertRel(size(),>=,BYTES_PER_BLOCK_NUMBER);
172 : 9514946 : return getint4(p, size() - BYTES_PER_BLOCK_NUMBER);
173 : : }
174 : : };
175 : :
176 : : class Item : public Item_base<const byte *> {
177 : : public:
178 : : /* Item from block address and offset to item pointer */
179 : 69805954 : Item(const byte * p_, int c) : Item_base<const byte *>(p_, c) { }
180 : : Item(const byte * p_) : Item_base<const byte *>(p_) { }
181 : : };
182 : :
183 : : class Item_wr : public Item_base<byte *> {
184 : 4594259 : void set_key_len(int x) { setint1(p, I2, x); }
185 : : public:
186 : : /* Item_wr from block address and offset to item pointer */
187 : 897 : Item_wr(byte * p_, int c) : Item_base<byte *>(p_, c) { }
188 : 71629 : Item_wr(byte * p_) : Item_base<byte *>(p_) { }
189 : 5954653 : void set_component_of(int i) {
190 : 5954653 : setint2(p, getK(p, I2) + I2 - C2, i);
191 : 5954653 : }
192 : 1355102 : void set_components_of(int m) {
193 : 1355102 : setint2(p, getK(p, I2) + I2, m);
194 : 1355102 : }
195 : : // Takes size as we may be truncating newkey.
196 : 16725 : void set_key_and_block(Key newkey, int truncate_size, uint4 n) {
197 : 16725 : int i = truncate_size;
198 : : // Read the length now because we may be copying the key over itself.
199 : : // FIXME that's stupid! sort this out
200 : 16725 : int newkey_len = newkey.length();
201 : : AssertRel(i,<=,newkey_len);
202 : 16725 : int newsize = I2 + K1 + i + C2;
203 : : // Item size (BYTES_PER_BLOCK_NUMBER since tag contains block number)
204 : 16725 : setint2(p, 0, newsize + BYTES_PER_BLOCK_NUMBER);
205 : : // Key size
206 : 16725 : setint1(p, I2, newsize - I2);
207 : : // Copy the main part of the key, possibly truncating.
208 : 16725 : std::memmove(p + I2 + K1, newkey.get_address() + K1, i);
209 : : // Copy the count part.
210 : 16725 : std::memmove(p + I2 + K1 + i, newkey.get_address() + K1 + newkey_len, C2);
211 : : // Set tag contents to block number
212 : : // set_block_given_by(n);
213 : 16725 : setint4(p, newsize, n);
214 : 16725 : }
215 : :
216 : : /** Set this item's tag to point to block n (this block should not be at
217 : : * level 0).
218 : : */
219 : 897 : void set_block_given_by(uint4 n) {
220 : 897 : setint4(p, size() - BYTES_PER_BLOCK_NUMBER, n);
221 : 897 : }
222 : 1363770 : void set_size(int l) {
223 : : AssertRel(l,>=,5);
224 : 1363770 : setint2(p, 0, l);
225 : 1363770 : }
226 : : /** Form an item with a null key and with block number n in the tag.
227 : : */
228 : 330 : void form_null_key(uint4 n) {
229 : 330 : setint4(p, I2 + K1, n);
230 : 330 : set_key_len(K1); /* null key */
231 : 330 : set_size(I2 + K1 + BYTES_PER_BLOCK_NUMBER); /* total length */
232 : 330 : }
233 : 4590854 : void form_key(const std::string & key_) {
234 : 4590854 : std::string::size_type key_len = key_.length();
235 [ + + ]: 4590854 : if (key_len > CHERT_BTREE_MAX_KEY_LEN) {
236 : : // We check term length when a term is added to a document but
237 : : // chert doubles zero bytes, so this can still happen for terms
238 : : // which contain one or more zero bytes.
239 : 8 : std::string msg("Key too long: length was ");
240 : 8 : msg += str(key_len);
241 : : msg += " bytes, maximum length of a key is "
242 : 8 : STRINGIZE(CHERT_BTREE_MAX_KEY_LEN) " bytes";
243 : 16 : throw Xapian::InvalidArgumentError(msg);
244 : : }
245 : :
246 : 4590846 : set_key_len(key_len + K1 + C2);
247 : 4590846 : std::memmove(p + I2 + K1, key_.data(), key_len);
248 : 4590846 : set_component_of(1);
249 : 4590846 : }
250 : : // FIXME passing cd here is icky
251 : 1360357 : void set_tag(int cd, const char *start, int len, bool compressed) {
252 : 1360357 : std::memmove(p + cd, start, len);
253 : 1360357 : set_size(cd + len);
254 [ + + ]: 1360357 : if (compressed) *p |= 0x80;
255 : 1360357 : }
256 : 3083 : void fake_root_item() {
257 : 3083 : set_key_len(K1 + C2); // null key length
258 : 3083 : set_size(I2 + K1 + 2 * C2); // length of the item
259 : 3083 : set_component_of(1);
260 : 3083 : set_components_of(1);
261 : 3083 : }
262 : : };
263 : :
264 : : // Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
265 : : // With 10, overflow is practically impossible
266 : : // FIXME: but we want it to be completely impossible...
267 : : #define BTREE_CURSOR_LEVELS 10
268 : :
269 : : /** Class managing a Btree table in a Chert database.
270 : : *
271 : : * A table is a store holding a set of key/tag pairs.
272 : : *
273 : : * A key is used to access a block of data in a chert table.
274 : : *
275 : : * Keys are of limited length.
276 : : *
277 : : * Keys may not be empty (each Btree has a special empty key for internal use).
278 : : *
279 : : * A tag is a piece of data associated with a given key. The contents
280 : : * of the tag are opaque to the Btree.
281 : : *
282 : : * Tags may be of arbitrary length (the Btree imposes a very large limit).
283 : : * Note though that they will be loaded into memory in their entirety, so
284 : : * should not be permitted to grow without bound in normal usage.
285 : : *
286 : : * Tags which are null strings _are_ valid, and are different from a
287 : : * tag simply not being in the table.
288 : : */
289 : : class XAPIAN_VISIBILITY_DEFAULT ChertTable {
290 : : friend class ChertCursor; /* Should probably fix this. */
291 : : private:
292 : : /// Copying not allowed
293 : : ChertTable(const ChertTable &);
294 : :
295 : : /// Assignment not allowed
296 : : ChertTable & operator=(const ChertTable &);
297 : :
298 : : /// Return true if there are no entries in the table.
299 : : bool really_empty() const;
300 : :
301 : : public:
302 : : /** Create a new Btree object.
303 : : *
304 : : * This does not create the table on disk - the create_and_open()
305 : : * method must be called to create the table on disk.
306 : : *
307 : : * This also does not open the table - either the create_and_open()
308 : : * or open() methods must be called before use is made of the table.
309 : : *
310 : : * @param tablename_ The name of the table (used in changesets).
311 : : * @param path_ Path at which the table is stored.
312 : : * @param readonly_ whether to open the table for read only access.
313 : : * @param compress_strategy_ DONT_COMPRESS, Z_DEFAULT_STRATEGY,
314 : : * Z_FILTERED, Z_HUFFMAN_ONLY, or Z_RLE.
315 : : * @param lazy If true, don't create the table until it's
316 : : * needed.
317 : : */
318 : : ChertTable(const char * tablename_, const std::string & path_,
319 : : bool readonly_, int compress_strategy_ = DONT_COMPRESS,
320 : : bool lazy = false);
321 : :
322 : : /** Close the Btree.
323 : : *
324 : : * Any outstanding changes (ie, changes made without commit() having
325 : : * subsequently been called) will be lost.
326 : : */
327 : : ~ChertTable();
328 : :
329 : : /** Close the Btree. This closes and frees any of the btree
330 : : * structures which have been created and opened.
331 : : *
332 : : * @param permanent If true, the Btree will not reopen on demand.
333 : : */
334 : : void close(bool permanent=false);
335 : :
336 : : /** Determine whether the btree exists on disk.
337 : : */
338 : : bool exists() const;
339 : :
340 : : /** Open the btree at the latest revision.
341 : : *
342 : : * @exception Xapian::DatabaseCorruptError will be thrown if the table
343 : : * is in a corrupt state.
344 : : * @exception Xapian::DatabaseOpeningError will be thrown if the table
345 : : * cannot be opened (but is not corrupt - eg, permission problems,
346 : : * not present, etc).
347 : : */
348 : : void open();
349 : :
350 : : /** Open the btree at a given revision.
351 : : *
352 : : * Like Btree::open, but try to open at the given revision number
353 : : * and fail if that isn't possible.
354 : : *
355 : : * @param revision_ - revision number to open.
356 : : *
357 : : * @return true if table is successfully opened at desired revision;
358 : : * false if table cannot be opened at desired revision (but
359 : : * table is otherwise consistent).
360 : : *
361 : : * @exception Xapian::DatabaseCorruptError will be thrown if the table
362 : : * is in a corrupt state.
363 : : * @exception Xapian::DatabaseOpeningError will be thrown if the table
364 : : * cannot be opened (but is not corrupt - eg, permission problems,
365 : : * not present, etc).
366 : : */
367 : : bool open(chert_revision_number_t revision_);
368 : :
369 : : /** Return true if this table is open.
370 : : *
371 : : * NB If the table is lazy and doesn't yet exist, returns false.
372 : : */
373 : 611948 : bool is_open() const { return handle >= 0; }
374 : :
375 : : /** Flush any outstanding changes to the DB file of the table.
376 : : *
377 : : * This must be called before commit, to ensure that the DB file is
378 : : * ready to be switched to a new version by the commit.
379 : : */
380 : : void flush_db();
381 : :
382 : : /** Commit any outstanding changes to the table.
383 : : *
384 : : * Commit changes made by calling add() and del() to the Btree.
385 : : *
386 : : * If an error occurs during the operation, this will be signalled
387 : : * by an exception. In case of error, changes made will not be
388 : : * committed to the Btree - they will be discarded.
389 : : *
390 : : * @param new_revision The new revision number to store. This must
391 : : * be greater than the latest revision number (see
392 : : * get_latest_revision_number()), or an exception will be
393 : : * thrown.
394 : : *
395 : : * @param changes_fd The file descriptor to write changes to.
396 : : * Defaults to -1, meaning no changes will be written.
397 : : */
398 : : void commit(chert_revision_number_t revision, int changes_fd = -1,
399 : : const std::string * changes_tail = NULL);
400 : :
401 : : /** Append the list of blocks changed to a changeset file.
402 : : *
403 : : * @param changes_fd The file descriptor to write changes to.
404 : : */
405 : : void write_changed_blocks(int changes_fd);
406 : :
407 : : /** Cancel any outstanding changes.
408 : : *
409 : : * This will discard any modifications which haven't been committed
410 : : * by calling commit().
411 : : */
412 : : void cancel();
413 : :
414 : : /** Read an entry from the table, if and only if it is exactly that
415 : : * being asked for.
416 : : *
417 : : * If the key is found in the table, then the tag is copied to @a
418 : : * tag. If the key is not found tag is left unchanged.
419 : : *
420 : : * The result is true iff the specified key is found in the Btree.
421 : : *
422 : : * @param key The key to look for in the table.
423 : : * @param tag A tag object to fill with the value if found.
424 : : *
425 : : * @return true if key is found in table,
426 : : * false if key is not found in table.
427 : : */
428 : : bool get_exact_entry(const std::string & key, std::string & tag) const;
429 : :
430 : : /** Check if a key exists in the Btree.
431 : : *
432 : : * This is just like get_exact_entry() except it doesn't read the tag
433 : : * value so is more efficient if you only want to check that the key
434 : : * exists.
435 : : *
436 : : * @param key The key to look for in the table.
437 : : *
438 : : * @return true if key is found in table,
439 : : * false if key is not found in table.
440 : : */
441 : : bool key_exists(const std::string &key) const;
442 : :
443 : : /** Read the tag value for the key pointed to by cursor C_.
444 : : *
445 : : * @param keep_compressed Don't uncompress the tag - e.g. useful
446 : : * if it's just being opaquely copied.
447 : : *
448 : : * @return true if current_tag holds compressed data (always
449 : : * false if keep_compressed was false).
450 : : */
451 : : bool read_tag(Cursor * C_, std::string *tag, bool keep_compressed) const;
452 : :
453 : : /** Add a key/tag pair to the table, replacing any existing pair with
454 : : * the same key.
455 : : *
456 : : * If an error occurs during the operation, an exception will be
457 : : * thrown.
458 : : *
459 : : * If key is empty, then the null item is replaced.
460 : : *
461 : : * e.g. btree.add("TODAY", "Mon 9 Oct 2000");
462 : : *
463 : : * @param key The key to store in the table.
464 : : * @param tag The tag to store in the table.
465 : : * @param already_compressed true if tag is already compressed,
466 : : * for example because it is being opaquely copied
467 : : * (default: false).
468 : : */
469 : : void add(const std::string &key, std::string tag, bool already_compressed = false);
470 : :
471 : : /** Delete an entry from the table.
472 : : *
473 : : * The entry will be removed from the table, if it exists. If
474 : : * it does not exist, no action will be taken. The item with
475 : : * an empty key can't be removed, and false is returned.
476 : : *
477 : : * If an error occurs during the operation, this will be signalled
478 : : * by an exception.
479 : : *
480 : : * e.g. bool deleted = btree.del("TODAY")
481 : : *
482 : : * @param key The key to remove from the table.
483 : : *
484 : : * @return true if an entry was removed; false if it did not exist.
485 : : */
486 : : bool del(const std::string &key);
487 : :
488 : : /// Erase this table from disk.
489 : : void erase();
490 : :
491 : : /** Set the block size.
492 : : *
493 : : * It's only safe to do this before the table is created.
494 : : */
495 : : void set_block_size(unsigned int block_size_);
496 : :
497 : : /** Get the block size.
498 : : */
499 : 1979 : unsigned int get_block_size() const { return block_size; }
500 : :
501 : : /** Create a new empty btree structure on disk and open it at the
502 : : * initial revision.
503 : : *
504 : : * The table must be writable - it doesn't make sense to create
505 : : * a table that is read-only!
506 : : *
507 : : * The block size must be less than 64K, where K = 1024. It is unwise
508 : : * to use a small block size (less than 1024 perhaps), so we enforce a
509 : : * minimum block size of 2K.
510 : : *
511 : : * Example:
512 : : *
513 : : * Btree btree("X-");
514 : : * btree.create_and_open(8192);
515 : : * // Files will be X-DB, X-baseA (and X-baseB).
516 : : *
517 : : * @param blocksize - Size of blocks to use.
518 : : *
519 : : * @exception Xapian::DatabaseCreateError if the table can't be
520 : : * created.
521 : : * @exception Xapian::InvalidArgumentError if the requested blocksize
522 : : * is unsuitable.
523 : : */
524 : : void create_and_open(unsigned int blocksize);
525 : :
526 : : void set_full_compaction(bool parity);
527 : :
528 : : /** Get the latest revision number stored in this table.
529 : : *
530 : : * This gives the higher of the revision numbers held in the base
531 : : * files of the B-tree, or just the revision number if there's only
532 : : * one base file.
533 : : *
534 : : * It is possible that there are other, older, revisions of this
535 : : * table available, and indeed that the revision currently open
536 : : * is one of these older revisions.
537 : : */
538 : 829 : chert_revision_number_t get_latest_revision_number() const {
539 : 829 : return latest_revision_number;
540 : : }
541 : :
542 : : /** Get the revision number at which this table
543 : : * is currently open.
544 : : *
545 : : * It is possible that there are other, more recent or older
546 : : * revisions available.
547 : : *
548 : : * @return the current revision number.
549 : : */
550 : 6434 : chert_revision_number_t get_open_revision_number() const {
551 : 6434 : return revision_number;
552 : : }
553 : :
554 : : /** Return a count of the number of entries in the table.
555 : : *
556 : : * The count does not include the ever-present item with null key.
557 : : *
558 : : * Use @a empty() if you only want to know if the table is empty or
559 : : * not.
560 : : *
561 : : * @return The number of entries in the table.
562 : : */
563 : 226095 : chert_tablesize_t get_entry_count() const {
564 : 226095 : return item_count;
565 : : }
566 : :
567 : : /// Return true if there are no entries in the table.
568 : 1674 : bool empty() const {
569 : : // Prior to 1.1.4/1.0.18, item_count was stored in 32 bits, so we
570 : : // can't trust it as there could be more than 1<<32 entries.
571 : : //
572 : : // In theory it should wrap, so if non-zero the table isn't empty,
573 : : // but the table this was first noticed in wasn't off by a multiple
574 : : // of 1<<32.
575 : :
576 : : // An empty table will always have level == 0, and most non-empty
577 : : // tables will have more levels, so use that as a short-cut.
578 [ + + ][ + + ]: 1674 : return (level == 0) && really_empty();
579 : : }
580 : :
581 : : /** Get a cursor for reading from the table.
582 : : *
583 : : * The cursor is owned by the caller - it is the caller's
584 : : * responsibility to ensure that it is deleted.
585 : : */
586 : : ChertCursor * cursor_get() const;
587 : :
588 : : /** Determine whether the object contains uncommitted modifications.
589 : : *
590 : : * @return true if there have been modifications since the last
591 : : * the last call to commit().
592 : : */
593 : 4752 : bool is_modified() const { return Btree_modified; }
594 : :
595 : : /** Set the maximum item size given the block capacity.
596 : : *
597 : : * At least this many items of maximum size must fit into a block.
598 : : * The default is BLOCK_CAPACITY (which is currently 4).
599 : : */
600 : 9113 : void set_max_item_size(size_t block_capacity) {
601 [ - + ]: 9113 : if (block_capacity > BLOCK_CAPACITY) block_capacity = BLOCK_CAPACITY;
602 : : max_item_size = (block_size - DIR_START - block_capacity * D2)
603 : 9113 : / block_capacity;
604 : 9113 : }
605 : :
606 : : protected:
607 : :
608 : : /** Perform the opening operation to read.
609 : : *
610 : : * Return true iff the open succeeded.
611 : : */
612 : : bool do_open_to_read(bool revision_supplied, chert_revision_number_t revision_);
613 : :
614 : : /** Perform the opening operation to write.
615 : : *
616 : : * Return true iff the open succeeded.
617 : : */
618 : : bool do_open_to_write(bool revision_supplied,
619 : : chert_revision_number_t revision_,
620 : : bool create_db = false);
621 : : bool basic_open(bool revision_supplied, chert_revision_number_t revision);
622 : :
623 : : bool find(Cursor *) const;
624 : : int delete_kt();
625 : : void read_block(uint4 n, byte *p) const;
626 : : void write_block(uint4 n, const byte *p) const;
627 : : XAPIAN_NORETURN(void set_overwritten() const);
628 : : void block_to_cursor(Cursor *C_, int j, uint4 n) const;
629 : : void alter();
630 : : void compact(byte *p);
631 : : void enter_key(int j, Key prevkey, Key newkey);
632 : : int mid_point(byte *p);
633 : : void add_item_to_block(byte *p, Item_wr kt, int c);
634 : : void add_item(Item_wr kt, int j);
635 : : void delete_item(int j, bool repeatedly);
636 : : int add_kt(bool found);
637 : : void read_root();
638 : : void split_root(uint4 split_n);
639 : : void form_key(const std::string & key) const;
640 : :
641 : 2384 : char other_base_letter() const {
642 [ + + ]: 2384 : return (base_letter == 'A') ? 'B' : 'A';
643 : : }
644 : :
645 : : /// The name of the table (used when writing changesets).
646 : : const char * tablename;
647 : :
648 : : /// Allocate the zstream for deflating, if not already allocated.
649 : : void lazy_alloc_deflate_zstream() const;
650 : :
651 : : /// Allocate the zstream for inflating, if not already allocated.
652 : : void lazy_alloc_inflate_zstream() const;
653 : :
654 : : /** revision number of the opened B-tree. */
655 : : chert_revision_number_t revision_number;
656 : :
657 : : /** keeps a count of the number of items in the B-tree. */
658 : : chert_tablesize_t item_count;
659 : :
660 : : /** block size of the B tree in bytes */
661 : : unsigned int block_size;
662 : :
663 : : /** Revision number of the other base, or zero if there is only one
664 : : * base file.
665 : : */
666 : : mutable chert_revision_number_t latest_revision_number;
667 : :
668 : : /** set to true if baseA and baseB both exist as valid bases.
669 : : *
670 : : * The unused base is deleted as soon as a write to the Btree takes
671 : : * place. */
672 : : mutable bool both_bases;
673 : :
674 : : /** the value 'A' or 'B' of the current base */
675 : : char base_letter;
676 : :
677 : : /** true if the root block is faked (not written to disk).
678 : : * false otherwise. This is true when the btree hasn't been
679 : : * modified yet.
680 : : */
681 : : bool faked_root_block;
682 : :
683 : : /** true iff the data has been written in a single write in
684 : : * sequential order.
685 : : */
686 : : bool sequential;
687 : :
688 : : /** File descriptor of the table.
689 : : *
690 : : * If the table is lazily created and doesn't yet exist, this will be
691 : : * -1.
692 : : *
693 : : * If close() has been called, this will be -2.
694 : : */
695 : : int handle;
696 : :
697 : : /// number of levels, counting from 0
698 : : int level;
699 : :
700 : : /// the root block of the B-tree
701 : : uint4 root;
702 : :
703 : : /// buffer of size block_size for making up key-tag items
704 : : mutable Item_wr kt;
705 : :
706 : : /// buffer of size block_size for reforming blocks
707 : : byte * buffer;
708 : :
709 : : /// For writing back as file baseA or baseB.
710 : : ChertTable_base base;
711 : :
712 : : /// The path name of the B tree.
713 : : std::string name;
714 : :
715 : : /** count of the number of successive instances of purely
716 : : * sequential addition, starting at SEQ_START_POINT (neg) and
717 : : * going up to zero. */
718 : : int seq_count;
719 : :
720 : : /** the last block to be changed by an addition */
721 : : uint4 changed_n;
722 : :
723 : : /** directory offset corresponding to last block to be changed
724 : : * by an addition */
725 : : int changed_c;
726 : :
727 : : /// maximum size of an item (key-tag pair)
728 : : size_t max_item_size;
729 : :
730 : : /// Set to true the first time the B-tree is modified.
731 : : mutable bool Btree_modified;
732 : :
733 : : /// set to true when full compaction is to be achieved
734 : : bool full_compaction;
735 : :
736 : : /// Set to true when the database is opened to write.
737 : : bool writable;
738 : :
739 : : /// Flag for tracking when cursors need to rebuild.
740 : : mutable bool cursor_created_since_last_modification;
741 : :
742 : : /// Version count for tracking when cursors need to rebuild.
743 : : unsigned long cursor_version;
744 : :
745 : : /* B-tree navigation functions */
746 : 3886481 : bool prev(Cursor *C_, int j) const {
747 [ + + ]: 3886481 : if (sequential) return prev_for_sequential(C_, j);
748 : 3886481 : return prev_default(C_, j);
749 : : }
750 : :
751 : 6388116 : bool next(Cursor *C_, int j) const {
752 [ + + ]: 6388116 : if (sequential) return next_for_sequential(C_, j);
753 : 6388116 : return next_default(C_, j);
754 : : }
755 : :
756 : : /* Default implementations. */
757 : : bool prev_default(Cursor *C_, int j) const;
758 : : bool next_default(Cursor *C_, int j) const;
759 : :
760 : : /* Implementations for sequential mode. */
761 : : bool prev_for_sequential(Cursor *C_, int dummy) const;
762 : : bool next_for_sequential(Cursor *C_, int dummy) const;
763 : :
764 : : static int find_in_block(const byte * p, Key key, bool leaf, int c);
765 : :
766 : : /** block_given_by(p, c) finds the item at block address p, directory
767 : : * offset c, and returns its tag value as an integer.
768 : : */
769 : : static uint4 block_given_by(const byte * p, int c);
770 : :
771 : : mutable Cursor C[BTREE_CURSOR_LEVELS];
772 : :
773 : : /** Buffer used when splitting a block.
774 : : *
775 : : * This buffer holds the split off part of the block. It's only used
776 : : * when updating (in ChertTable::add_item().
777 : : */
778 : : byte * split_p;
779 : :
780 : : /** DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY,
781 : : * Z_RLE. */
782 : : int compress_strategy;
783 : :
784 : : /// Zlib state object for deflating
785 : : mutable z_stream *deflate_zstream;
786 : :
787 : : /// Zlib state object for inflating
788 : : mutable z_stream *inflate_zstream;
789 : :
790 : : /// If true, don't create the table until it's needed.
791 : : bool lazy;
792 : :
793 : : /* Debugging methods */
794 : : // void report_block_full(int m, int n, const byte * p);
795 : :
796 : : /// Throw an exception indicating that the database is closed.
797 : : XAPIAN_NORETURN(static void throw_database_closed());
798 : : };
799 : :
800 : : #endif /* OM_HGUARD_CHERT_TABLE_H */
|