Branch data Line data Source code
1 : : /* chert_table.cc: Btree implementation
2 : : *
3 : : * Copyright 1999,2000,2001 BrightStation PLC
4 : : * Copyright 2002 Ananova Ltd
5 : : * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011 Olly Betts
6 : : * Copyright 2008 Lemur Consulting Ltd
7 : : * Copyright 2010 Richard Boulton
8 : : *
9 : : * This program is free software; you can redistribute it and/or
10 : : * modify it under the terms of the GNU General Public License as
11 : : * published by the Free Software Foundation; either version 2 of the
12 : : * License, or (at your option) any later version.
13 : : *
14 : : * This program is distributed in the hope that it will be useful,
15 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : : * GNU General Public License for more details.
18 : : *
19 : : * You should have received a copy of the GNU General Public License
20 : : * along with this program; if not, write to the Free Software
21 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 : : * USA
23 : : */
24 : :
25 : : #include <config.h>
26 : :
27 : : #include "chert_table.h"
28 : :
29 : : #include <xapian/error.h>
30 : :
31 : : #include "safeerrno.h"
32 : : #ifdef __WIN32__
33 : : # include "msvc_posix_wrapper.h"
34 : : #endif
35 : :
36 : : #include "omassert.h"
37 : : #include "stringutils.h" // For STRINGIZE().
38 : :
39 : : // Define to use "dangerous" mode - in this mode we write modified btree
40 : : // blocks back in place. This is somewhat faster (especially once we're
41 : : // I/O bound) but the database can't be safely searched during indexing
42 : : // and if indexing is terminated uncleanly, the database may be corrupt.
43 : : //
44 : : // Despite the risks, it's useful for speeding up a full rebuild.
45 : : //
46 : : // FIXME: make this mode run-time selectable, and record that it is currently
47 : : // in use somewhere on disk, so readers can check and refuse to open the
48 : : // database.
49 : : //
50 : : // #define DANGEROUS
51 : :
52 : : #include <sys/types.h>
53 : :
54 : : // Trying to include the correct headers with the correct defines set to
55 : : // get pread() and pwrite() prototyped on every platform without breaking any
56 : : // other platform is a real can of worms. So instead we probe for what
57 : : // prototypes (if any) are required in configure and put them into
58 : : // PREAD_PROTOTYPE and PWRITE_PROTOTYPE.
59 : : #if defined HAVE_PREAD && defined PREAD_PROTOTYPE
60 : : PREAD_PROTOTYPE
61 : : #endif
62 : : #if defined HAVE_PWRITE && defined PWRITE_PROTOTYPE
63 : : PWRITE_PROTOTYPE
64 : : #endif
65 : :
66 : : #include <cstdio> /* for rename */
67 : : #include <cstring> /* for memmove */
68 : : #include <climits> /* for CHAR_BIT */
69 : :
70 : : #include "chert_btreebase.h"
71 : : #include "chert_cursor.h"
72 : :
73 : : #include "io_utils.h"
74 : : #include "omassert.h"
75 : : #include "debuglog.h"
76 : : #include "pack.h"
77 : : #include "str.h"
78 : : #include "unaligned.h"
79 : : #include "utils.h"
80 : :
81 : : #include <algorithm> // for std::min()
82 : : #include <string>
83 : :
84 : : using namespace std;
85 : :
86 : : // Only try to compress tags longer than this many bytes.
87 : : const size_t COMPRESS_MIN = 4;
88 : :
89 : : //#define BTREE_DEBUG_FULL 1
90 : : #undef BTREE_DEBUG_FULL
91 : :
92 : : #ifdef BTREE_DEBUG_FULL
93 : : /*------debugging aids from here--------*/
94 : :
95 : : static void print_key(const byte * p, int c, int j);
96 : : static void print_tag(const byte * p, int c, int j);
97 : :
98 : : /*
99 : : static void report_cursor(int N, Btree * B, Cursor * C)
100 : : {
101 : : int i;
102 : : printf("%d)\n", N);
103 : : for (i = 0; i <= B->level; i++)
104 : : printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
105 : : C[i].p, C[i].c, C[i].n, C[i].rewrite);
106 : : }
107 : : */
108 : :
109 : : /*------to here--------*/
110 : : #endif /* BTREE_DEBUG_FULL */
111 : :
112 : 11668 : static inline byte *zeroed_new(size_t size)
113 : : {
114 : 11668 : byte *temp = new byte[size];
115 : : // No need to check if temp is NULL, new throws std::bad_alloc if
116 : : // allocation fails.
117 : : Assert(temp);
118 : 11668 : memset(temp, 0, size);
119 : 11668 : return temp;
120 : : }
121 : :
122 : : /* A B-tree comprises (a) a base file, containing essential information (Block
123 : : size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
124 : : bitmap being set if the Nth block of the B-tree file is in use, and (c) a
125 : : file DB containing the B-tree proper. The DB file is divided into a sequence
126 : : of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
127 : : use. Those in use are arranged in a tree.
128 : :
129 : : Each block, b, has a structure like this:
130 : :
131 : : R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
132 : : <---------- D ----------> <-M->
133 : :
134 : : And then,
135 : :
136 : : R = REVISION(b) is the revision number the B-tree had when the block was
137 : : written into the DB file.
138 : : L = GET_LEVEL(b) is the level of the block, which is the number of levels
139 : : towards the root of the B-tree structure. So leaf blocks
140 : : have level 0 and the one root block has the highest level
141 : : equal to the number of levels in the B-tree.
142 : : M = MAX_FREE(b) is the size of the gap between the end of the directory and
143 : : the first item of data. (It is not necessarily the maximum
144 : : size among the bits of space that are free, but I can't
145 : : think of a better name.)
146 : : T = TOTAL_FREE(b)is the total amount of free space left in b.
147 : : D = DIR_END(b) gives the offset to the end of the directory.
148 : :
149 : : o1, o2 ... oN are a directory of offsets to the N items held in the block.
150 : : The items are key-tag pairs, and as they occur in the directory are ordered
151 : : by the keys.
152 : :
153 : : An item has this form:
154 : :
155 : : I K key x C tag
156 : : <--K-->
157 : : <------I------>
158 : :
159 : : A long tag presented through the API is split up into C tags small enough to
160 : : be accommodated in the blocks of the B-tree. The key is extended to include
161 : : a counter, x, which runs from 1 to C. The key is preceded by a length, K,
162 : : and the whole item with a length, I, as depicted above.
163 : :
164 : : Here are the corresponding definitions:
165 : :
166 : : */
167 : :
168 : : /** Flip to sequential addition block-splitting after this number of observed
169 : : * sequential additions (in negated form). */
170 : : #define SEQ_START_POINT (-10)
171 : :
172 : :
173 : : /* There are two bit maps in bit_map0 and bit_map. The nth bit of bitmap is 0
174 : : if the nth block is free, otherwise 1. bit_map0 is the initial state of
175 : : bitmap at the start of the current transaction.
176 : :
177 : : Note use of the limits.h values:
178 : : UCHAR_MAX = 255, an unsigned with all bits set, and
179 : : CHAR_BIT = 8, the number of bits per byte
180 : :
181 : : BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
182 : : bytes -- 64K effectively.
183 : : */
184 : :
185 : : #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
186 : :
187 : : /// read_block(n, p) reads block n of the DB file to address p.
188 : : void
189 : 6643069 : ChertTable::read_block(uint4 n, byte * p) const
190 : : {
191 : : // Log the value of p, not the contents of the block it points to...
192 : : LOGCALL_VOID(DB, "ChertTable::read_block", n | (void*)p);
193 : : /* Use the base bit_map_size not the bitmap's size, because
194 : : * the latter is uninitialised in readonly mode.
195 : : */
196 : : Assert(n / CHAR_BIT < base.get_bit_map_size());
197 : :
198 : : #ifdef HAVE_PREAD
199 : 6643069 : off_t offset = off_t(block_size) * n;
200 : 6643069 : int m = block_size;
201 : 0 : while (true) {
202 : : ssize_t bytes_read = pread(handle, reinterpret_cast<char *>(p), m,
203 : 6643069 : offset);
204 : : // normal case - read succeeded, so return.
205 [ + - ]: 6643069 : if (bytes_read == m) return;
206 [ # # ]: 0 : if (bytes_read == -1) {
207 [ # # ]: 0 : if (errno == EINTR) continue;
208 : 0 : string message = "Error reading block " + str(n) + ": ";
209 : 0 : message += strerror(errno);
210 : 0 : throw Xapian::DatabaseError(message);
211 [ # # ]: 0 : } else if (bytes_read == 0) {
212 : 0 : string message = "Error reading block " + str(n) + ": got end of file";
213 : 0 : throw Xapian::DatabaseError(message);
214 [ # # ]: 0 : } else if (bytes_read < m) {
215 : : /* Read part of the block, which is not an error. We should
216 : : * continue reading the rest of the block.
217 : : */
218 : 0 : m -= int(bytes_read);
219 : 0 : p += bytes_read;
220 : 0 : offset += bytes_read;
221 : : }
222 : : }
223 : : #else
224 : : if (lseek(handle, off_t(block_size) * n, SEEK_SET) == -1) {
225 : : string message = "Error seeking to block: ";
226 : : message += strerror(errno);
227 : : throw Xapian::DatabaseError(message);
228 : : }
229 : :
230 : : io_read(handle, reinterpret_cast<char *>(p), block_size, block_size);
231 : : #endif
232 : : }
233 : :
234 : : /** write_block(n, p) writes block n in the DB file from address p.
235 : : * When writing we check to see if the DB file has already been
236 : : * modified. If not (so this is the first write) the old base is
237 : : * deleted. This prevents the possibility of it being opened
238 : : * subsequently as an invalid base.
239 : : */
240 : : void
241 : 321756 : ChertTable::write_block(uint4 n, const byte * p) const
242 : : {
243 : : LOGCALL_VOID(DB, "ChertTable::write_block", n | p);
244 : : Assert(writable);
245 : : /* Check that n is in range. */
246 : : Assert(n / CHAR_BIT < base.get_bit_map_size());
247 : :
248 : : /* don't write to non-free */;
249 : : AssertParanoid(base.block_free_at_start(n));
250 : :
251 : : /* write revision is okay */
252 : : AssertEqParanoid(REVISION(p), latest_revision_number + 1);
253 : :
254 [ + + ]: 321756 : if (both_bases) {
255 : : // Delete the old base before modifying the database.
256 : : //
257 : : // If the file is on NFS, then io_unlink() may return false even if
258 : : // the file was removed, so on balance throwing an exception in this
259 : : // case is unhelpful, since we wanted the file gone anyway! The
260 : : // likely explanation is that somebody moved, deleted, or changed a
261 : : // symlink to the database directory.
262 : 592 : (void)io_unlink(name + "base" + other_base_letter());
263 : 592 : both_bases = false;
264 : 592 : latest_revision_number = revision_number;
265 : : }
266 : :
267 : : #ifdef HAVE_PWRITE
268 : 321756 : off_t offset = off_t(block_size) * n;
269 : 321756 : int m = block_size;
270 : 0 : while (true) {
271 : 321756 : ssize_t bytes_written = pwrite(handle, p, m, offset);
272 [ + - ]: 321756 : if (bytes_written == m) {
273 : : // normal case - write succeeded, so return.
274 : : return;
275 [ # # ]: 0 : } else if (bytes_written == -1) {
276 [ # # ]: 0 : if (errno == EINTR) continue;
277 : 0 : string message = "Error writing block: ";
278 : 0 : message += strerror(errno);
279 : 0 : throw Xapian::DatabaseError(message);
280 [ # # ]: 0 : } else if (bytes_written == 0) {
281 : 0 : string message = "Error writing block: wrote no data";
282 : 0 : throw Xapian::DatabaseError(message);
283 [ # # ]: 0 : } else if (bytes_written < m) {
284 : : /* Wrote part of the block, which is not an error. We should
285 : : * continue writing the rest of the block.
286 : : */
287 : 0 : m -= bytes_written;
288 : 0 : p += bytes_written;
289 : 0 : offset += bytes_written;
290 : : }
291 : : }
292 : : #else
293 : : if (lseek(handle, (off_t)block_size * n, SEEK_SET) == -1) {
294 : : string message = "Error seeking to block: ";
295 : : message += strerror(errno);
296 : : throw Xapian::DatabaseError(message);
297 : : }
298 : :
299 : : io_write(handle, reinterpret_cast<const char *>(p), block_size);
300 : : #endif
301 : : }
302 : :
303 : :
304 : : /* A note on cursors:
305 : :
306 : : Each B-tree level has a corresponding array element C[j] in a
307 : : cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
308 : : root block level. Within a level j,
309 : :
310 : : C[j].p addresses the block
311 : : C[j].c is the offset into the directory entry in the block
312 : : C[j].n is the number of the block at C[j].p
313 : :
314 : : A look up in the B-tree causes navigation of the blocks starting
315 : : from the root. In each block, p, we find an offset, c, to an item
316 : : which gives the number, n, of the block for the next level. This
317 : : leads to an array of values p,c,n which are held inside the cursor.
318 : :
319 : : Any Btree object B has a built-in cursor, at B->C. But other cursors may
320 : : be created. If BC is a created cursor, BC->C is the cursor in the
321 : : sense given above, and BC->B is the handle for the B-tree again.
322 : : */
323 : :
324 : :
325 : : void
326 : 5 : ChertTable::set_overwritten() const
327 : : {
328 : : LOGCALL_VOID(DB, "ChertTable::set_overwritten", NO_ARGS);
329 : : // If we're writable, there shouldn't be another writer who could cause
330 : : // overwritten to be flagged, so that's a DatabaseCorruptError.
331 [ - + ]: 5 : if (writable)
332 : 0 : throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
333 : 5 : throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
334 : : }
335 : :
336 : : /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
337 : : C, writing the block currently at C[j] back to disk if necessary.
338 : : Note that
339 : :
340 : : C[j].rewrite
341 : :
342 : : is true iff C[j].n is different from block n in file DB. If it is
343 : : false no rewriting is necessary.
344 : : */
345 : :
346 : : void
347 : 9523237 : ChertTable::block_to_cursor(Cursor * C_, int j, uint4 n) const
348 : : {
349 : : LOGCALL_VOID(DB, "ChertTable::block_to_cursor", (void*)C_ | j | n);
350 [ + + ]: 9523237 : if (n == C_[j].n) return;
351 : 6824404 : byte * p = C_[j].p;
352 : : Assert(p);
353 : :
354 : : // FIXME: only needs to be done in write mode
355 [ + + ]: 6824404 : if (C_[j].rewrite) {
356 : : Assert(writable);
357 : : Assert(C == C_);
358 : 303122 : write_block(C_[j].n, p);
359 : 303122 : C_[j].rewrite = false;
360 : : }
361 : :
362 : : // Check if the block is in the built-in cursor (potentially in
363 : : // modified form).
364 [ + + ][ + + ]: 7020315 : if (writable && n == C[j].n) {
365 [ + - ]: 195911 : if (p != C[j].p)
366 : 195911 : memcpy(p, C[j].p, block_size);
367 : : } else {
368 : 6628493 : read_block(n, p);
369 : : }
370 : :
371 : 6824404 : C_[j].n = n;
372 [ + + ]: 6824404 : if (j < level) {
373 : : /* unsigned comparison */
374 [ + + ]: 6816098 : if (rare(REVISION(p) > REVISION(C_[j + 1].p))) {
375 : 5 : set_overwritten();
376 : : return;
377 : : }
378 : : }
379 : :
380 [ - + ]: 6824399 : if (rare(j != GET_LEVEL(p))) {
381 : 0 : string msg = "Expected block ";
382 : 0 : msg += str(n);
383 : 0 : msg += " to be level ";
384 : 0 : msg += str(j);
385 : 0 : msg += ", not ";
386 : 0 : msg += str(GET_LEVEL(p));
387 : 9523232 : throw Xapian::DatabaseCorruptError(msg);
388 : : }
389 : : }
390 : :
391 : : /** Btree::alter(); is called when the B-tree is to be altered.
392 : :
393 : : It causes new blocks to be forced for the current set of blocks in
394 : : the cursor.
395 : :
396 : : The point is that if a block at level 0 is to be altered it may get
397 : : a new number. Then the pointer to this block from level 1 will need
398 : : changing. So the block at level 1 needs altering and may get a new
399 : : block number. Then the pointer to this block from level 2 will need
400 : : changing ... and so on back to the root.
401 : :
402 : : The clever bit here is spotting the cases when we can make an early
403 : : exit from this process. If C[j].rewrite is true, C[j+k].rewrite
404 : : will be true for k = 1,2 ... We have been through all this before,
405 : : and there is no need to do it again. If C[j].n was free at the
406 : : start of the transaction, we can copy it back to the same place
407 : : without violating the integrity of the B-tree. We don't then need a
408 : : new n and can return. The corresponding C[j].rewrite may be true or
409 : : false in that case.
410 : : */
411 : :
412 : : void
413 : 1402060 : ChertTable::alter()
414 : : {
415 : : LOGCALL_VOID(DB, "ChertTable::alter", NO_ARGS);
416 : : Assert(writable);
417 : : #ifdef DANGEROUS
418 : : C[0].rewrite = true;
419 : : #else
420 : 1402060 : int j = 0;
421 : 1402060 : byte * p = C[j].p;
422 : 1402957 : while (true) {
423 [ + + ]: 1402957 : if (C[j].rewrite) return; /* all new, so return */
424 : 303978 : C[j].rewrite = true;
425 : :
426 : 303978 : uint4 n = C[j].n;
427 [ + + ]: 303978 : if (base.block_free_at_start(n)) {
428 : : Assert(REVISION(p) == latest_revision_number + 1);
429 : 302482 : return;
430 : : }
431 : : Assert(REVISION(p) < latest_revision_number + 1);
432 : 1496 : base.free_block(n);
433 : 1496 : n = base.next_free_block();
434 : 1496 : C[j].n = n;
435 : 1496 : SET_REVISION(p, latest_revision_number + 1);
436 : :
437 [ + + ]: 1496 : if (j == level) return;
438 : 897 : j++;
439 : 897 : p = C[j].p;
440 : 897 : Item_wr(p, C[j].c).set_block_given_by(n);
441 : : }
442 : : #endif
443 : : }
444 : :
445 : : /** find_in_block(p, key, leaf, c) searches for the key in the block at p.
446 : :
447 : : leaf is true for a data block, and false for an index block (when the
448 : : first key is dummy and never needs to be tested). What we get is the
449 : : directory entry to the last key <= the key being searched for.
450 : :
451 : : The lookup is by binary chop, with i and j set to the left and
452 : : right ends of the search area. In sequential addition, c will often
453 : : be the answer, so we test the keys round c and move i and j towards
454 : : c if possible.
455 : : */
456 : :
457 : : int
458 : 11670669 : ChertTable::find_in_block(const byte * p, Key key, bool leaf, int c)
459 : : {
460 : : LOGCALL_STATIC(DB, int, "ChertTable::find_in_block", (void*)p | (const void *)key.get_address() | leaf | c);
461 : 11670669 : int i = DIR_START;
462 [ + + ]: 11670669 : if (leaf) i -= D2;
463 : 11670669 : int j = DIR_END(p);
464 : :
465 [ + + ]: 11670669 : if (c != -1) {
466 [ + + ][ + + ]: 5918895 : if (c < j && i < c && Item(p, c).key() <= key)
[ + + ][ + + ]
467 : 5215314 : i = c;
468 : 5918895 : c += D2;
469 [ + + ][ + + ]: 5918895 : if (c < j && i < c && key < Item(p, c).key())
[ + + ][ + + ]
470 : 1633552 : j = c;
471 : : }
472 : :
473 [ + + ]: 41753592 : while (j - i > D2) {
474 : 30082923 : int k = i + ((j - i)/(D2 * 2))*D2; /* mid way */
475 [ + + ]: 30082923 : if (key < Item(p, k).key()) j = k; else i = k;
476 : : }
477 : 11670669 : RETURN(i);
478 : : }
479 : :
480 : : /** find(C_) searches for the key of B->kt in the B-tree.
481 : :
482 : : Result is true if found, false otherwise. When false, the B_tree
483 : : cursor is positioned at the last key in the B-tree <= the search
484 : : key. Goes to first (null) item in B-tree when key length == 0.
485 : : */
486 : :
487 : : bool
488 : 4599551 : ChertTable::find(Cursor * C_) const
489 : : {
490 : : LOGCALL(DB, bool, "ChertTable::find", (void*)C_);
491 : : // Note: the parameter is needed when we're called by ChertCursor
492 : : const byte * p;
493 : : int c;
494 : 4599551 : Key key = kt.key();
495 [ + + ]: 11653944 : for (int j = level; j > 0; --j) {
496 : 7054398 : p = C_[j].p;
497 : 7054398 : c = find_in_block(p, key, false, C_[j].c);
498 : : #ifdef BTREE_DEBUG_FULL
499 : : printf("Block in ChertTable:find - code position 1");
500 : : report_block_full(j, C_[j].n, p);
501 : : #endif /* BTREE_DEBUG_FULL */
502 : 7054398 : C_[j].c = c;
503 : 7054398 : block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
504 : : }
505 : 4599546 : p = C_[0].p;
506 : 4599546 : c = find_in_block(p, key, true, C_[0].c);
507 : : #ifdef BTREE_DEBUG_FULL
508 : : printf("Block in ChertTable:find - code position 2");
509 : : report_block_full(0, C_[0].n, p);
510 : : #endif /* BTREE_DEBUG_FULL */
511 : 4599546 : C_[0].c = c;
512 [ + + ]: 4599546 : if (c < DIR_START) RETURN(false);
513 : 4599546 : RETURN(Item(p, c).key() == key);
514 : : }
515 : :
516 : : /** compact(p) compact the block at p by shuffling all the items up to the end.
517 : :
518 : : MAX_FREE(p) is then maximized, and is equal to TOTAL_FREE(p).
519 : : */
520 : :
521 : : void
522 : 34605 : ChertTable::compact(byte * p)
523 : : {
524 : : LOGCALL_VOID(DB, "ChertTable::compact", (void*)p);
525 : : Assert(writable);
526 : 34605 : int e = block_size;
527 : 34605 : byte * b = buffer;
528 : 34605 : int dir_end = DIR_END(p);
529 [ + + ]: 1734754 : for (int c = DIR_START; c < dir_end; c += D2) {
530 : 1700149 : Item item(p, c);
531 : 1700149 : int l = item.size();
532 : 1700149 : e -= l;
533 : 1700149 : memmove(b + e, item.get_address(), l);
534 : 1700149 : setD(p, c, e); /* reform in b */
535 : : }
536 : 34605 : memmove(p + e, b + e, block_size - e); /* copy back */
537 : 34605 : e -= dir_end;
538 : 34605 : SET_TOTAL_FREE(p, e);
539 : 34605 : SET_MAX_FREE(p, e);
540 : 34605 : }
541 : :
542 : : /** Btree needs to gain a new level to insert more items: so split root block
543 : : * and construct a new one.
544 : : */
545 : : void
546 : 206 : ChertTable::split_root(uint4 split_n)
547 : : {
548 : : LOGCALL_VOID(DB, "ChertTable::split_root", split_n);
549 : : /* gain a level */
550 : 206 : ++level;
551 : :
552 : : /* check level overflow - this isn't something that should ever happen
553 : : * but deserves more than an Assert()... */
554 [ - + ]: 206 : if (level == BTREE_CURSOR_LEVELS) {
555 : 0 : throw Xapian::DatabaseCorruptError("Btree has grown impossibly large ("STRINGIZE(BTREE_CURSOR_LEVELS)" levels)");
556 : : }
557 : :
558 : 206 : byte * q = zeroed_new(block_size);
559 : 206 : C[level].p = q;
560 : 206 : C[level].c = DIR_START;
561 : 206 : C[level].n = base.next_free_block();
562 : 206 : C[level].rewrite = true;
563 : 206 : SET_REVISION(q, latest_revision_number + 1);
564 : 206 : SET_LEVEL(q, level);
565 : 206 : SET_DIR_END(q, DIR_START);
566 : 206 : compact(q); /* to reset TOTAL_FREE, MAX_FREE */
567 : :
568 : : /* form a null key in b with a pointer to the old root */
569 : : byte b[10]; /* 7 is exact */
570 : 206 : Item_wr item(b);
571 : 206 : item.form_null_key(split_n);
572 : 206 : add_item(item, level);
573 : 206 : }
574 : :
575 : : /** enter_key(j, prevkey, newkey) is called after a block split.
576 : :
577 : : It enters in the block at level C[j] a separating key for the block
578 : : at level C[j - 1]. The key itself is newkey. prevkey is the
579 : : preceding key, and at level 1 newkey can be trimmed down to the
580 : : first point of difference to prevkey for entry in C[j].
581 : :
582 : : This code looks longer than it really is. If j exceeds the number
583 : : of B-tree levels the root block has split and we have to construct
584 : : a new one, but this is a rare event.
585 : :
586 : : The key is constructed in b, with block number C[j - 1].n as tag,
587 : : and this is added in with add_item. add_item may itself cause a
588 : : block split, with a further call to enter_key. Hence the recursion.
589 : : */
590 : : void
591 : 16725 : ChertTable::enter_key(int j, Key prevkey, Key newkey)
592 : : {
593 : : LOGCALL_VOID(DB, "ChertTable::enter_key", j | Literal("prevkey") | Literal("newkey"));
594 : : Assert(writable);
595 : : Assert(prevkey < newkey);
596 : : AssertRel(j,>=,1);
597 : :
598 : 16725 : uint4 blocknumber = C[j - 1].n;
599 : :
600 : : // FIXME update to use Key
601 : : // Keys are truncated here: but don't truncate the count at the end away.
602 : 16725 : const int newkey_len = newkey.length();
603 : : AssertRel(newkey_len,>,0);
604 : : int i;
605 : :
606 [ + + ]: 16725 : if (j == 1) {
607 : : // Truncate the key to the minimal key which differs from prevkey,
608 : : // the preceding key in the block.
609 : 16601 : i = 0;
610 : 16601 : const int min_len = min(newkey_len, prevkey.length());
611 [ + + ][ + + ]: 89107 : while (i < min_len && prevkey[i] == newkey[i]) {
[ + + ]
612 : 72506 : i++;
613 : : }
614 : :
615 : : // Want one byte of difference.
616 [ + + ]: 16601 : if (i < newkey_len) i++;
617 : : } else {
618 : : /* Can't truncate between branch levels, since the separated keys
619 : : * are in at the leaf level, and truncating again will change the
620 : : * branch point.
621 : : */
622 : 124 : i = newkey_len;
623 : : }
624 : :
625 : : byte b[UCHAR_MAX + 6];
626 : 16725 : Item_wr item(b);
627 : : Assert(i <= 256 - I2 - C2);
628 : : Assert(i <= (int)sizeof(b) - I2 - C2 - 4);
629 : 16725 : item.set_key_and_block(newkey, i, blocknumber);
630 : :
631 : : // When j > 1 we can make the first key of block p null. This is probably
632 : : // worthwhile as it trades a small amount of CPU and RAM use for a small
633 : : // saving in disk use. Other redundant keys will still creep in though.
634 [ + + ]: 16725 : if (j > 1) {
635 : 124 : byte * p = C[j - 1].p;
636 : 124 : uint4 n = getint4(newkey.get_address(), newkey_len + K1 + C2);
637 : 124 : int new_total_free = TOTAL_FREE(p) + newkey_len + C2;
638 : : // FIXME: incredibly icky going from key to item like this...
639 : 124 : Item_wr(const_cast<byte*>(newkey.get_address()) - I2).form_null_key(n);
640 : 124 : SET_TOTAL_FREE(p, new_total_free);
641 : : }
642 : :
643 : 16725 : C[j].c = find_in_block(C[j].p, item.key(), false, 0) + D2;
644 : 16725 : C[j].rewrite = true; /* a subtle point: this *is* required. */
645 : 16725 : add_item(item, j);
646 : 16725 : }
647 : :
648 : : /** mid_point(p) finds the directory entry in c that determines the
649 : : approximate mid point of the data in the block at p.
650 : : */
651 : :
652 : : int
653 : 7820 : ChertTable::mid_point(byte * p)
654 : : {
655 : : LOGCALL(DB, int, "ChertTable::mid_point", (void*)p);
656 : 7820 : int n = 0;
657 : 7820 : int dir_end = DIR_END(p);
658 : 7820 : int size = block_size - TOTAL_FREE(p) - dir_end;
659 [ + - ]: 346515 : for (int c = DIR_START; c < dir_end; c += D2) {
660 : 346515 : int l = Item(p, c).size();
661 : 346515 : n += 2 * l;
662 [ + + ]: 346515 : if (n >= size) {
663 [ + + ]: 7820 : if (l < n - size) RETURN(c);
664 : 4333 : RETURN(c + D2);
665 : : }
666 : : }
667 : :
668 : : /* falling out of mid_point */
669 : : Assert(false);
670 : 7820 : RETURN(0); /* Stop compiler complaining about end of method. */
671 : : }
672 : :
673 : : /** add_item_to_block(p, kt_, c) adds item kt_ to the block at p.
674 : :
675 : : c is the offset in the directory that needs to be expanded to accommodate
676 : : the new entry for the item. We know before this is called that there is
677 : : enough contiguous room for the item in the block, so it's just a matter of
678 : : shuffling up any directory entries after where we're inserting and copying
679 : : in the item.
680 : : */
681 : :
682 : : void
683 : 1335127 : ChertTable::add_item_to_block(byte * p, Item_wr kt_, int c)
684 : : {
685 : : LOGCALL_VOID(DB, "ChertTable::add_item_to_block", (void*)p | Literal("kt_") | c);
686 : : Assert(writable);
687 : 1335127 : int dir_end = DIR_END(p);
688 : 1335127 : int kt_len = kt_.size();
689 : 1335127 : int needed = kt_len + D2;
690 : 1335127 : int new_total = TOTAL_FREE(p) - needed;
691 : 1335127 : int new_max = MAX_FREE(p) - needed;
692 : :
693 : : Assert(new_total >= 0);
694 : :
695 : : AssertRel(MAX_FREE(p),>=,needed);
696 : :
697 : : Assert(dir_end >= c);
698 : :
699 : 1335127 : memmove(p + c + D2, p + c, dir_end - c);
700 : 1335127 : dir_end += D2;
701 : 1335127 : SET_DIR_END(p, dir_end);
702 : :
703 : 1335127 : int o = dir_end + new_max;
704 : 1335127 : setD(p, c, o);
705 : 1335127 : memmove(p + o, kt_.get_address(), kt_len);
706 : :
707 : 1335127 : SET_MAX_FREE(p, new_max);
708 : :
709 : 1335127 : SET_TOTAL_FREE(p, new_total);
710 : 1335127 : }
711 : :
712 : : /** ChertTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
713 : : *
714 : : * If there is not enough room the block splits and the item is then
715 : : * added to the appropriate half.
716 : : */
717 : : void
718 : 1335127 : ChertTable::add_item(Item_wr kt_, int j)
719 : : {
720 : : LOGCALL_VOID(DB, "ChertTable::add_item", Literal("kt_") | j);
721 : : Assert(writable);
722 : 1335127 : byte * p = C[j].p;
723 : 1335127 : int c = C[j].c;
724 : : uint4 n;
725 : :
726 : 1335127 : int needed = kt_.size() + D2;
727 [ + + ]: 1335127 : if (TOTAL_FREE(p) < needed) {
728 : : int m;
729 : : // Prepare to split p. After splitting, the block is in two halves, the
730 : : // lower half is split_p, the upper half p again. add_to_upper_half
731 : : // becomes true when the item gets added to p, false when it gets added
732 : : // to split_p.
733 : :
734 [ + + ]: 16725 : if (seq_count < 0) {
735 : : // If we're not in sequential mode, we split at the mid point
736 : : // of the node.
737 : 7820 : m = mid_point(p);
738 : : } else {
739 : : // During sequential addition, split at the insert point
740 : 8905 : m = c;
741 : : }
742 : :
743 : 16725 : uint4 split_n = C[j].n;
744 : 16725 : C[j].n = base.next_free_block();
745 : :
746 : 16725 : memcpy(split_p, p, block_size); // replicate the whole block in split_p
747 : 16725 : SET_DIR_END(split_p, m);
748 : 16725 : compact(split_p); /* to reset TOTAL_FREE, MAX_FREE */
749 : :
750 : : {
751 : 16725 : int residue = DIR_END(p) - m;
752 : 16725 : int new_dir_end = DIR_START + residue;
753 : 16725 : memmove(p + DIR_START, p + m, residue);
754 : 16725 : SET_DIR_END(p, new_dir_end);
755 : : }
756 : :
757 : 16725 : compact(p); /* to reset TOTAL_FREE, MAX_FREE */
758 : :
759 : : bool add_to_upper_half;
760 [ + + ]: 16725 : if (seq_count < 0) {
761 : 7820 : add_to_upper_half = (c >= m);
762 : : } else {
763 : : // And add item to lower half if split_p has room, otherwise upper
764 : : // half
765 : 8905 : add_to_upper_half = (TOTAL_FREE(split_p) < needed);
766 : : }
767 : :
768 [ + + ]: 16725 : if (add_to_upper_half) {
769 : 15156 : c -= (m - DIR_START);
770 : : Assert(seq_count < 0 || c <= DIR_START + D2);
771 : : Assert(c >= DIR_START);
772 : : Assert(c <= DIR_END(p));
773 : 15156 : add_item_to_block(p, kt_, c);
774 : 15156 : n = C[j].n;
775 : : } else {
776 : : Assert(c >= DIR_START);
777 : : Assert(c <= DIR_END(split_p));
778 : 1569 : add_item_to_block(split_p, kt_, c);
779 : 1569 : n = split_n;
780 : : }
781 : 16725 : write_block(split_n, split_p);
782 : :
783 : : // Check if we're splitting the root block.
784 [ + + ]: 16725 : if (j == level) split_root(split_n);
785 : :
786 : : /* Enter a separating key at level j + 1 between */
787 : : /* the last key of block split_p, and the first key of block p */
788 : : enter_key(j + 1,
789 : : Item(split_p, DIR_END(split_p) - D2).key(),
790 : 16725 : Item(p, DIR_START).key());
791 : : } else {
792 : : AssertRel(TOTAL_FREE(p),>=,needed);
793 : :
794 [ + + ]: 1318402 : if (MAX_FREE(p) < needed) {
795 : 949 : compact(p);
796 : : AssertRel(MAX_FREE(p),>=,needed);
797 : : }
798 : :
799 : 1318402 : add_item_to_block(p, kt_, c);
800 : 1318402 : n = C[j].n;
801 : : }
802 [ + + ]: 1335127 : if (j == 0) {
803 : 1318196 : changed_n = n;
804 : 1318196 : changed_c = c;
805 : : }
806 : 1335127 : }
807 : :
808 : : /** ChertTable::delete_item(j, repeatedly) is (almost) the converse of add_item.
809 : : *
810 : : * If repeatedly is true, the process repeats at the next level when a
811 : : * block has been completely emptied, freeing the block and taking out
812 : : * the pointer to it. Emptied root blocks are also removed, which
813 : : * reduces the number of levels in the B-tree.
814 : : */
815 : : void
816 : 43336 : ChertTable::delete_item(int j, bool repeatedly)
817 : : {
818 : : LOGCALL_VOID(DB, "ChertTable::delete_item", j | repeatedly);
819 : : Assert(writable);
820 : 43336 : byte * p = C[j].p;
821 : 43336 : int c = C[j].c;
822 : 43336 : int kt_len = Item(p, c).size(); /* size of the item to be deleted */
823 : 43336 : int dir_end = DIR_END(p) - D2; /* directory length will go down by 2 bytes */
824 : :
825 : 43336 : memmove(p + c, p + c + D2, dir_end - c);
826 : 43336 : SET_DIR_END(p, dir_end);
827 : 43336 : SET_MAX_FREE(p, MAX_FREE(p) + D2);
828 : 43336 : SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
829 : :
830 [ + + ]: 43336 : if (!repeatedly) return;
831 [ + + ]: 42116 : if (j < level) {
832 [ + + ]: 41215 : if (dir_end == DIR_START) {
833 : 413 : base.free_block(C[j].n);
834 : 413 : C[j].rewrite = false;
835 : 413 : C[j].n = BLK_UNUSED;
836 : 413 : C[j + 1].rewrite = true; /* *is* necessary */
837 : 413 : delete_item(j + 1, true);
838 : : }
839 : : } else {
840 : : Assert(j == level);
841 [ + + ][ + + ]: 43351 : while (dir_end == DIR_START + D2 && level > 0) {
[ + + ]
842 : : /* single item in the root block, so lose a level */
843 : 15 : uint4 new_root = Item(p, DIR_START).block_given_by();
844 [ + - ]: 15 : delete [] p;
845 : 15 : C[level].p = 0;
846 : 15 : base.free_block(C[level].n);
847 : 15 : C[level].rewrite = false;
848 : 15 : C[level].n = BLK_UNUSED;
849 : 15 : level--;
850 : :
851 : 15 : block_to_cursor(C, level, new_root);
852 : :
853 : 15 : p = C[level].p;
854 : 15 : dir_end = DIR_END(p); /* prepare for the loop */
855 : : }
856 : : }
857 : : }
858 : :
859 : : /* debugging aid:
860 : : static addcount = 0;
861 : : */
862 : :
863 : : /** add_kt(found) adds the item (key-tag pair) at B->kt into the
864 : : B-tree, using cursor C.
865 : :
866 : : found == find() is handed over as a parameter from Btree::add.
867 : : Btree::alter() prepares for the alteration to the B-tree. Then
868 : : there are a number of cases to consider:
869 : :
870 : : If an item with the same key is in the B-tree (found is true),
871 : : the new kt replaces it.
872 : :
873 : : If then kt is smaller, or the same size as, the item it replaces,
874 : : kt is put in the same place as the item it replaces, and the
875 : : TOTAL_FREE measure is reduced.
876 : :
877 : : If kt is larger than the item it replaces it is put in the
878 : : MAX_FREE space if there is room, and the directory entry and
879 : : space counts are adjusted accordingly.
880 : :
881 : : - But if there is not room we do it the long way: the old item is
882 : : deleted with delete_item and kt is added in with add_item.
883 : :
884 : : If the key of kt is not in the B-tree (found is false), the new
885 : : kt is added in with add_item.
886 : : */
887 : :
888 : : int
889 : 1360357 : ChertTable::add_kt(bool found)
890 : : {
891 : : LOGCALL(DB, int, "ChertTable::add_kt", found);
892 : : Assert(writable);
893 : 1360357 : int components = 0;
894 : :
895 : : /*
896 : : {
897 : : printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
898 : : print_bytes(kt[I2] - K1 - C2, kt + I2 + K1); putchar('\n');
899 : : }
900 : : */
901 : 1360357 : alter();
902 : :
903 [ + + ]: 1360357 : if (found) { /* replacement */
904 : 43381 : seq_count = SEQ_START_POINT;
905 : 43381 : sequential = false;
906 : :
907 : 43381 : byte * p = C[0].p;
908 : 43381 : int c = C[0].c;
909 : 43381 : Item item(p, c);
910 : 43381 : int kt_size = kt.size();
911 : 43381 : int needed = kt_size - item.size();
912 : :
913 : 43381 : components = item.components_of();
914 : :
915 [ + + ]: 43381 : if (needed <= 0) {
916 : : /* simple replacement */
917 : : memmove(const_cast<byte *>(item.get_address()),
918 : 35680 : kt.get_address(), kt_size);
919 : 35680 : SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
920 : : } else {
921 : : /* new item into the block's freespace */
922 : 7701 : int new_max = MAX_FREE(p) - kt_size;
923 [ + + ]: 7701 : if (new_max >= 0) {
924 : 6481 : int o = DIR_END(p) + new_max;
925 : 6481 : memmove(p + o, kt.get_address(), kt_size);
926 : 6481 : setD(p, c, o);
927 : 6481 : SET_MAX_FREE(p, new_max);
928 : 6481 : SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
929 : : } else {
930 : : /* do it the long way */
931 : 1220 : delete_item(0, false);
932 : 1220 : add_item(kt, 0);
933 : : }
934 : : }
935 : : } else {
936 : : /* addition */
937 [ + + ][ + + ]: 2266124 : if (changed_n == C[0].n && changed_c == C[0].c) {
938 [ + + ]: 949148 : if (seq_count < 0) seq_count++;
939 : : } else {
940 : 367828 : seq_count = SEQ_START_POINT;
941 : 367828 : sequential = false;
942 : : }
943 : 1316976 : C[0].c += D2;
944 : 1316976 : add_item(kt, 0);
945 : : }
946 : 1360357 : RETURN(components);
947 : : }
948 : :
949 : : /* delete_kt() corresponds to add_kt(found), but there are only
950 : : two cases: if the key is not found nothing is done, and if it is
951 : : found the corresponding item is deleted with delete_item.
952 : : */
953 : :
954 : : int
955 : 41846 : ChertTable::delete_kt()
956 : : {
957 : : LOGCALL(DB, int, "ChertTable::delete_kt", NO_ARGS);
958 : : Assert(writable);
959 : :
960 : 41846 : bool found = find(C);
961 : :
962 : 41846 : int components = 0;
963 : 41846 : seq_count = SEQ_START_POINT;
964 : 41846 : sequential = false;
965 : :
966 : : /*
967 : : {
968 : : printf("%d) %s ", addcount++, (found ? "deleting " : "ignoring "));
969 : : print_bytes(B->kt[I2] - K1 - C2, B->kt + I2 + K1); putchar('\n');
970 : : }
971 : : */
972 [ + + ]: 41846 : if (found) {
973 : 41703 : components = Item(C[0].p, C[0].c).components_of();
974 : 41703 : alter();
975 : 41703 : delete_item(0, true);
976 : : }
977 : 41846 : RETURN(components);
978 : : }
979 : :
980 : : /* ChertTable::form_key(key) treats address kt as an item holder and fills in
981 : : the key part:
982 : :
983 : : (I) K key c (C tag)
984 : :
985 : : The bracketed parts are left blank. The key is filled in with key_len bytes and
986 : : K set accordingly. c is set to 1.
987 : : */
988 : :
989 : 4590854 : void ChertTable::form_key(const string & key) const
990 : : {
991 : : LOGCALL_VOID(DB, "ChertTable::form_key", key);
992 : 4590854 : kt.form_key(key);
993 : 4590846 : }
994 : :
995 : : /* ChertTable::add(key, tag) adds the key/tag item to the
996 : : B-tree, replacing any existing item with the same key.
997 : :
998 : : For a long tag, we end up having to add m components, of the form
999 : :
1000 : : key 1 m tag1
1001 : : key 2 m tag2
1002 : : ...
1003 : : key m m tagm
1004 : :
1005 : : and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
1006 : : n components of the form
1007 : :
1008 : : key 1 n TAG1
1009 : : key 2 n TAG2
1010 : : ...
1011 : : key n n TAGn
1012 : :
1013 : : and n may be greater than, equal to, or less than m. These cases are dealt
1014 : : with in the code below. If m < n for example, we end up with a series of
1015 : : deletions.
1016 : : */
1017 : :
1018 : : void
1019 : 1352027 : ChertTable::add(const string &key, string tag, bool already_compressed)
1020 : : {
1021 : : LOGCALL_VOID(DB, "ChertTable::add", key | tag | already_compressed);
1022 : : Assert(writable);
1023 : :
1024 [ + + ]: 1352027 : if (handle < 0) create_and_open(block_size);
1025 : :
1026 : 1352027 : form_key(key);
1027 : :
1028 : 1352019 : bool compressed = false;
1029 [ + + ]: 1352019 : if (already_compressed) {
1030 : 157 : compressed = true;
1031 [ + + ][ + + ]: 1351862 : } else if (compress_strategy != DONT_COMPRESS && tag.size() > COMPRESS_MIN) {
[ + + ]
1032 : : CompileTimeAssert(DONT_COMPRESS != Z_DEFAULT_STRATEGY);
1033 : : CompileTimeAssert(DONT_COMPRESS != Z_FILTERED);
1034 : : CompileTimeAssert(DONT_COMPRESS != Z_HUFFMAN_ONLY);
1035 : : #ifdef Z_RLE
1036 : : CompileTimeAssert(DONT_COMPRESS != Z_RLE);
1037 : : #endif
1038 : :
1039 : 58393 : lazy_alloc_deflate_zstream();
1040 : :
1041 : 58393 : deflate_zstream->next_in = (Bytef *)const_cast<char *>(tag.data());
1042 : 58393 : deflate_zstream->avail_in = (uInt)tag.size();
1043 : :
1044 : : // If compressed size is >= tag.size(), we don't want to compress.
1045 : 58393 : unsigned long blk_len = tag.size() - 1;
1046 : 58393 : unsigned char * blk = new unsigned char[blk_len];
1047 : 58393 : deflate_zstream->next_out = blk;
1048 : 58393 : deflate_zstream->avail_out = (uInt)blk_len;
1049 : :
1050 : 58393 : int err = deflate(deflate_zstream, Z_FINISH);
1051 [ + + ]: 58393 : if (err == Z_STREAM_END) {
1052 : : // If deflate succeeded, then the output was at least one byte
1053 : : // smaller than the input.
1054 : 14635 : tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
1055 : 14635 : compressed = true;
1056 : : } else {
1057 : : // Deflate failed - presumably the data wasn't compressible.
1058 : : }
1059 : :
1060 [ + - ]: 58393 : delete [] blk;
1061 : : }
1062 : :
1063 : : // sort of matching kt.append_chunk(), but setting the chunk
1064 : 1352019 : const size_t cd = kt.key().length() + K1 + I2 + C2 + C2; // offset to the tag data
1065 : 1352019 : const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
1066 : 1352019 : size_t first_L = L; // - amount for tag1
1067 : 1352019 : bool found = find(C);
1068 [ + + ]: 1352019 : if (!found) {
1069 : 1308959 : byte * p = C[0].p;
1070 : 1308959 : size_t n = TOTAL_FREE(p) % (max_item_size + D2);
1071 [ + + ]: 1308959 : if (n > D2 + cd) {
1072 : 1271297 : n -= (D2 + cd);
1073 : : // if n >= last then fully filling this block won't produce
1074 : : // an extra item, so we might as well do this even if
1075 : : // full_compaction isn't active.
1076 : : //
1077 : : // In the full_compaction case, it turns out we shouldn't always
1078 : : // try to fill every last byte. Doing so can actually increase the
1079 : : // total space required (I believe this effect is due to longer
1080 : : // dividing keys being required in the index blocks). Empirically,
1081 : : // n >= key.size() + K appears a good criterion for K ~= 34. This
1082 : : // seems to save about 0.2% in total database size over always
1083 : : // splitting the tag. It'll also give be slightly faster retrieval
1084 : : // as we can avoid reading an extra block occasionally.
1085 : 1271297 : size_t last = tag.length() % L;
1086 [ + + + + ]: 1271297 : if (n >= last || (full_compaction && n >= key.size() + 34))
[ + + ][ + + ]
1087 : 1264176 : first_L = n;
1088 : : }
1089 : : }
1090 : :
1091 : : // a null tag must be added in of course
1092 [ + + ]: 1352019 : int m = tag.empty() ? 1 : (tag.length() - first_L + L - 1) / L + 1;
1093 : : // there are m items to add
1094 : : /* FIXME: sort out this error higher up and turn this into
1095 : : * an assert.
1096 : : */
1097 [ - + ]: 1352019 : if (m >= BYTE_PAIR_RANGE)
1098 : 0 : throw Xapian::UnimplementedError("Can't handle insanely large tags");
1099 : :
1100 : 1352019 : int n = 0; // initialise to shut off warning
1101 : : // - and there will be n to delete
1102 : 1352019 : int o = 0; // Offset into the tag
1103 : 1352019 : size_t residue = tag.length(); // Bytes of the tag remaining to add in
1104 : 1352019 : int replacement = false; // Has there been a replacement ?
1105 : : int i;
1106 : 1352019 : kt.set_components_of(m);
1107 [ + + ]: 2712376 : for (i = 1; i <= m; i++) {
1108 [ + + ][ + + ]: 1360357 : size_t l = (i == m ? residue : (i == 1 ? first_L : L));
1109 : : Assert(cd + l <= block_size);
1110 : : Assert(string::size_type(o + l) <= tag.length());
1111 : 1360357 : kt.set_tag(cd, tag.data() + o, l, compressed);
1112 : 1360357 : kt.set_component_of(i);
1113 : :
1114 : 1360357 : o += l;
1115 : 1360357 : residue -= l;
1116 : :
1117 [ + + ]: 1360357 : if (i > 1) found = find(C);
1118 : 1360357 : n = add_kt(found);
1119 [ + + ]: 1360357 : if (n > 0) replacement = true;
1120 : : }
1121 : : /* o == tag.length() here, and n may be zero */
1122 [ - + ]: 1352019 : for (i = m + 1; i <= n; i++) {
1123 : 0 : kt.set_component_of(i);
1124 : 0 : delete_kt();
1125 : : }
1126 [ + + ]: 1352019 : if (!replacement) ++item_count;
1127 : 1352019 : Btree_modified = true;
1128 [ + + ]: 1352019 : if (cursor_created_since_last_modification) {
1129 : 188020 : cursor_created_since_last_modification = false;
1130 : 188020 : ++cursor_version;
1131 : : }
1132 : 1352019 : }
1133 : :
1134 : : /* ChertTable::del(key) returns false if the key is not in the B-tree,
1135 : : otherwise deletes it and returns true.
1136 : :
1137 : : Again, this is parallel to ChertTable::add, but simpler in form.
1138 : : */
1139 : :
1140 : : bool
1141 : 45922 : ChertTable::del(const string &key)
1142 : : {
1143 : : LOGCALL(DB, bool, "ChertTable::del", key);
1144 : : Assert(writable);
1145 : :
1146 [ + + ]: 45922 : if (handle < 0) {
1147 [ - + ]: 4443 : if (handle == -2) {
1148 : 0 : ChertTable::throw_database_closed();
1149 : : }
1150 : 4443 : RETURN(false);
1151 : : }
1152 : :
1153 : : // We can't delete a key which we is too long for us to store.
1154 [ - + ]: 41479 : if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1155 : :
1156 [ - + ]: 41479 : if (key.empty()) RETURN(false);
1157 : 41479 : form_key(key);
1158 : :
1159 : 41479 : int n = delete_kt(); /* there are n items to delete */
1160 [ + + ]: 41479 : if (n <= 0) RETURN(false);
1161 : :
1162 [ + + ]: 41703 : for (int i = 2; i <= n; i++) {
1163 : 367 : kt.set_component_of(i);
1164 : 367 : delete_kt();
1165 : : }
1166 : :
1167 : 41336 : item_count--;
1168 : 41336 : Btree_modified = true;
1169 [ + + ]: 41336 : if (cursor_created_since_last_modification) {
1170 : 186 : cursor_created_since_last_modification = false;
1171 : 186 : ++cursor_version;
1172 : : }
1173 : 45922 : RETURN(true);
1174 : : }
1175 : :
1176 : : bool
1177 : 1120948 : ChertTable::get_exact_entry(const string &key, string & tag) const
1178 : : {
1179 : : LOGCALL(DB, bool, "ChertTable::get_exact_entry", key | tag);
1180 : : Assert(!key.empty());
1181 : :
1182 [ + + ]: 1120948 : if (handle < 0) {
1183 [ - + ]: 50999 : if (handle == -2) {
1184 : 0 : ChertTable::throw_database_closed();
1185 : : }
1186 : 50999 : RETURN(false);
1187 : : }
1188 : :
1189 : : // An oversized key can't exist, so attempting to search for it should fail.
1190 [ + + ]: 1069949 : if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1191 : :
1192 : 1069917 : form_key(key);
1193 [ + + ]: 1069917 : if (!find(C)) RETURN(false);
1194 : :
1195 : 765390 : (void)read_tag(C, &tag, false);
1196 : 1120946 : RETURN(true);
1197 : : }
1198 : :
1199 : : bool
1200 : 1096 : ChertTable::key_exists(const string &key) const
1201 : : {
1202 : : LOGCALL(DB, bool, "ChertTable::key_exists", key);
1203 : : Assert(!key.empty());
1204 : :
1205 : : // An oversized key can't exist, so attempting to search for it should fail.
1206 [ - + ]: 1096 : if (key.size() > CHERT_BTREE_MAX_KEY_LEN) RETURN(false);
1207 : :
1208 : 1096 : form_key(key);
1209 : 1096 : RETURN(find(C));
1210 : : }
1211 : :
1212 : : bool
1213 : 2698122 : ChertTable::read_tag(Cursor * C_, string *tag, bool keep_compressed) const
1214 : : {
1215 : : LOGCALL(DB, bool, "ChertTable::read_tag", Literal("C_") | tag | keep_compressed);
1216 : 2698122 : Item item(C_[0].p, C_[0].c);
1217 : :
1218 : : /* n components to join */
1219 : 2698122 : int n = item.components_of();
1220 : :
1221 : 2698122 : tag->resize(0);
1222 : : // max_item_size also includes K1 + I2 + C2 + C2 bytes overhead and the key
1223 : : // (which is at least 1 byte long).
1224 [ + + ]: 2698122 : if (n > 1) tag->reserve((max_item_size - (1 + K1 + I2 + C2 + C2)) * n);
1225 : :
1226 : 2698122 : item.append_chunk(tag);
1227 : 2698122 : bool compressed = item.get_compressed();
1228 : :
1229 [ + + ]: 6965416 : for (int i = 2; i <= n; i++) {
1230 [ - + ]: 4267294 : if (!next(C_, 0)) {
1231 : 0 : throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
1232 : : }
1233 : 4267294 : (void)Item(C_[0].p, C_[0].c).append_chunk(tag);
1234 : : }
1235 : : // At this point the cursor is on the last item - calling next will move
1236 : : // it to the next key (ChertCursor::get_tag() relies on this).
1237 [ + + ][ + + ]: 2698122 : if (!compressed || keep_compressed) RETURN(compressed);
1238 : :
1239 : : // FIXME: Perhaps we should we decompress each chunk as we read it so we
1240 : : // don't need both the full compressed and uncompressed tags in memory
1241 : : // at once.
1242 : :
1243 : 207456 : string utag;
1244 : : // May not be enough for a compressed tag, but it's a reasonable guess.
1245 : 207456 : utag.reserve(tag->size() + tag->size() / 2);
1246 : :
1247 : : Bytef buf[8192];
1248 : :
1249 : 207456 : lazy_alloc_inflate_zstream();
1250 : :
1251 : 207456 : inflate_zstream->next_in = (Bytef*)const_cast<char *>(tag->data());
1252 : 207456 : inflate_zstream->avail_in = (uInt)tag->size();
1253 : :
1254 : 207456 : int err = Z_OK;
1255 [ + + ]: 414912 : while (err != Z_STREAM_END) {
1256 : 207456 : inflate_zstream->next_out = buf;
1257 : 207456 : inflate_zstream->avail_out = (uInt)sizeof(buf);
1258 : 207456 : err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1259 [ - + # # ]: 207456 : if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
1260 : : LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
1261 : : Bytef header2[4];
1262 : 0 : setint4(header2, 0, inflate_zstream->adler);
1263 : 0 : inflate_zstream->next_in = header2;
1264 : 0 : inflate_zstream->avail_in = 4;
1265 : 0 : err = inflate(inflate_zstream, Z_SYNC_FLUSH);
1266 [ # # ]: 0 : if (err == Z_STREAM_END) break;
1267 : : }
1268 : :
1269 [ + - ][ - + ]: 207456 : if (err != Z_OK && err != Z_STREAM_END) {
1270 [ # # ]: 0 : if (err == Z_MEM_ERROR) throw std::bad_alloc();
1271 : 0 : string msg = "inflate failed";
1272 [ # # ]: 0 : if (inflate_zstream->msg) {
1273 : 0 : msg += " (";
1274 : 0 : msg += inflate_zstream->msg;
1275 : 0 : msg += ')';
1276 : : }
1277 : 0 : throw Xapian::DatabaseError(msg);
1278 : : }
1279 : :
1280 : : utag.append(reinterpret_cast<const char *>(buf),
1281 : 207456 : inflate_zstream->next_out - buf);
1282 : : }
1283 [ - + ]: 207456 : if (utag.size() != inflate_zstream->total_out) {
1284 : 0 : string msg = "compressed tag didn't expand to the expected size: ";
1285 : 0 : msg += str(utag.size());
1286 : 0 : msg += " != ";
1287 : : // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
1288 : 0 : msg += str((size_t)inflate_zstream->total_out);
1289 : 0 : throw Xapian::DatabaseCorruptError(msg);
1290 : : }
1291 : :
1292 : 207456 : swap(*tag, utag);
1293 : :
1294 : 2698122 : RETURN(false);
1295 : : }
1296 : :
1297 : : void
1298 : 40 : ChertTable::set_full_compaction(bool parity)
1299 : : {
1300 : : LOGCALL_VOID(DB, "ChertTable::set_full_compaction", parity);
1301 : : Assert(writable);
1302 : :
1303 [ + - ]: 40 : if (parity) seq_count = 0;
1304 : 40 : full_compaction = parity;
1305 : 40 : }
1306 : :
1307 : 2014545 : ChertCursor * ChertTable::cursor_get() const {
1308 : : LOGCALL(DB, ChertCursor *, "ChertTable::cursor_get", NO_ARGS);
1309 [ + + ]: 2014545 : if (handle < 0) {
1310 [ + + ]: 9 : if (handle == -2) {
1311 : 7 : ChertTable::throw_database_closed();
1312 : : }
1313 : 2 : RETURN(NULL);
1314 : : }
1315 : : // FIXME Ick - casting away const is nasty
1316 : 2014538 : RETURN(new ChertCursor(const_cast<ChertTable *>(this)));
1317 : : }
1318 : :
1319 : : /************ B-tree opening and closing ************/
1320 : :
1321 : : bool
1322 : 9113 : ChertTable::basic_open(bool revision_supplied, chert_revision_number_t revision_)
1323 : : {
1324 : : LOGCALL(DB, bool, "ChertTable::basic_open", revision_supplied | revision_);
1325 : 9113 : int ch = 'X'; /* will be 'A' or 'B' */
1326 : :
1327 : : {
1328 : 9113 : const size_t BTREE_BASES = 2;
1329 : 9113 : string err_msg;
1330 : : static const char basenames[BTREE_BASES] = { 'A', 'B' };
1331 : :
1332 [ + + ][ # # ]: 27339 : ChertTable_base bases[BTREE_BASES];
[ # # ]
1333 : : bool base_ok[BTREE_BASES];
1334 : :
1335 : 9113 : both_bases = true;
1336 : 9113 : bool valid_base = false;
1337 : : {
1338 [ + + ]: 27339 : for (size_t i = 0; i < BTREE_BASES; ++i) {
1339 : 18226 : bool ok = bases[i].read(name, basenames[i], writable, err_msg);
1340 : 18226 : base_ok[i] = ok;
1341 [ + + ]: 18226 : if (ok) {
1342 : 15705 : valid_base = true;
1343 : : } else {
1344 : 2521 : both_bases = false;
1345 : : }
1346 : : }
1347 : : }
1348 : :
1349 [ - + ]: 9113 : if (!valid_base) {
1350 [ # # ]: 0 : if (handle >= 0) {
1351 : 0 : ::close(handle);
1352 : 0 : handle = -1;
1353 : : }
1354 : 0 : string message = "Error opening table `";
1355 : 0 : message += name;
1356 : 0 : message += "':\n";
1357 : 0 : message += err_msg;
1358 : 0 : throw Xapian::DatabaseOpeningError(message);
1359 : : }
1360 : :
1361 [ + + ]: 9113 : if (revision_supplied) {
1362 : 5423 : bool found_revision = false;
1363 [ + - ]: 9878 : for (size_t i = 0; i < BTREE_BASES; ++i) {
1364 [ + + ][ + + ]: 9878 : if (base_ok[i] && bases[i].get_revision() == revision_) {
[ + + ]
1365 : 5423 : ch = basenames[i];
1366 : 5423 : found_revision = true;
1367 : 5423 : break;
1368 : : }
1369 : : }
1370 [ - + ]: 5423 : if (!found_revision) {
1371 : : /* Couldn't open the revision that was asked for.
1372 : : * This shouldn't throw an exception, but should just return
1373 : : * false to upper levels.
1374 : : */
1375 : 0 : RETURN(false);
1376 : : }
1377 : : } else {
1378 : 3690 : chert_revision_number_t highest_revision = 0;
1379 [ + + ]: 11070 : for (size_t i = 0; i < BTREE_BASES; ++i) {
1380 [ + + ][ + + ]: 7380 : if (base_ok[i] && bases[i].get_revision() >= highest_revision) {
[ + + ]
1381 : 5659 : ch = basenames[i];
1382 : 5659 : highest_revision = bases[i].get_revision();
1383 : : }
1384 : : }
1385 : : }
1386 : :
1387 : 9113 : ChertTable_base *basep = 0;
1388 : 9113 : ChertTable_base *other_base = 0;
1389 : :
1390 [ + - ]: 15537 : for (size_t i = 0; i < BTREE_BASES; ++i) {
1391 : : LOGLINE(DB, "Checking (ch == " << ch << ") against "
1392 : : "basenames[" << i << "] == " << basenames[i]);
1393 : : LOGLINE(DB, "bases[" << i << "].get_revision() == " <<
1394 : : bases[i].get_revision());
1395 : : LOGLINE(DB, "base_ok[" << i << "] == " << base_ok[i]);
1396 [ + + ]: 15537 : if (ch == basenames[i]) {
1397 : 9113 : basep = &bases[i];
1398 : :
1399 : : // FIXME: assuming only two bases for other_base
1400 : 9113 : size_t otherbase_num = 1-i;
1401 [ + + ]: 9113 : if (base_ok[otherbase_num]) {
1402 : 6592 : other_base = &bases[otherbase_num];
1403 : : }
1404 : 9113 : break;
1405 : : }
1406 : : }
1407 : : Assert(basep);
1408 : :
1409 : : /* basep now points to the most recent base block */
1410 : :
1411 : : /* Avoid copying the bitmap etc. - swap contents with the base
1412 : : * object in the vector, since it'll be destroyed anyway soon.
1413 : : */
1414 : 9113 : base.swap(*basep);
1415 : :
1416 : 9113 : revision_number = base.get_revision();
1417 : 9113 : block_size = base.get_block_size();
1418 : 9113 : root = base.get_root();
1419 : 9113 : level = base.get_level();
1420 : : //bit_map_size = basep->get_bit_map_size();
1421 : 9113 : item_count = base.get_item_count();
1422 : 9113 : faked_root_block = base.get_have_fakeroot();
1423 : 9113 : sequential = base.get_sequential();
1424 : :
1425 [ + + ]: 9113 : if (other_base != 0) {
1426 : 6592 : latest_revision_number = other_base->get_revision();
1427 [ + - ]: 6592 : if (revision_number > latest_revision_number)
1428 : 6592 : latest_revision_number = revision_number;
1429 : : } else {
1430 : 9113 : latest_revision_number = revision_number;
1431 [ # # ][ + + ]: 27339 : }
[ - + ][ + - ]
1432 : : }
1433 : :
1434 : : /* kt holds constructed items as well as keys */
1435 : 9113 : kt = Item_wr(zeroed_new(block_size));
1436 : :
1437 : 9113 : set_max_item_size(BLOCK_CAPACITY);
1438 : :
1439 : 9113 : base_letter = ch;
1440 : :
1441 : : /* ready to open the main file */
1442 : :
1443 : 9113 : RETURN(true);
1444 : : }
1445 : :
1446 : : void
1447 : 11374 : ChertTable::read_root()
1448 : : {
1449 : : LOGCALL_VOID(DB, "ChertTable::read_root", NO_ARGS);
1450 [ + + ]: 11374 : if (faked_root_block) {
1451 : : /* root block for an unmodified database. */
1452 : 3083 : byte * p = C[0].p;
1453 : : Assert(p);
1454 : :
1455 : : /* clear block - shouldn't be necessary, but is a bit nicer,
1456 : : * and means that the same operations should always produce
1457 : : * the same database. */
1458 : 3083 : memset(p, 0, block_size);
1459 : :
1460 : 3083 : int o = block_size - I2 - K1 - C2 - C2;
1461 : 3083 : Item_wr(p + o).fake_root_item();
1462 : :
1463 : 3083 : setD(p, DIR_START, o); // its directory entry
1464 : 3083 : SET_DIR_END(p, DIR_START + D2);// the directory size
1465 : :
1466 : 3083 : o -= (DIR_START + D2);
1467 : 3083 : SET_MAX_FREE(p, o);
1468 : 3083 : SET_TOTAL_FREE(p, o);
1469 : 3083 : SET_LEVEL(p, 0);
1470 : :
1471 [ + + ]: 3083 : if (!writable) {
1472 : : /* reading - revision number doesn't matter as long as
1473 : : * it's not greater than the current one. */
1474 : 572 : SET_REVISION(p, 0);
1475 : 572 : C[0].n = 0;
1476 : : } else {
1477 : : /* writing - */
1478 : 2511 : SET_REVISION(p, latest_revision_number + 1);
1479 : 2511 : C[0].n = base.next_free_block();
1480 : : }
1481 : : } else {
1482 : : /* using a root block stored on disk */
1483 : 8291 : block_to_cursor(C, level, root);
1484 : :
1485 [ - + ]: 8291 : if (REVISION(C[level].p) > revision_number) set_overwritten();
1486 : : /* although this is unlikely */
1487 : : }
1488 : 11374 : }
1489 : :
1490 : : bool
1491 : 3285 : ChertTable::do_open_to_write(bool revision_supplied,
1492 : : chert_revision_number_t revision_,
1493 : : bool create_db)
1494 : : {
1495 : : LOGCALL(DB, bool, "ChertTable::do_open_to_write", revision_supplied | revision_ | create_db);
1496 [ - + ]: 3285 : if (handle == -2) {
1497 : 0 : ChertTable::throw_database_closed();
1498 : : }
1499 : 3285 : int flags = O_RDWR | O_BINARY;
1500 [ + + ]: 3285 : if (create_db) flags |= O_CREAT | O_TRUNC;
1501 : 3285 : handle = ::open((name + "DB").c_str(), flags, 0666);
1502 [ + + ]: 3285 : if (handle < 0) {
1503 : : // lazy doesn't make a lot of sense with create_db anyway, but ENOENT
1504 : : // with O_CREAT means a parent directory doesn't exist.
1505 [ + - ][ + - ]: 936 : if (lazy && !create_db && errno == ENOENT) {
[ + - ]
1506 : 936 : revision_number = revision_;
1507 : 936 : RETURN(true);
1508 : : }
1509 [ # # ]: 0 : string message(create_db ? "Couldn't create " : "Couldn't open ");
1510 : 0 : message += name;
1511 : 0 : message += "DB read/write: ";
1512 : 0 : message += strerror(errno);
1513 : 0 : throw Xapian::DatabaseOpeningError(message);
1514 : : }
1515 : :
1516 [ - + ]: 2349 : if (!basic_open(revision_supplied, revision_)) {
1517 : 0 : ::close(handle);
1518 : 0 : handle = -1;
1519 [ # # ]: 0 : if (!revision_supplied) {
1520 : 0 : throw Xapian::DatabaseOpeningError("Failed to open for writing");
1521 : : }
1522 : : /* When the revision is supplied, it's not an exceptional
1523 : : * case when open failed, so we just return false here.
1524 : : */
1525 : 0 : RETURN(false);
1526 : : }
1527 : :
1528 : 2349 : writable = true;
1529 : :
1530 [ + + ]: 4762 : for (int j = 0; j <= level; j++) {
1531 : 2413 : C[j].n = BLK_UNUSED;
1532 : 2413 : C[j].p = new byte[block_size];
1533 [ - + ]: 2413 : if (C[j].p == 0) {
1534 : 0 : throw std::bad_alloc();
1535 : : }
1536 : : }
1537 : 2349 : split_p = new byte[block_size];
1538 [ - + ]: 2349 : if (split_p == 0) {
1539 : 0 : throw std::bad_alloc();
1540 : : }
1541 : 2349 : read_root();
1542 : :
1543 : 2349 : buffer = zeroed_new(block_size);
1544 : :
1545 : 2349 : changed_n = 0;
1546 : 2349 : changed_c = DIR_START;
1547 : 2349 : seq_count = SEQ_START_POINT;
1548 : :
1549 : 3285 : RETURN(true);
1550 : : }
1551 : :
1552 : 13889 : ChertTable::ChertTable(const char * tablename_, const string & path_,
1553 : : bool readonly_, int compress_strategy_, bool lazy_)
1554 : : : tablename(tablename_),
1555 : : revision_number(0),
1556 : : item_count(0),
1557 : : block_size(0),
1558 : : latest_revision_number(0),
1559 : : both_bases(false),
1560 : : base_letter('A'),
1561 : : faked_root_block(true),
1562 : : sequential(true),
1563 : : handle(-1),
1564 : : level(0),
1565 : : root(0),
1566 : : kt(0),
1567 : : buffer(0),
1568 : : base(),
1569 : : name(path_),
1570 : : seq_count(0),
1571 : : changed_n(0),
1572 : : changed_c(0),
1573 : : max_item_size(0),
1574 : : Btree_modified(false),
1575 : : full_compaction(false),
1576 : : writable(!readonly_),
1577 : : cursor_created_since_last_modification(false),
1578 : : cursor_version(0),
1579 : : split_p(0),
1580 : : compress_strategy(compress_strategy_),
1581 : : deflate_zstream(NULL),
1582 : : inflate_zstream(NULL),
1583 [ + + ][ + + ]: 152779 : lazy(lazy_)
1584 : : {
1585 : : LOGCALL_CTOR(DB, "ChertTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
1586 : 13889 : }
1587 : :
1588 : : bool
1589 : 893 : ChertTable::really_empty() const
1590 : : {
1591 [ + + ]: 893 : if (handle < 0) {
1592 [ - + ]: 426 : if (handle == -2) {
1593 : 0 : ChertTable::throw_database_closed();
1594 : : }
1595 : 426 : return true;
1596 : : }
1597 : 467 : ChertCursor cur(const_cast<ChertTable*>(this));
1598 : 467 : cur.find_entry(string());
1599 : 893 : return !cur.next();
1600 : : }
1601 : :
1602 : : void
1603 : 58393 : ChertTable::lazy_alloc_deflate_zstream() const {
1604 [ + + ]: 58393 : if (usual(deflate_zstream)) {
1605 [ + - ]: 58020 : if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
1606 : : // Try to recover by deleting the stream and starting from scratch.
1607 : 0 : delete deflate_zstream;
1608 : : }
1609 : :
1610 : 373 : deflate_zstream = new z_stream;
1611 : :
1612 : 373 : deflate_zstream->zalloc = Z_NULL;
1613 : 373 : deflate_zstream->zfree = Z_NULL;
1614 : 373 : deflate_zstream->opaque = Z_NULL;
1615 : :
1616 : : // -15 means raw deflate with 32K LZ77 window (largest)
1617 : : // memLevel 9 is the highest (8 is default)
1618 : : int err;
1619 : : err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
1620 : 373 : -15, 9, compress_strategy);
1621 [ - + ]: 373 : if (rare(err != Z_OK)) {
1622 [ # # ]: 0 : if (err == Z_MEM_ERROR) {
1623 : 0 : delete deflate_zstream;
1624 : 0 : deflate_zstream = 0;
1625 : 0 : throw std::bad_alloc();
1626 : : }
1627 : 0 : string msg = "deflateInit2 failed (";
1628 [ # # ]: 0 : if (deflate_zstream->msg) {
1629 : 0 : msg += deflate_zstream->msg;
1630 : : } else {
1631 : 0 : msg += str(err);
1632 : : }
1633 : 0 : msg += ')';
1634 : 0 : delete deflate_zstream;
1635 : 0 : deflate_zstream = 0;
1636 : 58393 : throw Xapian::DatabaseError(msg);
1637 : : }
1638 : : }
1639 : :
1640 : : void
1641 : 207456 : ChertTable::lazy_alloc_inflate_zstream() const {
1642 [ + + ]: 207456 : if (usual(inflate_zstream)) {
1643 [ + - ]: 207221 : if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
1644 : : // Try to recover by deleting the stream and starting from scratch.
1645 : 0 : delete inflate_zstream;
1646 : : }
1647 : :
1648 : 235 : inflate_zstream = new z_stream;
1649 : :
1650 : 235 : inflate_zstream->zalloc = Z_NULL;
1651 : 235 : inflate_zstream->zfree = Z_NULL;
1652 : 235 : inflate_zstream->opaque = Z_NULL;
1653 : :
1654 : 235 : inflate_zstream->next_in = Z_NULL;
1655 : 235 : inflate_zstream->avail_in = 0;
1656 : :
1657 : 235 : int err = inflateInit2(inflate_zstream, -15);
1658 [ - + ]: 235 : if (rare(err != Z_OK)) {
1659 [ # # ]: 0 : if (err == Z_MEM_ERROR) {
1660 : 0 : delete inflate_zstream;
1661 : 0 : inflate_zstream = 0;
1662 : 0 : throw std::bad_alloc();
1663 : : }
1664 : 0 : string msg = "inflateInit2 failed (";
1665 [ # # ]: 0 : if (inflate_zstream->msg) {
1666 : 0 : msg += inflate_zstream->msg;
1667 : : } else {
1668 : 0 : msg += str(err);
1669 : : }
1670 : 0 : msg += ')';
1671 : 0 : delete inflate_zstream;
1672 : 0 : inflate_zstream = 0;
1673 : 207456 : throw Xapian::DatabaseError(msg);
1674 : : }
1675 : : }
1676 : :
1677 : : bool
1678 : 616 : ChertTable::exists() const {
1679 : : LOGCALL(DB, bool, "ChertTable::exists", NO_ARGS);
1680 : : return (file_exists(name + "DB") &&
1681 [ + + ][ - + ]: 616 : (file_exists(name + "baseA") || file_exists(name + "baseB")));
[ # # ][ - + ]
[ # # ][ # # ]
[ + + ][ # # ]
[ + - ]
1682 : : }
1683 : :
1684 : : void
1685 : 1009 : ChertTable::erase()
1686 : : {
1687 : : LOGCALL_VOID(DB, "ChertTable::erase", NO_ARGS);
1688 : 1009 : close();
1689 : :
1690 : 1009 : (void)io_unlink(name + "baseA");
1691 : 1009 : (void)io_unlink(name + "baseB");
1692 : 1009 : (void)io_unlink(name + "DB");
1693 : 1009 : }
1694 : :
1695 : : void
1696 : 10149 : ChertTable::set_block_size(unsigned int block_size_)
1697 : : {
1698 : : LOGCALL_VOID(DB, "ChertTable::set_block_size", block_size_);
1699 : : // Block size must in the range 2048..BYTE_PAIR_RANGE, and a power of two.
1700 [ + - ][ + - ]: 10149 : if (block_size_ < 2048 || block_size_ > BYTE_PAIR_RANGE ||
[ - + ]
1701 : : (block_size_ & (block_size_ - 1)) != 0) {
1702 : 0 : block_size_ = CHERT_DEFAULT_BLOCK_SIZE;
1703 : : }
1704 : 10149 : block_size = block_size_;
1705 : 10149 : }
1706 : :
1707 : : void
1708 : 1224 : ChertTable::create_and_open(unsigned int block_size_)
1709 : : {
1710 : : LOGCALL_VOID(DB, "ChertTable::create_and_open", block_size_);
1711 [ - + ]: 1224 : if (handle == -2) {
1712 : 0 : ChertTable::throw_database_closed();
1713 : : }
1714 : : Assert(writable);
1715 : 1224 : close();
1716 : :
1717 [ - + ]: 1224 : if (block_size_ == 0) abort();
1718 : 1224 : set_block_size(block_size_);
1719 : :
1720 : : // FIXME: it would be good to arrange that this works such that there's
1721 : : // always a valid table in place if you run create_and_open() on an
1722 : : // existing table.
1723 : :
1724 : : /* write initial values to files */
1725 : :
1726 : : /* create the base file */
1727 : 1224 : ChertTable_base base_;
1728 : 1224 : base_.set_revision(revision_number);
1729 : 1224 : base_.set_block_size(block_size_);
1730 : 1224 : base_.set_have_fakeroot(true);
1731 : 1224 : base_.set_sequential(true);
1732 : 1224 : base_.write_to_file(name + "baseA", 'A', string(), -1, NULL);
1733 : :
1734 : : /* remove the alternative base file, if any */
1735 : 1224 : (void)io_unlink(name + "baseB");
1736 : :
1737 : : // Any errors are thrown if revision_supplied is false.
1738 : 1224 : (void)do_open_to_write(false, 0, true);
1739 : 1224 : }
1740 : :
1741 : 13889 : ChertTable::~ChertTable() {
1742 : : LOGCALL_DTOR(DB, "ChertTable");
1743 : 13889 : ChertTable::close();
1744 : :
1745 [ + + + + ]: 13889 : if (deflate_zstream) {
1746 : : // Errors which we care about have already been handled, so just ignore
1747 : : // any which get returned here.
1748 : 373 : (void) deflateEnd(deflate_zstream);
1749 : 373 : delete deflate_zstream;
1750 : : }
1751 : :
1752 [ - + ][ + + ]: 13889 : if (inflate_zstream) {
1753 : : // Errors which we care about have already been handled, so just ignore
1754 : : // any which get returned here.
1755 : 235 : (void) inflateEnd(inflate_zstream);
1756 : 235 : delete inflate_zstream;
1757 : : }
1758 : 13889 : }
1759 : :
1760 : 28579 : void ChertTable::close(bool permanent) {
1761 : : LOGCALL_VOID(DB, "ChertTable::close", NO_ARGS);
1762 : :
1763 [ + + ]: 28579 : if (handle >= 0) {
1764 : : // If an error occurs here, we just ignore it, since we're just
1765 : : // trying to free everything.
1766 : 9113 : (void)::close(handle);
1767 : 9113 : handle = -1;
1768 : : }
1769 : :
1770 [ + + ]: 28579 : if (permanent) {
1771 : 90 : handle = -2;
1772 : : // Don't delete the resources in the table, since they may
1773 : : // still be used to look up cached content.
1774 : 90 : return;
1775 : : }
1776 [ + + ]: 59414 : for (int j = level; j >= 0; j--) {
1777 [ + + ]: 30925 : delete [] C[j].p;
1778 : 30925 : C[j].p = 0;
1779 : : }
1780 [ + + ]: 28489 : delete [] split_p;
1781 : 28489 : split_p = 0;
1782 : :
1783 [ + + ]: 28489 : delete [] kt.get_address();
1784 : 28489 : kt = 0;
1785 [ + + ]: 28489 : delete [] buffer;
1786 : 28579 : buffer = 0;
1787 : : }
1788 : :
1789 : : void
1790 : 2956 : ChertTable::flush_db()
1791 : : {
1792 : : LOGCALL_VOID(DB, "ChertTable::flush_db", NO_ARGS);
1793 : : Assert(writable);
1794 [ + + ]: 2956 : if (handle < 0) {
1795 [ - + ]: 1164 : if (handle == -2) {
1796 : 0 : ChertTable::throw_database_closed();
1797 : : }
1798 : 1164 : return;
1799 : : }
1800 : :
1801 [ + + ]: 3934 : for (int j = level; j >= 0; j--) {
1802 [ + + ]: 2142 : if (C[j].rewrite) {
1803 : 1909 : write_block(C[j].n, C[j].p);
1804 : : }
1805 : : }
1806 : :
1807 [ + + ]: 1792 : if (Btree_modified) {
1808 : 2956 : faked_root_block = false;
1809 : : }
1810 : : }
1811 : :
1812 : : void
1813 : 2956 : ChertTable::commit(chert_revision_number_t revision, int changes_fd,
1814 : : const string * changes_tail)
1815 : : {
1816 : : LOGCALL_VOID(DB, "ChertTable::commit", revision | changes_fd | changes_tail);
1817 : : Assert(writable);
1818 : :
1819 [ - + ]: 2956 : if (revision <= revision_number) {
1820 : 0 : throw Xapian::DatabaseError("New revision too low");
1821 : : }
1822 : :
1823 [ + + ]: 2956 : if (handle < 0) {
1824 [ - + ]: 1164 : if (handle == -2) {
1825 : 0 : ChertTable::throw_database_closed();
1826 : : }
1827 : 1164 : latest_revision_number = revision_number = revision;
1828 : 1164 : return;
1829 : : }
1830 : :
1831 : : try {
1832 [ + + ]: 1792 : if (faked_root_block) {
1833 : : /* We will use a dummy bitmap. */
1834 : 135 : base.clear_bit_map();
1835 : : }
1836 : :
1837 : 1792 : base.set_revision(revision);
1838 : 1792 : base.set_root(C[level].n);
1839 : 1792 : base.set_level(level);
1840 : 1792 : base.set_item_count(item_count);
1841 : 1792 : base.set_have_fakeroot(faked_root_block);
1842 : 1792 : base.set_sequential(sequential);
1843 : :
1844 : 1792 : base_letter = other_base_letter();
1845 : :
1846 : 1792 : both_bases = true;
1847 : 1792 : latest_revision_number = revision_number = revision;
1848 : 1792 : root = C[level].n;
1849 : :
1850 : 1792 : Btree_modified = false;
1851 : :
1852 [ + + ]: 19712 : for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
1853 : 17920 : C[i].n = BLK_UNUSED;
1854 : 17920 : C[i].c = -1;
1855 : 17920 : C[i].rewrite = false;
1856 : : }
1857 : :
1858 : : // Save to "<table>.tmp" and then rename to "<table>.base<letter>" so
1859 : : // that a reader can't try to read a partially written base file.
1860 : 1792 : string tmp = name;
1861 : 1792 : tmp += "tmp";
1862 : 1792 : string basefile = name;
1863 : 1792 : basefile += "base";
1864 : 1792 : basefile += char(base_letter);
1865 : 1792 : base.write_to_file(tmp, base_letter, tablename, changes_fd, changes_tail);
1866 : :
1867 : : // Do this as late as possible to allow maximum time for writes to
1868 : : // happen, and so the calls to io_sync() are adjacent which may be
1869 : : // more efficient, at least with some Linux kernel versions.
1870 [ - + ]: 1792 : if (!io_sync(handle)) {
1871 : 0 : (void)::close(handle);
1872 : 0 : handle = -1;
1873 : 0 : (void)unlink(tmp);
1874 : 0 : throw Xapian::DatabaseError("Can't commit new revision - failed to flush DB to disk");
1875 : : }
1876 : :
1877 : : #if defined __WIN32__
1878 : : if (msvc_posix_rename(tmp.c_str(), basefile.c_str()) < 0)
1879 : : #else
1880 [ - + ]: 1792 : if (rename(tmp.c_str(), basefile.c_str()) < 0)
1881 : : #endif
1882 : : {
1883 : : // With NFS, rename() failing may just mean that the server crashed
1884 : : // after successfully renaming, but before reporting this, and then
1885 : : // the retried operation fails. So we need to check if the source
1886 : : // file still exists, which we do by calling unlink(), since we want
1887 : : // to remove the temporary file anyway.
1888 : 0 : int saved_errno = errno;
1889 [ # # # # ]: 0 : if (unlink(tmp) == 0 || errno != ENOENT) {
[ # # ]
1890 : 0 : string msg("Couldn't update base file ");
1891 : 0 : msg += basefile;
1892 : 0 : msg += ": ";
1893 : 0 : msg += strerror(saved_errno);
1894 : 0 : throw Xapian::DatabaseError(msg);
1895 : : }
1896 : : }
1897 : 1792 : base.commit();
1898 : :
1899 : 1792 : read_root();
1900 : :
1901 : 1792 : changed_n = 0;
1902 : 1792 : changed_c = DIR_START;
1903 : 1792 : seq_count = SEQ_START_POINT;
1904 : 2956 : } catch (...) {
1905 : 0 : ChertTable::close();
1906 : 0 : throw;
1907 : : }
1908 : : }
1909 : :
1910 : : void
1911 : 138 : ChertTable::write_changed_blocks(int changes_fd)
1912 : : {
1913 : : LOGCALL_VOID(DB, "ChertTable::write_changed_blocks", changes_fd);
1914 : : Assert(changes_fd >= 0);
1915 [ + + ]: 138 : if (handle < 0) return;
1916 [ - + ]: 86 : if (faked_root_block) return;
1917 : :
1918 : 86 : string buf;
1919 : 86 : pack_uint(buf, 2u); // Indicate the item is a list of blocks
1920 : 86 : pack_string(buf, tablename);
1921 : 86 : pack_uint(buf, block_size);
1922 : 86 : io_write(changes_fd, buf.data(), buf.size());
1923 : :
1924 : : // Compare the old and new bitmaps to find blocks which have changed, and
1925 : : // write them to the file descriptor.
1926 : 86 : uint4 n = 0;
1927 : 86 : byte * p = new byte[block_size];
1928 : : try {
1929 : 86 : base.calculate_last_block();
1930 [ + + ]: 161 : while (base.find_changed_block(&n)) {
1931 : 75 : buf.resize(0);
1932 : 75 : pack_uint(buf, n + 1);
1933 : 75 : io_write(changes_fd, buf.data(), buf.size());
1934 : :
1935 : : // Read block n.
1936 : 75 : read_block(n, p);
1937 : :
1938 : : // Write block n to the file.
1939 : : io_write(changes_fd, reinterpret_cast<const char *>(p),
1940 : 75 : block_size);
1941 : 75 : ++n;
1942 : : }
1943 [ + - ]: 86 : delete[] p;
1944 : 86 : p = 0;
1945 : 0 : } catch (...) {
1946 [ # # ]: 0 : delete[] p;
1947 : 0 : throw;
1948 : : }
1949 : 86 : buf.resize(0);
1950 : 86 : pack_uint(buf, 0u);
1951 : 138 : io_write(changes_fd, buf.data(), buf.size());
1952 : : }
1953 : :
1954 : : void
1955 : 914 : ChertTable::cancel()
1956 : : {
1957 : : LOGCALL_VOID(DB, "ChertTable::cancel", NO_ARGS);
1958 : : Assert(writable);
1959 : :
1960 [ + + ]: 914 : if (handle < 0) {
1961 [ + + ]: 445 : if (handle == -2) {
1962 : 2 : ChertTable::throw_database_closed();
1963 : : }
1964 : 443 : latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1965 : 443 : return;
1966 : : }
1967 : :
1968 : : // This causes problems: if (!Btree_modified) return;
1969 : :
1970 : 469 : string err_msg;
1971 [ - + ]: 469 : if (!base.read(name, base_letter, writable, err_msg)) {
1972 : 0 : throw Xapian::DatabaseCorruptError(string("Couldn't reread base ") + base_letter);
1973 : : }
1974 : :
1975 : 469 : revision_number = base.get_revision();
1976 : 469 : block_size = base.get_block_size();
1977 : 469 : root = base.get_root();
1978 : 469 : level = base.get_level();
1979 : : //bit_map_size = basep->get_bit_map_size();
1980 : 469 : item_count = base.get_item_count();
1981 : 469 : faked_root_block = base.get_have_fakeroot();
1982 : 469 : sequential = base.get_sequential();
1983 : :
1984 : 469 : latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
1985 : :
1986 : 469 : Btree_modified = false;
1987 : :
1988 [ + + ]: 976 : for (int j = 0; j <= level; j++) {
1989 : 507 : C[j].n = BLK_UNUSED;
1990 : 507 : C[j].rewrite = false;
1991 : : }
1992 : 469 : read_root();
1993 : :
1994 : 469 : changed_n = 0;
1995 : 469 : changed_c = DIR_START;
1996 : 912 : seq_count = SEQ_START_POINT;
1997 : : }
1998 : :
1999 : : /************ B-tree reading ************/
2000 : :
2001 : : bool
2002 : 10306 : ChertTable::do_open_to_read(bool revision_supplied, chert_revision_number_t revision_)
2003 : : {
2004 : : LOGCALL(DB, bool, "ChertTable::do_open_to_read", revision_supplied | revision_);
2005 [ + + ]: 10306 : if (handle == -2) {
2006 : 4 : ChertTable::throw_database_closed();
2007 : : }
2008 : 10302 : handle = ::open((name + "DB").c_str(), O_RDONLY | O_BINARY);
2009 [ + + ]: 10302 : if (handle < 0) {
2010 [ + - ]: 3538 : if (lazy) {
2011 : : // This table is optional when reading!
2012 : 3538 : revision_number = revision_;
2013 : 3538 : RETURN(true);
2014 : : }
2015 : 0 : string message("Couldn't open ");
2016 : 0 : message += name;
2017 : 0 : message += "DB to read: ";
2018 : 0 : message += strerror(errno);
2019 : 0 : throw Xapian::DatabaseOpeningError(message);
2020 : : }
2021 : :
2022 [ - + ]: 6764 : if (!basic_open(revision_supplied, revision_)) {
2023 : 0 : ::close(handle);
2024 : 0 : handle = -1;
2025 [ # # ]: 0 : if (revision_supplied) {
2026 : : // The requested revision was not available.
2027 : : // This could be because the database was modified underneath us, or
2028 : : // because a base file is missing. Return false, and work out what
2029 : : // the problem was at a higher level.
2030 : 0 : RETURN(false);
2031 : : }
2032 : 0 : throw Xapian::DatabaseOpeningError("Failed to open table for reading");
2033 : : }
2034 : :
2035 [ + + ]: 15709 : for (int j = 0; j <= level; j++) {
2036 : 8945 : C[j].n = BLK_UNUSED;
2037 : 8945 : C[j].p = new byte[block_size];
2038 [ - + ]: 8945 : if (C[j].p == 0) {
2039 : 0 : throw std::bad_alloc();
2040 : : }
2041 : : }
2042 : :
2043 : 6764 : read_root();
2044 : 10302 : RETURN(true);
2045 : : }
2046 : :
2047 : : void
2048 : 2472 : ChertTable::open()
2049 : : {
2050 : : LOGCALL_VOID(DB, "ChertTable::open", NO_ARGS);
2051 : : LOGLINE(DB, "opening at path " << name);
2052 : 2472 : close();
2053 : :
2054 [ + + ]: 2472 : if (!writable) {
2055 : : // Any errors are thrown if revision_supplied is false
2056 : 2126 : (void)do_open_to_read(false, 0);
2057 : 2122 : return;
2058 : : }
2059 : :
2060 : : // Any errors are thrown if revision_supplied is false.
2061 : 2468 : (void)do_open_to_write(false, 0);
2062 : : }
2063 : :
2064 : : bool
2065 : 9895 : ChertTable::open(chert_revision_number_t revision)
2066 : : {
2067 : : LOGCALL(DB, bool, "ChertTable::open", revision);
2068 : : LOGLINE(DB, "opening for particular revision at path " << name);
2069 : 9895 : close();
2070 : :
2071 [ + + ]: 9895 : if (!writable) {
2072 [ + - ]: 8180 : if (do_open_to_read(true, revision)) {
2073 : : AssertEq(revision_number, revision);
2074 : 8180 : RETURN(true);
2075 : : } else {
2076 : 0 : close();
2077 : 0 : RETURN(false);
2078 : : }
2079 : : }
2080 : :
2081 [ - + ]: 1715 : if (!do_open_to_write(true, revision)) {
2082 : : // Can't open at the requested revision.
2083 : 0 : close();
2084 : 0 : RETURN(false);
2085 : : }
2086 : :
2087 : : AssertEq(revision_number, revision);
2088 : 9895 : RETURN(true);
2089 : : }
2090 : :
2091 : : bool
2092 : 5030 : ChertTable::prev_for_sequential(Cursor * C_, int /*dummy*/) const
2093 : : {
2094 : : LOGCALL(DB, bool, "ChertTable::prev_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2095 : 5030 : int c = C_[0].c;
2096 [ - + ]: 5030 : if (c == DIR_START) {
2097 : 0 : byte * p = C_[0].p;
2098 : : Assert(p);
2099 : 0 : uint4 n = C_[0].n;
2100 : 0 : while (true) {
2101 [ # # ]: 0 : if (n == 0) RETURN(false);
2102 : 0 : n--;
2103 [ # # ]: 0 : if (writable) {
2104 [ # # ]: 0 : if (n == C[0].n) {
2105 : : // Block is a leaf block in the built-in cursor
2106 : : // (potentially in modified form).
2107 : 0 : memcpy(p, C[0].p, block_size);
2108 : : } else {
2109 : : // Blocks in the built-in cursor may not have been written
2110 : : // to disk yet, so we have to check that the block number
2111 : : // isn't in the built-in cursor or we'll read an
2112 : : // uninitialised block (for which GET_LEVEL(p) will
2113 : : // probably return 0).
2114 : : int j;
2115 [ # # ]: 0 : for (j = 1; j <= level; ++j) {
2116 [ # # ]: 0 : if (n == C[j].n) break;
2117 : : }
2118 [ # # ]: 0 : if (j <= level) continue;
2119 : :
2120 : : // Block isn't in the built-in cursor, so the form on disk
2121 : : // is valid, so read it to check if it's the next level 0
2122 : : // block.
2123 : 0 : read_block(n, p);
2124 : : }
2125 : : } else {
2126 : 0 : read_block(n, p);
2127 : : }
2128 : 0 : if (writable) AssertEq(revision_number, latest_revision_number);
2129 [ # # ]: 0 : if (REVISION(p) > revision_number + writable) {
2130 : 0 : set_overwritten();
2131 : : RETURN(false);
2132 : : }
2133 [ # # ]: 0 : if (GET_LEVEL(p) == 0) break;
2134 : : }
2135 : 0 : c = DIR_END(p);
2136 : 0 : C_[0].n = n;
2137 : : }
2138 : 5030 : c -= D2;
2139 : 5030 : C_[0].c = c;
2140 : 5030 : RETURN(true);
2141 : : }
2142 : :
2143 : : bool
2144 : 131293 : ChertTable::next_for_sequential(Cursor * C_, int /*dummy*/) const
2145 : : {
2146 : : LOGCALL(DB, bool, "ChertTable::next_for_sequential", Literal("C_") | Literal("/*dummy*/"));
2147 : 131293 : byte * p = C_[0].p;
2148 : : Assert(p);
2149 : 131293 : int c = C_[0].c;
2150 : 131293 : c += D2;
2151 : : Assert((unsigned)c < block_size);
2152 [ + + ]: 131293 : if (c == DIR_END(p)) {
2153 : 19879 : uint4 n = C_[0].n;
2154 : 19 : while (true) {
2155 : 19898 : n++;
2156 [ + + ]: 19898 : if (n > base.get_last_block()) RETURN(false);
2157 [ + + ]: 14507 : if (writable) {
2158 [ - + ]: 48 : if (n == C[0].n) {
2159 : : // Block is a leaf block in the built-in cursor
2160 : : // (potentially in modified form).
2161 : 0 : memcpy(p, C[0].p, block_size);
2162 : : } else {
2163 : : // Blocks in the built-in cursor may not have been written
2164 : : // to disk yet, so we have to check that the block number
2165 : : // isn't in the built-in cursor or we'll read an
2166 : : // uninitialised block (for which GET_LEVEL(p) will
2167 : : // probably return 0).
2168 : : int j;
2169 [ + + ]: 90 : for (j = 1; j <= level; ++j) {
2170 [ + + ]: 48 : if (n == C[j].n) break;
2171 : : }
2172 [ + + ]: 48 : if (j <= level) continue;
2173 : :
2174 : : // Block isn't in the built-in cursor, so the form on disk
2175 : : // is valid, so read it to check if it's the next level 0
2176 : : // block.
2177 : 42 : read_block(n, p);
2178 : : }
2179 : : } else {
2180 : 14459 : read_block(n, p);
2181 : : }
2182 : 14501 : if (writable) AssertEq(revision_number, latest_revision_number);
2183 [ - + ]: 14501 : if (REVISION(p) > revision_number + writable) {
2184 : 0 : set_overwritten();
2185 : : RETURN(false);
2186 : : }
2187 [ + + ]: 14501 : if (GET_LEVEL(p) == 0) break;
2188 : : }
2189 : 14488 : c = DIR_START;
2190 : 14488 : C_[0].n = n;
2191 : : }
2192 : 125902 : C_[0].c = c;
2193 : 131293 : RETURN(true);
2194 : : }
2195 : :
2196 : : bool
2197 : 4815655 : ChertTable::prev_default(Cursor * C_, int j) const
2198 : : {
2199 : : LOGCALL(DB, bool, "ChertTable::prev_default", Literal("C_") | j);
2200 : 4815655 : byte * p = C_[j].p;
2201 : 4815655 : int c = C_[j].c;
2202 : : Assert(c >= DIR_START);
2203 : : Assert((unsigned)c < block_size);
2204 : : Assert(c <= DIR_END(p));
2205 [ + + ]: 4815655 : if (c == DIR_START) {
2206 [ - + ]: 934204 : if (j == level) RETURN(false);
2207 [ - + ]: 934204 : if (!prev_default(C_, j + 1)) RETURN(false);
2208 : 934204 : c = DIR_END(p);
2209 : : }
2210 : 4815655 : c -= D2;
2211 : 4815655 : C_[j].c = c;
2212 [ + + ]: 4815655 : if (j > 0) {
2213 : 934204 : block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2214 : : }
2215 : 4815655 : RETURN(true);
2216 : : }
2217 : :
2218 : : bool
2219 : 7803645 : ChertTable::next_default(Cursor * C_, int j) const
2220 : : {
2221 : : LOGCALL(DB, bool, "ChertTable::next_default", Literal("C_") | j);
2222 : 7803645 : byte * p = C_[j].p;
2223 : 7803645 : int c = C_[j].c;
2224 : : Assert(c >= DIR_START);
2225 : 7803645 : c += D2;
2226 : : Assert((unsigned)c < block_size);
2227 : : // Sometimes c can be DIR_END(p) + 2 here it appears...
2228 [ + + ]: 7803645 : if (c >= DIR_END(p)) {
2229 [ + + ]: 1568724 : if (j == level) RETURN(false);
2230 [ + + ]: 1546822 : if (!next_default(C_, j + 1)) RETURN(false);
2231 : 1526329 : c = DIR_START;
2232 : : }
2233 : 7761250 : C_[j].c = c;
2234 [ + + ]: 7761250 : if (j > 0) {
2235 : 1526329 : block_to_cursor(C_, j - 1, Item(p, c).block_given_by());
2236 : : #ifdef BTREE_DEBUG_FULL
2237 : : printf("Block in ChertTable:next_default");
2238 : : report_block_full(j - 1, C_[j - 1].n, C_[j - 1].p);
2239 : : #endif /* BTREE_DEBUG_FULL */
2240 : : }
2241 : 7803645 : RETURN(true);
2242 : : }
2243 : :
2244 : : void
2245 : 13 : ChertTable::throw_database_closed()
2246 : : {
2247 : 13 : throw Xapian::DatabaseError("Database has been closed");
2248 : : }
2249 : :
2250 : : /** Compares this key with key2.
2251 : :
2252 : : The result is true if this key precedes key2. The comparison is for byte
2253 : : sequence collating order, taking lengths into account. So if the keys are
2254 : : made up of lower case ASCII letters we get alphabetical ordering.
2255 : :
2256 : : Now remember that items are added into the B-tree in fastest time
2257 : : when they are preordered by their keys. This is therefore the piece
2258 : : of code that needs to be followed to arrange for the preordering.
2259 : :
2260 : : This is complicated by the fact that keys have two parts - a value
2261 : : and then a count. We first compare the values, and only if they
2262 : : are equal do we compare the counts.
2263 : : */
2264 : :
2265 : 38493910 : bool Key::operator<(Key key2) const
2266 : : {
2267 : : LOGCALL(DB, bool, "Key::operator<", static_cast<const void*>(key2.p));
2268 : 38493910 : int key1_len = length();
2269 : 38493910 : int key2_len = key2.length();
2270 [ + + ]: 38493910 : if (key1_len == key2_len) {
2271 : : // The keys are the same length, so we can compare the counts
2272 : : // in the same operation since they're stored as 2 byte
2273 : : // bigendian numbers.
2274 : 23393635 : RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) < 0);
2275 : : }
2276 : :
2277 [ + + ]: 15100275 : int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
2278 : :
2279 : : // Compare the common part of the keys
2280 : 15100275 : int diff = memcmp(p + K1, key2.p + K1, k_smaller);
2281 [ + + ]: 15100275 : if (diff != 0) RETURN(diff < 0);
2282 : :
2283 : : // We dealt with the "same length" case above so we never need to check
2284 : : // the count here.
2285 : 38493910 : RETURN(key1_len < key2_len);
2286 : : }
2287 : :
2288 : 4561376 : bool Key::operator==(Key key2) const
2289 : : {
2290 : : LOGCALL(DB, bool, "Key::operator==", static_cast<const void*>(key2.p));
2291 : 4561376 : int key1_len = length();
2292 [ + + ]: 4561376 : if (key1_len != key2.length()) RETURN(false);
2293 : : // The keys are the same length, so we can compare the counts
2294 : : // in the same operation since they're stored as 2 byte
2295 : : // bigendian numbers.
2296 : 4561376 : RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) == 0);
2297 : : }
|