LCOV - code coverage report
Current view: top level - backends/flint - flint_table.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 761 915 83.2 %
Date: 2011-08-21 Functions: 54 54 100.0 %
Branches: 385 530 72.6 %

           Branch data     Line data    Source code
       1                 :            : /* flint_table.cc: Btree implementation
       2                 :            :  *
       3                 :            :  * Copyright 1999,2000,2001 BrightStation PLC
       4                 :            :  * Copyright 2002 Ananova Ltd
       5                 :            :  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2011 Olly Betts
       6                 :            :  * Copyright 2008 Lemur Consulting Ltd
       7                 :            :  * Copyright 2010 Richard Boulton
       8                 :            :  *
       9                 :            :  * This program is free software; you can redistribute it and/or
      10                 :            :  * modify it under the terms of the GNU General Public License as
      11                 :            :  * published by the Free Software Foundation; either version 2 of the
      12                 :            :  * License, or (at your option) any later version.
      13                 :            :  *
      14                 :            :  * This program is distributed in the hope that it will be useful,
      15                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      16                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17                 :            :  * GNU General Public License for more details.
      18                 :            :  *
      19                 :            :  * You should have received a copy of the GNU General Public License
      20                 :            :  * along with this program; if not, write to the Free Software
      21                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      22                 :            :  * USA
      23                 :            :  */
      24                 :            : 
      25                 :            : #include <config.h>
      26                 :            : 
      27                 :            : #include "flint_table.h"
      28                 :            : 
      29                 :            : #include <xapian/error.h>
      30                 :            : 
      31                 :            : #include "safeerrno.h"
      32                 :            : #ifdef __WIN32__
      33                 :            : # include "msvc_posix_wrapper.h"
      34                 :            : #endif
      35                 :            : 
      36                 :            : #include "omassert.h"
      37                 :            : #include "stringutils.h" // For STRINGIZE().
      38                 :            : 
      39                 :            : // Define to use "dangerous" mode - in this mode we write modified btree
      40                 :            : // blocks back in place.  This is somewhat faster (especially once we're
      41                 :            : // I/O bound) but the database can't be safely searched during indexing
      42                 :            : // and if indexing is terminated uncleanly, the database may be corrupt.
      43                 :            : //
      44                 :            : // Despite the risks, it's useful for speeding up a full rebuild.
      45                 :            : //
      46                 :            : // FIXME: make this mode run-time selectable, and record that it is currently
      47                 :            : // in use somewhere on disk, so readers can check and refuse to open the
      48                 :            : // database.
      49                 :            : //
      50                 :            : // #define DANGEROUS
      51                 :            : 
      52                 :            : #include <sys/types.h>
      53                 :            : 
      54                 :            : // Trying to include the correct headers with the correct defines set to
      55                 :            : // get pread() and pwrite() prototyped on every platform without breaking any
      56                 :            : // other platform is a real can of worms.  So instead we probe for what
      57                 :            : // prototypes (if any) are required in configure and put them into
      58                 :            : // PREAD_PROTOTYPE and PWRITE_PROTOTYPE.
      59                 :            : #if defined HAVE_PREAD && defined PREAD_PROTOTYPE
      60                 :            : PREAD_PROTOTYPE
      61                 :            : #endif
      62                 :            : #if defined HAVE_PWRITE && defined PWRITE_PROTOTYPE
      63                 :            : PWRITE_PROTOTYPE
      64                 :            : #endif
      65                 :            : 
      66                 :            : #include <cstdio>    /* for rename */
      67                 :            : #include <cstring>   /* for memmove */
      68                 :            : #include <climits>   /* for CHAR_BIT */
      69                 :            : 
      70                 :            : #include "flint_btreebase.h"
      71                 :            : #include "flint_cursor.h"
      72                 :            : #include "flint_utils.h"
      73                 :            : 
      74                 :            : #include "debuglog.h"
      75                 :            : #include "io_utils.h"
      76                 :            : #include "omassert.h"
      77                 :            : #include "str.h"
      78                 :            : #include "unaligned.h"
      79                 :            : #include "utils.h"
      80                 :            : 
      81                 :            : #include <algorithm>  // for std::min()
      82                 :            : #include <string>
      83                 :            : 
      84                 :            : using namespace std;
      85                 :            : 
      86                 :            : // Only try to compress tags longer than this many bytes.
      87                 :            : const size_t COMPRESS_MIN = 4;
      88                 :            : 
      89                 :            : //#define BTREE_DEBUG_FULL 1
      90                 :            : #undef BTREE_DEBUG_FULL
      91                 :            : 
      92                 :            : #ifdef BTREE_DEBUG_FULL
      93                 :            : /*------debugging aids from here--------*/
      94                 :            : 
      95                 :            : static void print_key(const byte * p, int c, int j);
      96                 :            : static void print_tag(const byte * p, int c, int j);
      97                 :            : 
      98                 :            : /*
      99                 :            : static void report_cursor(int N, Btree * B, Cursor_ * C)
     100                 :            : {
     101                 :            :     int i;
     102                 :            :     printf("%d)\n", N);
     103                 :            :     for (i = 0; i <= B->level; i++)
     104                 :            :         printf("p=%d, c=%d, n=[%d], rewrite=%d\n",
     105                 :            :                 C[i].p, C[i].c, C[i].n, C[i].rewrite);
     106                 :            : }
     107                 :            : */
     108                 :            : 
     109                 :            : /*------to here--------*/
     110                 :            : #endif /* BTREE_DEBUG_FULL */
     111                 :            : 
     112                 :      13274 : static inline byte *zeroed_new(size_t size)
     113                 :            : {
     114                 :      13274 :     byte *temp = new byte[size];
     115                 :            :     // No need to check if temp is NULL, new throws std::bad_alloc if
     116                 :            :     // allocation fails.
     117                 :            :     Assert(temp);
     118                 :      13274 :     memset(temp, 0, size);
     119                 :      13274 :     return temp;
     120                 :            : }
     121                 :            : 
     122                 :            : /* A B-tree comprises (a) a base file, containing essential information (Block
     123                 :            :    size, number of the B-tree root block etc), (b) a bitmap, the Nth bit of the
     124                 :            :    bitmap being set if the Nth block of the B-tree file is in use, and (c) a
     125                 :            :    file DB containing the B-tree proper. The DB file is divided into a sequence
     126                 :            :    of equal sized blocks, numbered 0, 1, 2 ... some of which are free, some in
     127                 :            :    use. Those in use are arranged in a tree.
     128                 :            : 
     129                 :            :    Each block, b, has a structure like this:
     130                 :            : 
     131                 :            :      R L M T D o1 o2 o3 ... oN <gap> [item] .. [item] .. [item] ...
     132                 :            :      <---------- D ----------> <-M->
     133                 :            : 
     134                 :            :    And then,
     135                 :            : 
     136                 :            :    R = REVISION(b)  is the revision number the B-tree had when the block was
     137                 :            :                     written into the DB file.
     138                 :            :    L = GET_LEVEL(b) is the level of the block, which is the number of levels
     139                 :            :                     towards the root of the B-tree structure. So leaf blocks
     140                 :            :                     have level 0 and the one root block has the highest level
     141                 :            :                     equal to the number of levels in the B-tree.
     142                 :            :    M = MAX_FREE(b)  is the size of the gap between the end of the directory and
     143                 :            :                     the first item of data. (It is not necessarily the maximum
     144                 :            :                     size among the bits of space that are free, but I can't
     145                 :            :                     think of a better name.)
     146                 :            :    T = TOTAL_FREE(b)is the total amount of free space left in b.
     147                 :            :    D = DIR_END(b)   gives the offset to the end of the directory.
     148                 :            : 
     149                 :            :    o1, o2 ... oN are a directory of offsets to the N items held in the block.
     150                 :            :    The items are key-tag pairs, and as they occur in the directory are ordered
     151                 :            :    by the keys.
     152                 :            : 
     153                 :            :    An item has this form:
     154                 :            : 
     155                 :            :            I K key x C tag
     156                 :            :              <--K-->
     157                 :            :            <------I------>
     158                 :            : 
     159                 :            :    A long tag presented through the API is split up into C tags small enough to
     160                 :            :    be accommodated in the blocks of the B-tree. The key is extended to include
     161                 :            :    a counter, x, which runs from 1 to C. The key is preceded by a length, K,
     162                 :            :    and the whole item with a length, I, as depicted above.
     163                 :            : 
     164                 :            :    Here are the corresponding definitions:
     165                 :            : 
     166                 :            : */
     167                 :            : 
     168                 :            : #define REVISION(b)      static_cast<unsigned int>(getint4(b, 0))
     169                 :            : #define GET_LEVEL(b)     getint1(b, 4)
     170                 :            : #define MAX_FREE(b)      getint2(b, 5)
     171                 :            : #define TOTAL_FREE(b)    getint2(b, 7)
     172                 :            : #define DIR_END(b)       getint2(b, 9)
     173                 :            : #define DIR_START        11
     174                 :            : 
     175                 :            : #define SET_REVISION(b, x)      setint4(b, 0, x)
     176                 :            : #define SET_LEVEL(b, x)         setint1(b, 4, x)
     177                 :            : #define SET_MAX_FREE(b, x)      setint2(b, 5, x)
     178                 :            : #define SET_TOTAL_FREE(b, x)    setint2(b, 7, x)
     179                 :            : #define SET_DIR_END(b, x)       setint2(b, 9, x)
     180                 :            : 
     181                 :            : /** Flip to sequential addition block-splitting after this number of observed
     182                 :            :  *  sequential additions (in negated form). */
     183                 :            : #define SEQ_START_POINT (-10)
     184                 :            : 
     185                 :            : 
     186                 :            : 
     187                 :            : /* There are two bit maps in bit_map0 and bit_map. The nth bit of bitmap is 0
     188                 :            :    if the nth block is free, otherwise 1. bit_map0 is the initial state of
     189                 :            :    bitmap at the start of the current transaction.
     190                 :            : 
     191                 :            :    Note use of the limits.h values:
     192                 :            :    UCHAR_MAX = 255, an unsigned with all bits set, and
     193                 :            :    CHAR_BIT = 8, the number of bits per byte
     194                 :            : 
     195                 :            :    BYTE_PAIR_RANGE below is the smallest +ve number that can't be held in two
     196                 :            :    bytes -- 64K effectively.
     197                 :            : */
     198                 :            : 
     199                 :            : #define BYTE_PAIR_RANGE (1 << 2 * CHAR_BIT)
     200                 :            : 
     201                 :            : /// read_block(n, p) reads block n of the DB file to address p.
     202                 :            : void
     203                 :     810143 : FlintTable::read_block(uint4 n, byte * p) const
     204                 :            : {
     205                 :            :     // Log the value of p, not the contents of the block it points to...
     206                 :            :     LOGCALL_VOID(DB, "FlintTable::read_block", n | (void*)p);
     207                 :            :     /* Use the base bit_map_size not the bitmap's size, because
     208                 :            :      * the latter is uninitialised in readonly mode.
     209                 :            :      */
     210                 :            :     Assert(n / CHAR_BIT < base.get_bit_map_size());
     211                 :            : 
     212                 :            : #ifdef HAVE_PREAD
     213                 :     810143 :     off_t offset = off_t(block_size) * n;
     214                 :     810143 :     int m = block_size;
     215                 :          0 :     while (true) {
     216                 :            :         ssize_t bytes_read = pread(handle, reinterpret_cast<char *>(p), m,
     217                 :     810143 :                                    offset);
     218                 :            :         // normal case - read succeeded, so return.
     219         [ +  - ]:     810143 :         if (bytes_read == m) return;
     220         [ #  # ]:          0 :         if (bytes_read == -1) {
     221         [ #  # ]:          0 :             if (errno == EINTR) continue;
     222                 :          0 :             string message = "Error reading block " + str(n) + ": ";
     223                 :          0 :             message += strerror(errno);
     224                 :          0 :             throw Xapian::DatabaseError(message);
     225         [ #  # ]:          0 :         } else if (bytes_read == 0) {
     226                 :          0 :             string message = "Error reading block " + str(n) + ": got end of file";
     227                 :          0 :             throw Xapian::DatabaseError(message);
     228         [ #  # ]:          0 :         } else if (bytes_read < m) {
     229                 :            :             /* Read part of the block, which is not an error.  We should
     230                 :            :              * continue reading the rest of the block.
     231                 :            :              */
     232                 :          0 :             m -= int(bytes_read);
     233                 :          0 :             p += bytes_read;
     234                 :          0 :             offset += bytes_read;
     235                 :            :         }
     236                 :            :     }
     237                 :            : #else
     238                 :            :     if (lseek(handle, off_t(block_size) * n, SEEK_SET) == -1) {
     239                 :            :         string message = "Error seeking to block: ";
     240                 :            :         message += strerror(errno);
     241                 :            :         throw Xapian::DatabaseError(message);
     242                 :            :     }
     243                 :            : 
     244                 :            :     io_read(handle, reinterpret_cast<char *>(p), block_size, block_size);
     245                 :            : #endif
     246                 :            : }
     247                 :            : 
     248                 :            : /** write_block(n, p) writes block n in the DB file from address p.
     249                 :            :  *  When writing we check to see if the DB file has already been
     250                 :            :  *  modified. If not (so this is the first write) the old base is
     251                 :            :  *  deleted. This prevents the possibility of it being opened
     252                 :            :  *  subsequently as an invalid base.
     253                 :            :  */
     254                 :            : void
     255                 :      67240 : FlintTable::write_block(uint4 n, const byte * p) const
     256                 :            : {
     257                 :            :     LOGCALL_VOID(DB, "FlintTable::write_block", n | p);
     258                 :            :     Assert(writable);
     259                 :            :     /* Check that n is in range. */
     260                 :            :     Assert(n / CHAR_BIT < base.get_bit_map_size());
     261                 :            : 
     262                 :            :     /* don't write to non-free */;
     263                 :            :     AssertParanoid(base.block_free_at_start(n));
     264                 :            : 
     265                 :            :     /* write revision is okay */
     266                 :            :     AssertEqParanoid(REVISION(p), latest_revision_number + 1);
     267                 :            : 
     268         [ +  + ]:      67240 :     if (both_bases) {
     269                 :            :         // Delete the old base before modifying the database.
     270                 :            :         //
     271                 :            :         // If the file is on NFS, then io_unlink() may return false even if
     272                 :            :         // the file was removed, so on balance throwing an exception in this
     273                 :            :         // case is unhelpful, since we wanted the file gone anyway!  The
     274                 :            :         // likely explanation is that somebody moved, deleted, or changed a
     275                 :            :         // symlink to the database directory.
     276                 :        732 :         (void)io_unlink(name + "base" + other_base_letter());
     277                 :        732 :         both_bases = false;
     278                 :        732 :         latest_revision_number = revision_number;
     279                 :            :     }
     280                 :            : 
     281                 :            : #ifdef HAVE_PWRITE
     282                 :      67240 :     off_t offset = off_t(block_size) * n;
     283                 :      67240 :     int m = block_size;
     284                 :          0 :     while (true) {
     285                 :      67240 :         ssize_t bytes_written = pwrite(handle, p, m, offset);
     286         [ +  - ]:      67240 :         if (bytes_written == m) {
     287                 :            :             // normal case - write succeeded, so return.
     288                 :            :             return;
     289         [ #  # ]:          0 :         } else if (bytes_written == -1) {
     290         [ #  # ]:          0 :             if (errno == EINTR) continue;
     291                 :          0 :             string message = "Error writing block: ";
     292                 :          0 :             message += strerror(errno);
     293                 :          0 :             throw Xapian::DatabaseError(message);
     294         [ #  # ]:          0 :         } else if (bytes_written == 0) {
     295                 :          0 :             string message = "Error writing block: wrote no data";
     296                 :          0 :             throw Xapian::DatabaseError(message);
     297         [ #  # ]:          0 :         } else if (bytes_written < m) {
     298                 :            :             /* Wrote part of the block, which is not an error.  We should
     299                 :            :              * continue writing the rest of the block.
     300                 :            :              */
     301                 :          0 :             m -= int(bytes_written);
     302                 :          0 :             p += bytes_written;
     303                 :          0 :             offset += bytes_written;
     304                 :            :         }
     305                 :            :     }
     306                 :            : #else
     307                 :            :     if (lseek(handle, (off_t)block_size * n, SEEK_SET) == -1) {
     308                 :            :         string message = "Error seeking to block: ";
     309                 :            :         message += strerror(errno);
     310                 :            :         throw Xapian::DatabaseError(message);
     311                 :            :     }
     312                 :            : 
     313                 :            :     io_write(handle, reinterpret_cast<const char *>(p), block_size);
     314                 :            : #endif
     315                 :            : }
     316                 :            : 
     317                 :            : 
     318                 :            : /* A note on cursors:
     319                 :            : 
     320                 :            :    Each B-tree level has a corresponding array element C[j] in a
     321                 :            :    cursor, C. C[0] is the leaf (or data) level, and C[B->level] is the
     322                 :            :    root block level. Within a level j,
     323                 :            : 
     324                 :            :        C[j].p  addresses the block
     325                 :            :        C[j].c  is the offset into the directory entry in the block
     326                 :            :        C[j].n  is the number of the block at C[j].p
     327                 :            : 
     328                 :            :    A look up in the B-tree causes navigation of the blocks starting
     329                 :            :    from the root. In each block, p, we find an offset, c, to an item
     330                 :            :    which gives the number, n, of the block for the next level. This
     331                 :            :    leads to an array of values p,c,n which are held inside the cursor.
     332                 :            : 
     333                 :            :    Any Btree object B has a built-in cursor, at B->C. But other cursors may
     334                 :            :    be created.  If BC is a created cursor, BC->C is the cursor in the
     335                 :            :    sense given above, and BC->B is the handle for the B-tree again.
     336                 :            : */
     337                 :            : 
     338                 :            : 
     339                 :            : void
     340                 :          5 : FlintTable::set_overwritten() const
     341                 :            : {
     342                 :            :     LOGCALL_VOID(DB, "FlintTable::set_overwritten", NO_ARGS);
     343                 :            :     // If we're writable, there shouldn't be another writer who could cause
     344                 :            :     // overwritten to be flagged, so that's a DatabaseCorruptError.
     345         [ -  + ]:          5 :     if (writable)
     346                 :          0 :         throw Xapian::DatabaseCorruptError("Db block overwritten - are there multiple writers?");
     347                 :          5 :     throw Xapian::DatabaseModifiedError("The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation");
     348                 :            : }
     349                 :            : 
     350                 :            : /* block_to_cursor(C, j, n) puts block n into position C[j] of cursor
     351                 :            :    C, writing the block currently at C[j] back to disk if necessary.
     352                 :            :    Note that
     353                 :            : 
     354                 :            :        C[j].rewrite
     355                 :            : 
     356                 :            :    is true iff C[j].n is different from block n in file DB. If it is
     357                 :            :    false no rewriting is necessary.
     358                 :            : */
     359                 :            : 
     360                 :            : void
     361                 :    3341875 : FlintTable::block_to_cursor(Cursor_ * C_, int j, uint4 n) const
     362                 :            : {
     363                 :            :     LOGCALL_VOID(DB, "FlintTable::block_to_cursor", (void*)C_ | j | n);
     364         [ +  + ]:    3341875 :     if (n == C_[j].n) return;
     365                 :     730205 :     byte * p = C_[j].p;
     366                 :            :     Assert(p);
     367                 :            : 
     368                 :            :     // FIXME: only needs to be done in write mode
     369         [ +  + ]:     730205 :     if (C_[j].rewrite) {
     370                 :            :         Assert(writable);
     371                 :            :         Assert(C == C_);
     372                 :      52465 :         write_block(C_[j].n, p);
     373                 :      52465 :         C_[j].rewrite = false;
     374                 :            :     }
     375                 :            :     // Check if the block is in the built-in cursor (potentially in
     376                 :            :     // modified form).
     377 [ +  + ][ +  + ]:     752439 :     if (writable && n == C[j].n) {
     378         [ +  - ]:      22234 :         if (p != C[j].p)
     379                 :      22234 :             memcpy(p, C[j].p, block_size);
     380                 :            :     } else {
     381                 :     707971 :         read_block(n, p);
     382                 :            :     }
     383                 :            : 
     384                 :     730205 :     C_[j].n = n;
     385         [ +  + ]:     730205 :     if (j < level) {
     386                 :            :         /* unsigned comparison */
     387         [ +  + ]:     720330 :         if (REVISION(p) > REVISION(C_[j + 1].p)) {
     388                 :    3341875 :             set_overwritten();
     389                 :            :             return;
     390                 :            :         }
     391                 :            :     }
     392                 :            :     AssertEq(j, GET_LEVEL(p));
     393                 :            : }
     394                 :            : 
     395                 :            : /** Btree::alter(); is called when the B-tree is to be altered.
     396                 :            : 
     397                 :            :    It causes new blocks to be forced for the current set of blocks in
     398                 :            :    the cursor.
     399                 :            : 
     400                 :            :    The point is that if a block at level 0 is to be altered it may get
     401                 :            :    a new number. Then the pointer to this block from level 1 will need
     402                 :            :    changing. So the block at level 1 needs altering and may get a new
     403                 :            :    block number. Then the pointer to this block from level 2 will need
     404                 :            :    changing ... and so on back to the root.
     405                 :            : 
     406                 :            :    The clever bit here is spotting the cases when we can make an early
     407                 :            :    exit from this process. If C[j].rewrite is true, C[j+k].rewrite
     408                 :            :    will be true for k = 1,2 ... We have been through all this before,
     409                 :            :    and there is no need to do it again. If C[j].n was free at the
     410                 :            :    start of the transaction, we can copy it back to the same place
     411                 :            :    without violating the integrity of the B-tree. We don't then need a
     412                 :            :    new n and can return. The corresponding C[j].rewrite may be true or
     413                 :            :    false in that case.
     414                 :            : */
     415                 :            : 
     416                 :            : void
     417                 :    1038212 : FlintTable::alter()
     418                 :            : {
     419                 :            :     LOGCALL_VOID(DB, "FlintTable::alter", NO_ARGS);
     420                 :            :     Assert(writable);
     421                 :            : #ifdef DANGEROUS
     422                 :            :     C[0].rewrite = true;
     423                 :            : #else
     424                 :    1038212 :     int j = 0;
     425                 :    1038212 :     byte * p = C[j].p;
     426                 :    1039113 :     while (true) {
     427         [ +  + ]:    1039113 :         if (C[j].rewrite) return; /* all new, so return */
     428                 :      55329 :         C[j].rewrite = true;
     429                 :            : 
     430                 :      55329 :         uint4 n = C[j].n;
     431         [ +  + ]:      55329 :         if (base.block_free_at_start(n)) {
     432                 :            :             Assert(REVISION(p) == latest_revision_number + 1);
     433                 :      53682 :             return;
     434                 :            :         }
     435                 :            :         Assert(REVISION(p) < latest_revision_number + 1);
     436                 :       1647 :         base.free_block(n);
     437                 :       1647 :         n = base.next_free_block();
     438                 :       1647 :         C[j].n = n;
     439                 :       1647 :         SET_REVISION(p, latest_revision_number + 1);
     440                 :            : 
     441         [ +  + ]:       1647 :         if (j == level) return;
     442                 :        901 :         j++;
     443                 :        901 :         p = C[j].p;
     444                 :        901 :         Item_wr_(p, C[j].c).set_block_given_by(n);
     445                 :            :     }
     446                 :            : #endif
     447                 :            : }
     448                 :            : 
     449                 :            : /** find_in_block(p, key, leaf, c) searches for the key in the block at p.
     450                 :            : 
     451                 :            :    leaf is true for a data block, and false for an index block (when the
     452                 :            :    first key is dummy and never needs to be tested). What we get is the
     453                 :            :    directory entry to the last key <= the key being searched for.
     454                 :            : 
     455                 :            :    The lookup is by binary chop, with i and j set to the left and
     456                 :            :    right ends of the search area. In sequential addition, c will often
     457                 :            :    be the answer, so we test the keys round c and move i and j towards
     458                 :            :    c if possible.
     459                 :            : */
     460                 :            : 
     461                 :    5711243 : int FlintTable::find_in_block(const byte * p, Key_ key, bool leaf, int c)
     462                 :            : {
     463                 :            :     LOGCALL_STATIC(DB, int, "FlintTable::find_in_block", reinterpret_cast<const void*>(p) | reinterpret_cast<const void*>(key.get_address()) | leaf | c);
     464                 :    5711243 :     int i = DIR_START;
     465         [ +  + ]:    5711243 :     if (leaf) i -= D2;
     466                 :    5711243 :     int j = DIR_END(p);
     467                 :            : 
     468         [ +  + ]:    5711243 :     if (c != -1) {
     469 [ +  + ][ +  + ]:    5328505 :         if (c < j && i < c && Item_(p, c).key() <= key)
         [ +  + ][ +  + ]
     470                 :    4620075 :             i = c;
     471                 :    5328505 :         c += D2;
     472 [ +  + ][ +  + ]:    5328505 :         if (c < j && i < c && key < Item_(p, c).key())
         [ +  + ][ +  + ]
     473                 :    1586594 :             j = c;
     474                 :            :     }
     475                 :            : 
     476         [ +  + ]:   12948022 :     while (j - i > D2) {
     477                 :    7236779 :         int k = i + ((j - i)/(D2 * 2))*D2; /* mid way */
     478         [ +  + ]:    7236779 :         if (key < Item_(p, k).key()) j = k; else i = k;
     479                 :            :     }
     480                 :    5711243 :     RETURN(i);
     481                 :            : }
     482                 :            : 
     483                 :            : /** find(C_) searches for the key of B->kt in the B-tree.
     484                 :            : 
     485                 :            :    Result is true if found, false otherwise.  When false, the B_tree
     486                 :            :    cursor is positioned at the last key in the B-tree <= the search
     487                 :            :    key.  Goes to first (null) item in B-tree when key length == 0.
     488                 :            : */
     489                 :            : 
     490                 :            : bool
     491                 :    2436164 : FlintTable::find(Cursor_ * C_) const
     492                 :            : {
     493                 :            :     LOGCALL(DB, bool, "FlintTable::find", (void*)C_);
     494                 :            :     // Note: the parameter is needed when we're called by FlintCursor
     495                 :            :     const byte * p;
     496                 :            :     int c;
     497                 :    2436164 :     Key_ key = kt.key();
     498         [ +  + ]:    5698756 :     for (int j = level; j > 0; --j) {
     499                 :    3262596 :         p = C_[j].p;
     500                 :    3262596 :         c = find_in_block(p, key, false, C_[j].c);
     501                 :            : #ifdef BTREE_DEBUG_FULL
     502                 :            :         printf("Block in FlintTable:find - code position 1");
     503                 :            :         report_block_full(j, C_[j].n, p);
     504                 :            : #endif /* BTREE_DEBUG_FULL */
     505                 :    3262596 :         C_[j].c = c;
     506                 :    3262596 :         block_to_cursor(C_, j - 1, Item_(p, c).block_given_by());
     507                 :            :     }
     508                 :    2436160 :     p = C_[0].p;
     509                 :    2436160 :     c = find_in_block(p, key, true, C_[0].c);
     510                 :            : #ifdef BTREE_DEBUG_FULL
     511                 :            :     printf("Block in FlintTable:find - code position 2");
     512                 :            :     report_block_full(0, C_[0].n, p);
     513                 :            : #endif /* BTREE_DEBUG_FULL */
     514                 :    2436160 :     C_[0].c = c;
     515         [ +  + ]:    2436160 :     if (c < DIR_START) RETURN(false);
     516                 :    2436160 :     RETURN(Item_(p, c).key() == key);
     517                 :            : }
     518                 :            : 
     519                 :            : /** compact(p) compact the block at p by shuffling all the items up to the end.
     520                 :            : 
     521                 :            :    MAX_FREE(p) is then maximized, and is equal to TOTAL_FREE(p).
     522                 :            : */
     523                 :            : 
     524                 :            : void
     525                 :      26606 : FlintTable::compact(byte * p)
     526                 :            : {
     527                 :            :     LOGCALL_VOID(DB, "FlintTable::compact", (void*)p);
     528                 :            :     Assert(writable);
     529                 :      26606 :     int e = block_size;
     530                 :      26606 :     byte * b = buffer;
     531                 :      26606 :     int dir_end = DIR_END(p);
     532         [ +  + ]:    1092368 :     for (int c = DIR_START; c < dir_end; c += D2) {
     533                 :    1065762 :         Item_ item(p, c);
     534                 :    1065762 :         int l = item.size();
     535                 :    1065762 :         e -= l;
     536                 :    1065762 :         memmove(b + e, item.get_address(), l);
     537                 :    1065762 :         setD(p, c, e);  /* reform in b */
     538                 :            :     }
     539                 :      26606 :     memmove(p + e, b + e, block_size - e);  /* copy back */
     540                 :      26606 :     e -= dir_end;
     541                 :      26606 :     SET_TOTAL_FREE(p, e);
     542                 :      26606 :     SET_MAX_FREE(p, e);
     543                 :      26606 : }
     544                 :            : 
     545                 :            : /** Btree needs to gain a new level to insert more items: so split root block
     546                 :            :  *  and construct a new one.
     547                 :            :  */
     548                 :            : void
     549                 :        225 : FlintTable::split_root(uint4 split_n)
     550                 :            : {
     551                 :            :     LOGCALL_VOID(DB, "FlintTable::split_root", split_n);
     552                 :            :     /* gain a level */
     553                 :        225 :     ++level;
     554                 :            : 
     555                 :            :     /* check level overflow - this isn't something that should ever happen
     556                 :            :      * but deserves more than an Assert()... */
     557         [ -  + ]:        225 :     if (level == BTREE_CURSOR_LEVELS) {
     558                 :          0 :         throw Xapian::DatabaseCorruptError("Btree has grown impossibly large ("STRINGIZE(BTREE_CURSOR_LEVELS)" levels)");
     559                 :            :     }
     560                 :            : 
     561                 :        225 :     byte * q = zeroed_new(block_size);
     562                 :        225 :     C[level].p = q;
     563                 :        225 :     C[level].c = DIR_START;
     564                 :        225 :     C[level].n = base.next_free_block();
     565                 :        225 :     C[level].rewrite = true;
     566                 :        225 :     SET_REVISION(q, latest_revision_number + 1);
     567                 :        225 :     SET_LEVEL(q, level);
     568                 :        225 :     SET_DIR_END(q, DIR_START);
     569                 :        225 :     compact(q);   /* to reset TOTAL_FREE, MAX_FREE */
     570                 :            : 
     571                 :            :     /* form a null key in b with a pointer to the old root */
     572                 :            :     byte b[10]; /* 7 is exact */
     573                 :        225 :     Item_wr_ item(b);
     574                 :        225 :     item.form_null_key(split_n);
     575                 :        225 :     add_item(item, level);
     576                 :        225 : }
     577                 :            : 
     578                 :            : /** enter_key(j, prevkey, newkey) is called after a block split.
     579                 :            : 
     580                 :            :    It enters in the block at level C[j] a separating key for the block
     581                 :            :    at level C[j - 1]. The key itself is newkey. prevkey is the
     582                 :            :    preceding key, and at level 1 newkey can be trimmed down to the
     583                 :            :    first point of difference to prevkey for entry in C[j].
     584                 :            : 
     585                 :            :    This code looks longer than it really is. If j exceeds the number
     586                 :            :    of B-tree levels the root block has split and we have to construct
     587                 :            :    a new one, but this is a rare event.
     588                 :            : 
     589                 :            :    The key is constructed in b, with block number C[j - 1].n as tag,
     590                 :            :    and this is added in with add_item. add_item may itself cause a
     591                 :            :    block split, with a further call to enter_key. Hence the recursion.
     592                 :            : */
     593                 :            : void
     594                 :      12487 : FlintTable::enter_key(int j, Key_ prevkey, Key_ newkey)
     595                 :            : {
     596                 :            :     Assert(writable);
     597                 :            :     Assert(prevkey < newkey);
     598                 :            :     Assert(j >= 1);
     599                 :            : 
     600                 :      12487 :     uint4 blocknumber = C[j - 1].n;
     601                 :            : 
     602                 :            :     // FIXME update to use Key_
     603                 :            :     // Keys are truncated here: but don't truncate the count at the end away.
     604                 :      12487 :     const int newkey_len = newkey.length();
     605                 :            :     int i;
     606                 :            : 
     607         [ +  + ]:      12487 :     if (j == 1) {
     608                 :            :         // Truncate the key to the minimal key which differs from prevkey,
     609                 :            :         // the preceding key in the block.
     610                 :      12391 :         i = 0;
     611                 :      12391 :         const int min_len = min(newkey_len, prevkey.length());
     612 [ +  + ][ +  + ]:      82455 :         while (i < min_len && prevkey[i] == newkey[i]) {
                 [ +  + ]
     613                 :      70064 :             i++;
     614                 :            :         }
     615                 :            : 
     616                 :            :         // Want one byte of difference.
     617         [ +  + ]:      12391 :         if (i < newkey_len) i++;
     618                 :            :     } else {
     619                 :            :         /* Can't truncate between branch levels, since the separated keys
     620                 :            :          * are in at the leaf level, and truncating again will change the
     621                 :            :          * branch point.
     622                 :            :          */
     623                 :         96 :         i = newkey_len;
     624                 :            :     }
     625                 :            : 
     626                 :            :     byte b[UCHAR_MAX + 6];
     627                 :      12487 :     Item_wr_ item(b);
     628                 :            :     Assert(i <= 256 - I2 - C2);
     629                 :            :     Assert(i <= (int)sizeof(b) - I2 - C2 - 4);
     630                 :      12487 :     item.set_key_and_block(newkey, i, blocknumber);
     631                 :            : 
     632                 :            :     // When j > 1 we can make the first key of block p null.  This is probably
     633                 :            :     // worthwhile as it trades a small amount of CPU and RAM use for a small
     634                 :            :     // saving in disk use.  Other redundant keys will still creep in though.
     635         [ +  + ]:      12487 :     if (j > 1) {
     636                 :         96 :         byte * p = C[j - 1].p;
     637                 :         96 :         uint4 n = getint4(newkey.get_address(), newkey_len + K1 + C2);
     638                 :         96 :         int new_total_free = TOTAL_FREE(p) + newkey_len + C2;
     639                 :            :         // FIXME: incredibly icky going from key to item like this...
     640                 :         96 :         Item_wr_(const_cast<byte*>(newkey.get_address()) - I2).form_null_key(n);
     641                 :         96 :         SET_TOTAL_FREE(p, new_total_free);
     642                 :            :     }
     643                 :            : 
     644                 :      12487 :     C[j].c = find_in_block(C[j].p, item.key(), false, 0) + D2;
     645                 :      12487 :     C[j].rewrite = true; /* a subtle point: this *is* required. */
     646                 :      12487 :     add_item(item, j);
     647                 :      12487 : }
     648                 :            : 
     649                 :            : /** mid_point(p) finds the directory entry in c that determines the
     650                 :            :    approximate mid point of the data in the block at p.
     651                 :            :  */
     652                 :            : 
     653                 :            : int
     654                 :       2911 : FlintTable::mid_point(byte * p)
     655                 :            : {
     656                 :       2911 :     int n = 0;
     657                 :       2911 :     int dir_end = DIR_END(p);
     658                 :       2911 :     int size = block_size - TOTAL_FREE(p) - dir_end;
     659         [ +  - ]:      27927 :     for (int c = DIR_START; c < dir_end; c += D2) {
     660                 :      27927 :         int l = Item_(p, c).size();
     661                 :      27927 :         n += 2 * l;
     662         [ +  + ]:      27927 :         if (n >= size) {
     663         [ +  + ]:       2911 :             if (l < n - size) return c;
     664                 :       1651 :             return c + D2;
     665                 :            :         }
     666                 :            :     }
     667                 :            : 
     668                 :            :     /* falling out of mid_point */
     669                 :            :     Assert(false);
     670                 :       2911 :     return 0; /* Stop compiler complaining about end of method. */
     671                 :            : }
     672                 :            : 
     673                 :            : /** add_item_to_block(p, kt_, c) adds item kt_ to the block at p.
     674                 :            : 
     675                 :            :    c is the offset in the directory that needs to be expanded to
     676                 :            :    accommodate the new entry for the item. We know before this is
     677                 :            :    called that there is enough room, so it's just a matter of byte
     678                 :            :    shuffling.
     679                 :            : */
     680                 :            : 
     681                 :            : void
     682                 :     953603 : FlintTable::add_item_to_block(byte * p, Item_wr_ kt_, int c)
     683                 :            : {
     684                 :            :     Assert(writable);
     685                 :     953603 :     int dir_end = DIR_END(p);
     686                 :     953603 :     int kt_len = kt_.size();
     687                 :     953603 :     int needed = kt_len + D2;
     688                 :     953603 :     int new_total = TOTAL_FREE(p) - needed;
     689                 :     953603 :     int new_max = MAX_FREE(p) - needed;
     690                 :            : 
     691                 :            :     Assert(new_total >= 0);
     692                 :            : 
     693         [ +  + ]:     953603 :     if (new_max < 0) {
     694                 :       1407 :         compact(p);
     695                 :       1407 :         new_max = MAX_FREE(p) - needed;
     696                 :            :         Assert(new_max >= 0);
     697                 :            :     }
     698                 :            :     Assert(dir_end >= c);
     699                 :            : 
     700                 :     953603 :     memmove(p + c + D2, p + c, dir_end - c);
     701                 :     953603 :     dir_end += D2;
     702                 :     953603 :     SET_DIR_END(p, dir_end);
     703                 :            : 
     704                 :     953603 :     int o = dir_end + new_max;
     705                 :     953603 :     setD(p, c, o);
     706                 :     953603 :     memmove(p + o, kt_.get_address(), kt_len);
     707                 :            : 
     708                 :     953603 :     SET_MAX_FREE(p, new_max);
     709                 :            : 
     710                 :     953603 :     SET_TOTAL_FREE(p, new_total);
     711                 :     953603 : }
     712                 :            : 
     713                 :            : /** FlintTable::add_item(kt_, j) adds item kt_ to the block at cursor level C[j].
     714                 :            :  *
     715                 :            :  *  If there is not enough room the block splits and the item is then
     716                 :            :  *  added to the appropriate half.
     717                 :            :  */
     718                 :            : void
     719                 :     953603 : FlintTable::add_item(Item_wr_ kt_, int j)
     720                 :            : {
     721                 :            :     Assert(writable);
     722                 :     953603 :     byte * p = C[j].p;
     723                 :     953603 :     int c = C[j].c;
     724                 :            :     uint4 n;
     725                 :            : 
     726                 :     953603 :     int needed = kt_.size() + D2;
     727         [ +  + ]:     953603 :     if (TOTAL_FREE(p) < needed) {
     728                 :            :         int m;
     729                 :            :         // Prepare to split p. After splitting, the block is in two halves, the
     730                 :            :         // lower half is split_p, the upper half p again. add_to_upper_half
     731                 :            :         // becomes true when the item gets added to p, false when it gets added
     732                 :            :         // to split_p.
     733                 :            : 
     734         [ +  + ]:      12487 :         if (seq_count < 0) {
     735                 :            :             // If we're not in sequential mode, we split at the mid point
     736                 :            :             // of the node.
     737                 :       2911 :             m = mid_point(p);
     738                 :            :         } else {
     739                 :            :             // During sequential addition, split at the insert point
     740                 :       9576 :             m = c;
     741                 :            :         }
     742                 :            : 
     743                 :      12487 :         uint4 split_n = C[j].n;
     744                 :      12487 :         C[j].n = base.next_free_block();
     745                 :            : 
     746                 :      12487 :         memcpy(split_p, p, block_size);  // replicate the whole block in split_p
     747                 :      12487 :         SET_DIR_END(split_p, m);
     748                 :      12487 :         compact(split_p);      /* to reset TOTAL_FREE, MAX_FREE */
     749                 :            : 
     750                 :            :         {
     751                 :      12487 :             int residue = DIR_END(p) - m;
     752                 :      12487 :             int new_dir_end = DIR_START + residue;
     753                 :      12487 :             memmove(p + DIR_START, p + m, residue);
     754                 :      12487 :             SET_DIR_END(p, new_dir_end);
     755                 :            :         }
     756                 :            : 
     757                 :      12487 :         compact(p);      /* to reset TOTAL_FREE, MAX_FREE */
     758                 :            : 
     759                 :            :         bool add_to_upper_half;
     760         [ +  + ]:      12487 :         if (seq_count < 0) {
     761                 :       2911 :             add_to_upper_half = (c >= m);
     762                 :            :         } else {
     763                 :            :             // And add item to lower half if split_p has room, otherwise upper
     764                 :            :             // half
     765                 :       9576 :             add_to_upper_half = (TOTAL_FREE(split_p) < needed);
     766                 :            :         }
     767                 :            : 
     768         [ +  + ]:      12487 :         if (add_to_upper_half) {
     769                 :      12472 :             c -= (m - DIR_START);
     770                 :            :             Assert(seq_count < 0 || c <= DIR_START + D2);
     771                 :            :             Assert(c >= DIR_START);
     772                 :            :             Assert(c <= DIR_END(p));
     773                 :      12472 :             add_item_to_block(p, kt_, c);
     774                 :      12472 :             n = C[j].n;
     775                 :            :         } else {
     776                 :            :             Assert(c >= DIR_START);
     777                 :            :             Assert(c <= DIR_END(split_p));
     778                 :         15 :             add_item_to_block(split_p, kt_, c);
     779                 :         15 :             n = split_n;
     780                 :            :         }
     781                 :      12487 :         write_block(split_n, split_p);
     782                 :            : 
     783                 :            :         // Check if we're splitting the root block.
     784         [ +  + ]:      12487 :         if (j == level) split_root(split_n);
     785                 :            : 
     786                 :            :         /* Enter a separating key at level j + 1 between */
     787                 :            :         /* the last key of block split_p, and the first key of block p */
     788                 :            :         enter_key(j + 1,
     789                 :            :                   Item_(split_p, DIR_END(split_p) - D2).key(),
     790                 :      12487 :                   Item_(p, DIR_START).key());
     791                 :            :     } else {
     792                 :     941116 :         add_item_to_block(p, kt_, c);
     793                 :     941116 :         n = C[j].n;
     794                 :            :     }
     795         [ +  + ]:     953603 :     if (j == 0) {
     796                 :     940891 :         changed_n = n;
     797                 :     940891 :         changed_c = c;
     798                 :            :     }
     799                 :     953603 : }
     800                 :            : 
     801                 :            : /** FlintTable::delete_item(j, repeatedly) is (almost) the converse of add_item.
     802                 :            :  *
     803                 :            :  * If repeatedly is true, the process repeats at the next level when a
     804                 :            :  * block has been completely emptied, freeing the block and taking out
     805                 :            :  * the pointer to it.  Emptied root blocks are also removed, which
     806                 :            :  * reduces the number of levels in the B-tree.
     807                 :            :  */
     808                 :            : void
     809                 :      58530 : FlintTable::delete_item(int j, bool repeatedly)
     810                 :            : {
     811                 :            :     Assert(writable);
     812                 :      58530 :     byte * p = C[j].p;
     813                 :      58530 :     int c = C[j].c;
     814                 :      58530 :     int kt_len = Item_(p, c).size(); /* size of the item to be deleted */
     815                 :      58530 :     int dir_end = DIR_END(p) - D2;   /* directory length will go down by 2 bytes */
     816                 :            : 
     817                 :      58530 :     memmove(p + c, p + c + D2, dir_end - c);
     818                 :      58530 :     SET_DIR_END(p, dir_end);
     819                 :      58530 :     SET_MAX_FREE(p, MAX_FREE(p) + D2);
     820                 :      58530 :     SET_TOTAL_FREE(p, TOTAL_FREE(p) + kt_len + D2);
     821                 :            : 
     822         [ +  + ]:      58530 :     if (!repeatedly) return;
     823         [ +  + ]:      57772 :     if (j < level) {
     824         [ +  + ]:      56702 :         if (dir_end == DIR_START) {
     825                 :        466 :             base.free_block(C[j].n);
     826                 :        466 :             C[j].rewrite = false;
     827                 :        466 :             C[j].n = BLK_UNUSED;
     828                 :        466 :             C[j + 1].rewrite = true;  /* *is* necessary */
     829                 :        466 :             delete_item(j + 1, true);
     830                 :            :         }
     831                 :            :     } else {
     832                 :            :         Assert(j == level);
     833 [ +  + ][ +  + ]:      58549 :         while (dir_end == DIR_START + D2 && level > 0) {
                 [ +  + ]
     834                 :            :             /* single item in the root block, so lose a level */
     835                 :         19 :             uint4 new_root = Item_(p, DIR_START).block_given_by();
     836         [ +  - ]:         19 :             delete [] p;
     837                 :         19 :             C[level].p = 0;
     838                 :         19 :             base.free_block(C[level].n);
     839                 :         19 :             C[level].rewrite = false;
     840                 :         19 :             C[level].n = BLK_UNUSED;
     841                 :         19 :             level--;
     842                 :            : 
     843                 :         19 :             block_to_cursor(C, level, new_root);
     844                 :            : 
     845                 :         19 :             p = C[level].p;
     846                 :         19 :             dir_end = DIR_END(p); /* prepare for the loop */
     847                 :            :         }
     848                 :            :     }
     849                 :            : }
     850                 :            : 
     851                 :            : /* debugging aid:
     852                 :            : static addcount = 0;
     853                 :            : */
     854                 :            : 
     855                 :            : /** add_kt(found) adds the item (key-tag pair) at B->kt into the
     856                 :            :    B-tree, using cursor C.
     857                 :            : 
     858                 :            :    found == find() is handed over as a parameter from Btree::add.
     859                 :            :    Btree::alter() prepares for the alteration to the B-tree. Then
     860                 :            :    there are a number of cases to consider:
     861                 :            : 
     862                 :            :      If an item with the same key is in the B-tree (found is true),
     863                 :            :      the new kt replaces it.
     864                 :            : 
     865                 :            :      If then kt is smaller, or the same size as, the item it replaces,
     866                 :            :      kt is put in the same place as the item it replaces, and the
     867                 :            :      TOTAL_FREE measure is reduced.
     868                 :            : 
     869                 :            :      If kt is larger than the item it replaces it is put in the
     870                 :            :      MAX_FREE space if there is room, and the directory entry and
     871                 :            :      space counts are adjusted accordingly.
     872                 :            : 
     873                 :            :      - But if there is not room we do it the long way: the old item is
     874                 :            :      deleted with delete_item and kt is added in with add_item.
     875                 :            : 
     876                 :            :      If the key of kt is not in the B-tree (found is false), the new
     877                 :            :      kt is added in with add_item.
     878                 :            : */
     879                 :            : 
     880                 :            : int
     881                 :     980906 : FlintTable::add_kt(bool found)
     882                 :            : {
     883                 :            :     Assert(writable);
     884                 :     980906 :     int components = 0;
     885                 :            : 
     886                 :            :     /*
     887                 :            :     {
     888                 :            :         printf("%d) %s ", addcount++, (found ? "replacing" : "adding"));
     889                 :            :         print_bytes(kt[I2] - K1 - C2, kt + I2 + K1); putchar('\n');
     890                 :            :     }
     891                 :            :     */
     892                 :     980906 :     alter();
     893                 :            : 
     894         [ +  + ]:     980906 :     if (found) { /* replacement */
     895                 :      40773 :         seq_count = SEQ_START_POINT;
     896                 :      40773 :         sequential = false;
     897                 :            : 
     898                 :      40773 :         byte * p = C[0].p;
     899                 :      40773 :         int c = C[0].c;
     900                 :      40773 :         Item_ item(p, c);
     901                 :      40773 :         int kt_size = kt.size();
     902                 :      40773 :         int needed = kt_size - item.size();
     903                 :            : 
     904                 :      40773 :         components = Item_(p, c).components_of();
     905                 :            : 
     906         [ +  + ]:      40773 :         if (needed <= 0) {
     907                 :            :             /* simple replacement */
     908                 :            :             memmove(const_cast<byte *>(item.get_address()),
     909                 :      33719 :                     kt.get_address(), kt_size);
     910                 :      33719 :             SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
     911                 :            :         } else {
     912                 :            :             /* new item into the block's freespace */
     913                 :       7054 :             int new_max = MAX_FREE(p) - kt_size;
     914         [ +  + ]:       7054 :             if (new_max >= 0) {
     915                 :       6296 :                 int o = DIR_END(p) + new_max;
     916                 :       6296 :                 memmove(p + o, kt.get_address(), kt_size);
     917                 :       6296 :                 setD(p, c, o);
     918                 :       6296 :                 SET_MAX_FREE(p, new_max);
     919                 :       6296 :                 SET_TOTAL_FREE(p, TOTAL_FREE(p) - needed);
     920                 :            :             } else {
     921                 :            :                 /* do it the long way */
     922                 :        758 :                 delete_item(0, false);
     923                 :        758 :                 add_item(kt, 0);
     924                 :            :             }
     925                 :            :         }
     926                 :            :     } else {
     927                 :            :         /* addition */
     928 [ +  + ][ +  + ]:    1870485 :         if (changed_n == C[0].n && changed_c == C[0].c) {
     929         [ +  + ]:     930352 :             if (seq_count < 0) seq_count++;
     930                 :            :         } else {
     931                 :       9781 :             seq_count = SEQ_START_POINT;
     932                 :       9781 :             sequential = false;
     933                 :            :         }
     934                 :     940133 :         C[0].c += D2;
     935                 :     940133 :         add_item(kt, 0);
     936                 :            :     }
     937                 :     980906 :     return components;
     938                 :            : }
     939                 :            : 
     940                 :            : /* delete_kt() corresponds to add_kt(found), but there are only
     941                 :            :    two cases: if the key is not found nothing is done, and if it is
     942                 :            :    found the corresponding item is deleted with delete_item.
     943                 :            : */
     944                 :            : 
     945                 :            : int
     946                 :      57419 : FlintTable::delete_kt()
     947                 :            : {
     948                 :            :     Assert(writable);
     949                 :            : 
     950                 :      57419 :     bool found = find(C);
     951                 :            : 
     952                 :      57419 :     int components = 0;
     953                 :      57419 :     seq_count = SEQ_START_POINT;
     954                 :      57419 :     sequential = false;
     955                 :            : 
     956                 :            :     /*
     957                 :            :     {
     958                 :            :         printf("%d) %s ", addcount++, (found ? "deleting " : "ignoring "));
     959                 :            :         print_bytes(B->kt[I2] - K1 - C2, B->kt + I2 + K1); putchar('\n');
     960                 :            :     }
     961                 :            :     */
     962         [ +  + ]:      57419 :     if (found) {
     963                 :      57306 :         components = Item_(C[0].p, C[0].c).components_of();
     964                 :      57306 :         alter();
     965                 :      57306 :         delete_item(0, true);
     966                 :            :     }
     967                 :      57419 :     return components;
     968                 :            : }
     969                 :            : 
     970                 :            : /* FlintTable::form_key(key) treats address kt as an item holder and fills in
     971                 :            : the key part:
     972                 :            : 
     973                 :            :            (I) K key c (C tag)
     974                 :            : 
     975                 :            : The bracketed parts are left blank. The key is filled in with key_len bytes and
     976                 :            : K set accordingly. c is set to 1.
     977                 :            : */
     978                 :            : 
     979                 :    2428533 : void FlintTable::form_key(const string & key) const
     980                 :            : {
     981                 :    2428533 :     kt.form_key(key);
     982                 :    2428525 : }
     983                 :            : 
     984                 :            : /* FlintTable::add(key, tag) adds the key/tag item to the
     985                 :            :    B-tree, replacing any existing item with the same key.
     986                 :            : 
     987                 :            :    For a long tag, we end up having to add m components, of the form
     988                 :            : 
     989                 :            :        key 1 m tag1
     990                 :            :        key 2 m tag2
     991                 :            :        ...
     992                 :            :        key m m tagm
     993                 :            : 
     994                 :            :    and tag1+tag2+...+tagm are equal to tag. These in their turn may be replacing
     995                 :            :    n components of the form
     996                 :            : 
     997                 :            :        key 1 n TAG1
     998                 :            :        key 2 n TAG2
     999                 :            :        ...
    1000                 :            :        key n n TAGn
    1001                 :            : 
    1002                 :            :    and n may be greater than, equal to, or less than m. These cases are dealt
    1003                 :            :    with in the code below. If m < n for example, we end up with a series of
    1004                 :            :    deletions.
    1005                 :            : */
    1006                 :            : 
    1007                 :            : void
    1008                 :     973401 : FlintTable::add(const string &key, string tag, bool already_compressed)
    1009                 :            : {
    1010                 :            :     LOGCALL_VOID(DB, "FlintTable::add", key | tag);
    1011                 :            :     Assert(writable);
    1012                 :            : 
    1013         [ +  + ]:     973401 :     if (handle < 0) create_and_open(block_size);
    1014                 :            : 
    1015                 :     973401 :     form_key(key);
    1016                 :            : 
    1017                 :     973393 :     bool compressed = false;
    1018         [ +  + ]:     973393 :     if (already_compressed) {
    1019                 :        113 :         compressed = true;
    1020 [ +  + ][ +  + ]:     973280 :     } else if (compress_strategy != DONT_COMPRESS && tag.size() > COMPRESS_MIN) {
                 [ +  + ]
    1021                 :            :         CompileTimeAssert(DONT_COMPRESS != Z_DEFAULT_STRATEGY);
    1022                 :            :         CompileTimeAssert(DONT_COMPRESS != Z_FILTERED);
    1023                 :            :         CompileTimeAssert(DONT_COMPRESS != Z_HUFFMAN_ONLY);
    1024                 :            : #ifdef Z_RLE
    1025                 :            :         CompileTimeAssert(DONT_COMPRESS != Z_RLE);
    1026                 :            : #endif
    1027                 :            : 
    1028                 :      51529 :         lazy_alloc_deflate_zstream();
    1029                 :            : 
    1030                 :            :         // zlib takes a non-const pointer to the input, but doesn't modify it.
    1031                 :      51529 :         char * non_const_tag = const_cast<char *>(tag.data());
    1032                 :      51529 :         deflate_zstream->next_in = reinterpret_cast<Byte *>(non_const_tag);
    1033                 :      51529 :         deflate_zstream->avail_in = uInt(tag.size());
    1034                 :            : 
    1035                 :            :         // If compressed size is >= tag.size(), we don't want to compress.
    1036                 :      51529 :         unsigned long blk_len = tag.size() - 1;
    1037                 :      51529 :         unsigned char * blk = new unsigned char[blk_len];
    1038                 :      51529 :         deflate_zstream->next_out = blk;
    1039                 :      51529 :         deflate_zstream->avail_out = uInt(blk_len);
    1040                 :            : 
    1041                 :      51529 :         int err = deflate(deflate_zstream, Z_FINISH);
    1042         [ +  + ]:      51529 :         if (err == Z_STREAM_END) {
    1043                 :            :             // If deflate succeeded, then the output was at least one byte
    1044                 :            :             // smaller than the input.
    1045                 :       9352 :             tag.assign(reinterpret_cast<const char *>(blk), deflate_zstream->total_out);
    1046                 :       9352 :             compressed = true;
    1047                 :            :         } else {
    1048                 :            :             // Deflate failed - presumably the data wasn't compressible.
    1049                 :            :         }
    1050                 :            : 
    1051         [ +  - ]:      51529 :         delete [] blk;
    1052                 :            :     }
    1053                 :            : 
    1054                 :            :     // sort of matching kt.append_chunk(), but setting the chunk
    1055                 :     973393 :     const size_t cd = kt.key().length() + K1 + I2 + C2 + C2;  // offset to the tag data
    1056                 :     973393 :     const size_t L = max_item_size - cd; // largest amount of tag data for any chunk
    1057                 :     973393 :     size_t first_L = L;                  // - amount for tag1
    1058                 :     973393 :     bool found = find(C);
    1059         [ +  + ]:     973393 :     if (!found) {
    1060                 :     932860 :         byte * p = C[0].p;
    1061                 :     932860 :         size_t n = TOTAL_FREE(p) % (max_item_size + D2);
    1062         [ +  + ]:     932860 :         if (n > D2 + cd) {
    1063                 :     904574 :             n -= (D2 + cd);
    1064                 :            :             // if n >= last then fully filling this block won't produce
    1065                 :            :             // an extra item, so we might as well do this even if
    1066                 :            :             // full_compaction isn't active.
    1067                 :            :             //
    1068                 :            :             // In the full_compaction case, it turns out we shouldn't always
    1069                 :            :             // try to fill every last byte.  Doing so can actually increase the
    1070                 :            :             // total space required (I believe this effect is due to longer
    1071                 :            :             // dividing keys being required in the index blocks).  Empirically,
    1072                 :            :             // n >= key.size() + K appears a good criterion for K ~= 34.  This
    1073                 :            :             // seems to save about 0.2% in total database size over always
    1074                 :            :             // splitting the tag.  It'll also give be slightly faster retrieval
    1075                 :            :             // as we can avoid reading an extra block occasionally.
    1076                 :     904574 :             size_t last = tag.length() % L;
    1077   [ +  +  +  + ]:     904574 :             if (n >= last || (full_compaction && n >= key.size() + 34))
         [ +  + ][ +  + ]
    1078                 :     895414 :                 first_L = n;
    1079                 :            :         }
    1080                 :            :     }
    1081                 :            : 
    1082                 :            :     // a null tag must be added in of course
    1083         [ +  + ]:     973393 :     int m = tag.empty() ? 1 : (tag.length() - first_L + L - 1) / L + 1;
    1084                 :            :                                       // there are m items to add
    1085                 :            :     /* FIXME: sort out this error higher up and turn this into
    1086                 :            :      * an assert.
    1087                 :            :      */
    1088         [ -  + ]:     973393 :     if (m >= BYTE_PAIR_RANGE)
    1089                 :          0 :         throw Xapian::UnimplementedError("Can't handle insanely large tags");
    1090                 :            : 
    1091                 :     973393 :     int n = 0; // initialise to shut off warning
    1092                 :            :                                       // - and there will be n to delete
    1093                 :     973393 :     int o = 0;                        // Offset into the tag
    1094                 :     973393 :     size_t residue = tag.length();    // Bytes of the tag remaining to add in
    1095                 :     973393 :     int replacement = false;          // Has there been a replacement ?
    1096                 :            :     int i;
    1097                 :     973393 :     kt.set_components_of(m);
    1098         [ +  + ]:    1954299 :     for (i = 1; i <= m; i++) {
    1099 [ +  + ][ +  + ]:     980906 :         size_t l = (i == m ? residue : (i == 1 ? first_L : L));
    1100                 :            :         Assert(cd + l <= block_size);
    1101                 :            :         Assert(string::size_type(o + l) <= tag.length());
    1102                 :     980906 :         kt.set_tag(cd, tag.data() + o, l, compressed);
    1103                 :     980906 :         kt.set_component_of(i);
    1104                 :            : 
    1105                 :     980906 :         o += l;
    1106                 :     980906 :         residue -= l;
    1107                 :            : 
    1108         [ +  + ]:     980906 :         if (i > 1) found = find(C);
    1109                 :     980906 :         n = add_kt(found);
    1110         [ +  + ]:     980906 :         if (n > 0) replacement = true;
    1111                 :            :     }
    1112                 :            :     /* o == tag.length() here, and n may be zero */
    1113         [ -  + ]:     973393 :     for (i = m + 1; i <= n; i++) {
    1114                 :          0 :         kt.set_component_of(i);
    1115                 :          0 :         delete_kt();
    1116                 :            :     }
    1117         [ +  + ]:     973393 :     if (!replacement) ++item_count;
    1118                 :     973393 :     Btree_modified = true;
    1119         [ +  + ]:     973393 :     if (cursor_created_since_last_modification) {
    1120                 :      21619 :         cursor_created_since_last_modification = false;
    1121                 :      21619 :         ++cursor_version;
    1122                 :            :     }
    1123                 :     973393 : }
    1124                 :            : 
    1125                 :            : /* FlintTable::del(key) returns false if the key is not in the B-tree,
    1126                 :            :    otherwise deletes it and returns true.
    1127                 :            : 
    1128                 :            :    Again, this is parallel to FlintTable::add, but simpler in form.
    1129                 :            : */
    1130                 :            : 
    1131                 :            : bool
    1132                 :      61736 : FlintTable::del(const string &key)
    1133                 :            : {
    1134                 :            :     LOGCALL(DB, bool, "FlintTable::del", key);
    1135                 :            :     Assert(writable);
    1136                 :            : 
    1137         [ +  + ]:      61736 :     if (handle < 0) {
    1138         [ -  + ]:       4443 :         if (handle == -2) {
    1139                 :          0 :             FlintTable::throw_database_closed();
    1140                 :            :         }
    1141                 :       4443 :         RETURN(false);
    1142                 :            :     }
    1143                 :            : 
    1144                 :            :     // We can't delete a key which we is too long for us to store.
    1145         [ -  + ]:      57293 :     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
    1146                 :            : 
    1147         [ -  + ]:      57293 :     if (key.empty()) RETURN(false);
    1148                 :      57293 :     form_key(key);
    1149                 :            : 
    1150                 :      57293 :     int n = delete_kt();  /* there are n items to delete */
    1151         [ +  + ]:      57293 :     if (n <= 0) RETURN(false);
    1152                 :            : 
    1153         [ +  + ]:      57306 :     for (int i = 2; i <= n; i++) {
    1154                 :        126 :         kt.set_component_of(i);
    1155                 :        126 :         delete_kt();
    1156                 :            :     }
    1157                 :            : 
    1158                 :      57180 :     item_count--;
    1159                 :      57180 :     Btree_modified = true;
    1160         [ +  + ]:      57180 :     if (cursor_created_since_last_modification) {
    1161                 :         49 :         cursor_created_since_last_modification = false;
    1162                 :         49 :         ++cursor_version;
    1163                 :            :     }
    1164                 :      61736 :     RETURN(true);
    1165                 :            : }
    1166                 :            : 
    1167                 :            : bool
    1168                 :    1280996 : FlintTable::get_exact_entry(const string &key, string & tag) const
    1169                 :            : {
    1170                 :            :     LOGCALL(DB, bool, "FlintTable::get_exact_entry", key | tag);
    1171                 :            :     Assert(!key.empty());
    1172                 :            : 
    1173         [ +  + ]:    1280996 :     if (handle < 0) {
    1174         [ +  + ]:      50964 :         if (handle == -2) {
    1175                 :          8 :             FlintTable::throw_database_closed();
    1176                 :            :         }
    1177                 :      50956 :         RETURN(false);
    1178                 :            :     }
    1179                 :            : 
    1180                 :            :     // An oversized key can't exist, so attempting to search for it should fail.
    1181         [ +  + ]:    1230032 :     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
    1182                 :            : 
    1183                 :    1230000 :     form_key(key);
    1184         [ +  + ]:    1230000 :     if (!find(C)) RETURN(false);
    1185                 :            : 
    1186                 :    1108737 :     (void)read_tag(C, &tag, false);
    1187                 :    1280986 :     RETURN(true);
    1188                 :            : }
    1189                 :            : 
    1190                 :            : bool
    1191                 :        657 : FlintTable::key_exists(const string &key) const
    1192                 :            : {
    1193                 :            :     LOGCALL(DB, bool, "FlintTable::key_exists", key);
    1194                 :            :     Assert(!key.empty());
    1195                 :            : 
    1196                 :            :     // An oversized key can't exist, so attempting to search for it should fail.
    1197         [ -  + ]:        657 :     if (key.size() > FLINT_BTREE_MAX_KEY_LEN) RETURN(false);
    1198                 :            : 
    1199                 :        657 :     form_key(key);
    1200                 :        657 :     RETURN(find(C));
    1201                 :            : }
    1202                 :            : 
    1203                 :            : bool
    1204                 :    1284142 : FlintTable::read_tag(Cursor_ * C_, string *tag, bool keep_compressed) const
    1205                 :            : {
    1206                 :    1284142 :     Item_ item(C_[0].p, C_[0].c);
    1207                 :            : 
    1208                 :            :     /* n components to join */
    1209                 :    1284142 :     int n = item.components_of();
    1210                 :            : 
    1211                 :    1284142 :     tag->resize(0);
    1212                 :            :     // max_item_size also includes K1 + I2 + C2 + C2 bytes overhead and the key
    1213                 :            :     // (which is at least 1 byte long).
    1214         [ +  + ]:    1284142 :     if (n > 1) tag->reserve((max_item_size - (1 + K1 + I2 + C2 + C2)) * n);
    1215                 :            : 
    1216                 :    1284142 :     item.append_chunk(tag);
    1217                 :    1284142 :     bool compressed = item.get_compressed();
    1218                 :            : 
    1219         [ +  + ]:    1808255 :     for (int i = 2; i <= n; i++) {
    1220         [ -  + ]:     524114 :         if (!next(C_, 0)) {
    1221                 :          0 :             throw Xapian::DatabaseCorruptError("Unexpected end of table when reading continuation of tag");
    1222                 :            :         }
    1223                 :     524113 :         (void)Item_(C_[0].p, C_[0].c).append_chunk(tag);
    1224                 :            :     }
    1225                 :            :     // At this point the cursor is on the last item - calling next will move
    1226                 :            :     // it to the next key (FlintCursor::get_tag() relies on this).
    1227 [ +  + ][ +  + ]:    1284141 :     if (!compressed || keep_compressed) return compressed;
    1228                 :            : 
    1229                 :            :     // FIXME: Perhaps we should we decompress each chunk as we read it so we
    1230                 :            :     // don't need both the full compressed and uncompressed tags in memory
    1231                 :            :     // at once.
    1232                 :            : 
    1233                 :     216758 :     string utag;
    1234                 :            :     // May not be enough for a compressed tag, but it's a reasonable guess.
    1235                 :     216758 :     utag.reserve(tag->size() + tag->size() / 2);
    1236                 :            : 
    1237                 :            :     Bytef buf[8192];
    1238                 :            : 
    1239                 :     216758 :     lazy_alloc_inflate_zstream();
    1240                 :            : 
    1241                 :            :     // zlib takes a non-const pointer to the input, but doesn't modify it.
    1242                 :     216758 :     char * non_const_tag = const_cast<char *>(tag->data());
    1243                 :     216758 :     inflate_zstream->next_in = reinterpret_cast<Byte *>(non_const_tag);
    1244                 :     216758 :     inflate_zstream->avail_in = uInt(tag->size());
    1245                 :            : 
    1246                 :     216758 :     int err = Z_OK;
    1247         [ +  + ]:     433516 :     while (err != Z_STREAM_END) {
    1248                 :     216758 :         inflate_zstream->next_out = buf;
    1249                 :     216758 :         inflate_zstream->avail_out = uInt(sizeof(buf));
    1250                 :     216758 :         err = inflate(inflate_zstream, Z_SYNC_FLUSH);
    1251   [ -  +  #  # ]:     216758 :         if (err == Z_BUF_ERROR && inflate_zstream->avail_in == 0) {
    1252                 :            :             LOGLINE(DB, "Z_BUF_ERROR - faking checksum of " << inflate_zstream->adler);
    1253                 :            :             Bytef header2[4];
    1254                 :          0 :             setint4(header2, 0, inflate_zstream->adler);
    1255                 :          0 :             inflate_zstream->next_in = header2;
    1256                 :          0 :             inflate_zstream->avail_in = 4;
    1257                 :          0 :             err = inflate(inflate_zstream, Z_SYNC_FLUSH);
    1258         [ #  # ]:          0 :             if (err == Z_STREAM_END) break;
    1259                 :            :         }
    1260                 :            : 
    1261 [ +  - ][ -  + ]:     216758 :         if (err != Z_OK && err != Z_STREAM_END) {
    1262         [ #  # ]:          0 :             if (err == Z_MEM_ERROR) throw std::bad_alloc();
    1263                 :          0 :             string msg = "inflate failed";
    1264         [ #  # ]:          0 :             if (inflate_zstream->msg) {
    1265                 :          0 :                 msg += " (";
    1266                 :          0 :                 msg += inflate_zstream->msg;
    1267                 :          0 :                 msg += ')';
    1268                 :            :             }
    1269                 :          0 :             throw Xapian::DatabaseError(msg);
    1270                 :            :         }
    1271                 :            : 
    1272                 :            :         utag.append(reinterpret_cast<const char *>(buf),
    1273                 :     216758 :                     inflate_zstream->next_out - buf);
    1274                 :            :     }
    1275         [ -  + ]:     216758 :     if (utag.size() != inflate_zstream->total_out) {
    1276                 :          0 :         string msg = "compressed tag didn't expand to the expected size: ";
    1277                 :          0 :         msg += str(utag.size());
    1278                 :          0 :         msg += " != ";
    1279                 :            :         // OpenBSD's zlib.h uses off_t instead of uLong for total_out.
    1280                 :          0 :         msg += str(size_t(inflate_zstream->total_out));
    1281                 :          0 :         throw Xapian::DatabaseCorruptError(msg);
    1282                 :            :     }
    1283                 :            : 
    1284                 :     216758 :     swap(*tag, utag);
    1285                 :            : 
    1286                 :    1284141 :     return false;
    1287                 :            : }
    1288                 :            : 
    1289                 :            : void
    1290                 :         51 : FlintTable::set_full_compaction(bool parity)
    1291                 :            : {
    1292                 :            :     Assert(writable);
    1293                 :            : 
    1294         [ +  - ]:         51 :     if (parity) seq_count = 0;
    1295                 :         51 :     full_compaction = parity;
    1296                 :         51 : }
    1297                 :            : 
    1298                 :     166211 : FlintCursor * FlintTable::cursor_get() const {
    1299         [ +  + ]:     166211 :     if (handle < 0) {
    1300         [ +  + ]:          7 :         if (handle == -2) {
    1301                 :          5 :             FlintTable::throw_database_closed();
    1302                 :            :         }
    1303                 :          2 :         return NULL;
    1304                 :            :     }
    1305                 :            :     // FIXME Ick - casting away const is nasty
    1306                 :     166206 :     return new FlintCursor(const_cast<FlintTable *>(this));
    1307                 :            : }
    1308                 :            : 
    1309                 :            : /************ B-tree opening and closing ************/
    1310                 :            : 
    1311                 :            : bool
    1312                 :      10447 : FlintTable::basic_open(bool revision_supplied, flint_revision_number_t revision_)
    1313                 :            : {
    1314                 :      10447 :     char ch = 'X'; /* will be 'A' or 'B' */
    1315                 :            : 
    1316                 :            :     {
    1317                 :      10447 :         const size_t BTREE_BASES = 2;
    1318                 :      10447 :         string err_msg;
    1319                 :            :         static const char basenames[BTREE_BASES] = { 'A', 'B' };
    1320                 :            : 
    1321 [ +  + ][ #  # ]:      31341 :         FlintTable_base bases[BTREE_BASES];
                 [ #  # ]
    1322                 :            :         bool base_ok[BTREE_BASES];
    1323                 :            : 
    1324                 :      10447 :         both_bases = true;
    1325                 :      10447 :         bool valid_base = false;
    1326                 :            :         {
    1327         [ +  + ]:      31341 :             for (size_t i = 0; i < BTREE_BASES; ++i) {
    1328                 :      20894 :                 bool ok = bases[i].read(name, basenames[i], writable, err_msg);
    1329                 :      20894 :                 base_ok[i] = ok;
    1330         [ +  + ]:      20894 :                 if (ok) {
    1331                 :      18226 :                     valid_base = true;
    1332                 :            :                 } else {
    1333                 :       2668 :                     both_bases = false;
    1334                 :            :                 }
    1335                 :            :             }
    1336                 :            :         }
    1337                 :            : 
    1338         [ -  + ]:      10447 :         if (!valid_base) {
    1339         [ #  # ]:          0 :             if (handle >= 0) {
    1340                 :          0 :                 ::close(handle);
    1341                 :          0 :                 handle = -1;
    1342                 :            :             }
    1343                 :          0 :             string message = "Error opening table `";
    1344                 :          0 :             message += name;
    1345                 :          0 :             message += "':\n";
    1346                 :          0 :             message += err_msg;
    1347                 :          0 :             throw Xapian::DatabaseOpeningError(message);
    1348                 :            :         }
    1349                 :            : 
    1350         [ +  + ]:      10447 :         if (revision_supplied) {
    1351                 :       6660 :             bool found_revision = false;
    1352         [ +  - ]:      12330 :             for (size_t i = 0; i < BTREE_BASES; ++i) {
    1353 [ +  + ][ +  + ]:      12330 :                 if (base_ok[i] && bases[i].get_revision() == revision_) {
                 [ +  + ]
    1354                 :       6660 :                     ch = basenames[i];
    1355                 :       6660 :                     found_revision = true;
    1356                 :       6660 :                     break;
    1357                 :            :                 }
    1358                 :            :             }
    1359         [ -  + ]:       6660 :             if (!found_revision) {
    1360                 :            :                 /* Couldn't open the revision that was asked for.
    1361                 :            :                  * This shouldn't throw an exception, but should just return
    1362                 :            :                  * false to upper levels.
    1363                 :            :                  */
    1364                 :          0 :                 return false;
    1365                 :            :             }
    1366                 :            :         } else {
    1367                 :       3787 :             flint_revision_number_t highest_revision = 0;
    1368         [ +  + ]:      11361 :             for (size_t i = 0; i < BTREE_BASES; ++i) {
    1369 [ +  + ][ +  + ]:       7574 :                 if (base_ok[i] && bases[i].get_revision() >= highest_revision) {
                 [ +  + ]
    1370                 :       5698 :                     ch = basenames[i];
    1371                 :       5698 :                     highest_revision = bases[i].get_revision();
    1372                 :            :                 }
    1373                 :            :             }
    1374                 :            :         }
    1375                 :            : 
    1376                 :      10447 :         FlintTable_base *basep = 0;
    1377                 :      10447 :         FlintTable_base *other_base = 0;
    1378                 :            : 
    1379         [ +  - ]:      18028 :         for (size_t i = 0; i < BTREE_BASES; ++i) {
    1380                 :            :             LOGLINE(DB, "Checking (ch == " << ch << ") against "
    1381                 :            :                     "basenames[" << i << "] == " << basenames[i]);
    1382                 :            :             LOGLINE(DB, "bases[" << i << "].get_revision() == " <<
    1383                 :            :                     bases[i].get_revision());
    1384                 :            :             LOGLINE(DB, "base_ok[" << i << "] == " << base_ok[i]);
    1385         [ +  + ]:      18028 :             if (ch == basenames[i]) {
    1386                 :      10447 :                 basep = &bases[i];
    1387                 :            : 
    1388                 :            :                 // FIXME: assuming only two bases for other_base
    1389                 :      10447 :                 size_t otherbase_num = 1-i;
    1390         [ +  + ]:      10447 :                 if (base_ok[otherbase_num]) {
    1391                 :       7779 :                     other_base = &bases[otherbase_num];
    1392                 :            :                 }
    1393                 :      10447 :                 break;
    1394                 :            :             }
    1395                 :            :         }
    1396                 :            :         Assert(basep);
    1397                 :            : 
    1398                 :            :         /* basep now points to the most recent base block */
    1399                 :            : 
    1400                 :            :         /* Avoid copying the bitmap etc. - swap contents with the base
    1401                 :            :          * object in the vector, since it'll be destroyed anyway soon.
    1402                 :            :          */
    1403                 :      10447 :         base.swap(*basep);
    1404                 :            : 
    1405                 :      10447 :         revision_number =  base.get_revision();
    1406                 :      10447 :         block_size =       base.get_block_size();
    1407                 :      10447 :         root =             base.get_root();
    1408                 :      10447 :         level =            base.get_level();
    1409                 :            :         //bit_map_size =     basep->get_bit_map_size();
    1410                 :      10447 :         item_count =       base.get_item_count();
    1411                 :      10447 :         faked_root_block = base.get_have_fakeroot();
    1412                 :      10447 :         sequential =       base.get_sequential();
    1413                 :            : 
    1414         [ +  + ]:      10447 :         if (other_base != 0) {
    1415                 :       7779 :             latest_revision_number = other_base->get_revision();
    1416         [ +  - ]:       7779 :             if (revision_number > latest_revision_number)
    1417                 :       7779 :                 latest_revision_number = revision_number;
    1418                 :            :         } else {
    1419                 :      10447 :             latest_revision_number = revision_number;
    1420 [ #  # ][ +  + ]:      31341 :         }
         [ -  + ][ +  - ]
    1421                 :            :     }
    1422                 :            : 
    1423                 :            :     /* kt holds constructed items as well as keys */
    1424                 :      10447 :     kt = Item_wr_(zeroed_new(block_size));
    1425                 :            : 
    1426                 :      10447 :     set_max_item_size(BLOCK_CAPACITY);
    1427                 :            : 
    1428                 :      10447 :     base_letter = ch;
    1429                 :            : 
    1430                 :            :     /* ready to open the main file */
    1431                 :            : 
    1432                 :      10447 :     return true;
    1433                 :            : }
    1434                 :            : 
    1435                 :            : void
    1436                 :      13189 : FlintTable::read_root()
    1437                 :            : {
    1438         [ +  + ]:      13189 :     if (faked_root_block) {
    1439                 :            :         /* root block for an unmodified database. */
    1440                 :       3333 :         byte * p = C[0].p;
    1441                 :            :         Assert(p);
    1442                 :            : 
    1443                 :            :         /* clear block - shouldn't be necessary, but is a bit nicer,
    1444                 :            :          * and means that the same operations should always produce
    1445                 :            :          * the same database. */
    1446                 :       3333 :         memset(p, 0, block_size);
    1447                 :            : 
    1448                 :       3333 :         int o = block_size - I2 - K1 - C2 - C2;
    1449                 :       3333 :         Item_wr_(p + o).fake_root_item();
    1450                 :            : 
    1451                 :       3333 :         setD(p, DIR_START, o);         // its directory entry
    1452                 :       3333 :         SET_DIR_END(p, DIR_START + D2);// the directory size
    1453                 :            : 
    1454                 :       3333 :         o -= (DIR_START + D2);
    1455                 :       3333 :         SET_MAX_FREE(p, o);
    1456                 :       3333 :         SET_TOTAL_FREE(p, o);
    1457                 :       3333 :         SET_LEVEL(p, 0);
    1458                 :            : 
    1459         [ +  + ]:       3333 :         if (!writable) {
    1460                 :            :             /* reading - revision number doesn't matter as long as
    1461                 :            :              * it's not greater than the current one. */
    1462                 :        572 :             SET_REVISION(p, 0);
    1463                 :        572 :             C[0].n = 0;
    1464                 :            :         } else {
    1465                 :            :             /* writing - */
    1466                 :       2761 :             SET_REVISION(p, latest_revision_number + 1);
    1467                 :       2761 :             C[0].n = base.next_free_block();
    1468                 :            :         }
    1469                 :            :     } else {
    1470                 :            :         /* using a root block stored on disk */
    1471                 :       9856 :         block_to_cursor(C, level, root);
    1472                 :            : 
    1473         [ -  + ]:       9856 :         if (REVISION(C[level].p) > revision_number) set_overwritten();
    1474                 :            :         /* although this is unlikely */
    1475                 :            :     }
    1476                 :      13189 : }
    1477                 :            : 
    1478                 :            : bool
    1479                 :       3756 : FlintTable::do_open_to_write(bool revision_supplied,
    1480                 :            :                              flint_revision_number_t revision_,
    1481                 :            :                              bool create_db)
    1482                 :            : {
    1483         [ -  + ]:       3756 :     if (handle == -2) {
    1484                 :          0 :         FlintTable::throw_database_closed();
    1485                 :            :     }
    1486                 :       3756 :     int flags = O_RDWR | O_BINARY;
    1487         [ +  + ]:       3756 :     if (create_db) flags |= O_CREAT | O_TRUNC;
    1488                 :       3756 :     handle = ::open((name + "DB").c_str(), flags, 0666);
    1489         [ +  + ]:       3756 :     if (handle < 0) {
    1490                 :            :         // lazy doesn't make a lot of sense with create_db anyway, but ENOENT
    1491                 :            :         // with O_CREAT means a parent directory doesn't exist.
    1492 [ +  - ][ +  - ]:       1154 :         if (lazy && !create_db && errno == ENOENT) {
                 [ +  - ]
    1493                 :       1154 :             revision_number = revision_;
    1494                 :       1154 :             return true;
    1495                 :            :         }
    1496         [ #  # ]:          0 :         string message(create_db ? "Couldn't create " : "Couldn't open ");
    1497                 :          0 :         message += name;
    1498                 :          0 :         message += "DB read/write: ";
    1499                 :          0 :         message += strerror(errno);
    1500                 :          0 :         throw Xapian::DatabaseOpeningError(message);
    1501                 :            :     }
    1502                 :            : 
    1503         [ -  + ]:       2602 :     if (!basic_open(revision_supplied, revision_)) {
    1504                 :          0 :         ::close(handle);
    1505                 :          0 :         handle = -1;
    1506         [ #  # ]:          0 :         if (!revision_supplied) {
    1507                 :          0 :             throw Xapian::DatabaseOpeningError("Failed to open for writing");
    1508                 :            :         }
    1509                 :            :         /* When the revision is supplied, it's not an exceptional
    1510                 :            :          * case when open failed, so we just return false here.
    1511                 :            :          */
    1512                 :          0 :         return false;
    1513                 :            :     }
    1514                 :            : 
    1515                 :       2602 :     writable = true;
    1516                 :            : 
    1517         [ +  + ]:       5292 :     for (int j = 0; j <= level; j++) {
    1518                 :       2690 :         C[j].n = BLK_UNUSED;
    1519                 :       2690 :         C[j].p = new byte[block_size];
    1520         [ -  + ]:       2690 :         if (C[j].p == 0) {
    1521                 :          0 :             throw std::bad_alloc();
    1522                 :            :         }
    1523                 :            :     }
    1524                 :       2602 :     split_p = new byte[block_size];
    1525         [ -  + ]:       2602 :     if (split_p == 0) {
    1526                 :          0 :         throw std::bad_alloc();
    1527                 :            :     }
    1528                 :       2602 :     read_root();
    1529                 :            : 
    1530                 :       2602 :     buffer = zeroed_new(block_size);
    1531                 :            : 
    1532                 :       2602 :     changed_n = 0;
    1533                 :       2602 :     changed_c = DIR_START;
    1534                 :       2602 :     seq_count = SEQ_START_POINT;
    1535                 :            : 
    1536                 :       3756 :     return true;
    1537                 :            : }
    1538                 :            : 
    1539                 :      15543 : FlintTable::FlintTable(const char * tablename_, const string & path_,
    1540                 :            :                        bool readonly_, int compress_strategy_, bool lazy_)
    1541                 :            :         : tablename(tablename_),
    1542                 :            :           revision_number(0),
    1543                 :            :           item_count(0),
    1544                 :            :           block_size(0),
    1545                 :            :           latest_revision_number(0),
    1546                 :            :           both_bases(false),
    1547                 :            :           base_letter('A'),
    1548                 :            :           faked_root_block(true),
    1549                 :            :           sequential(true),
    1550                 :            :           handle(-1),
    1551                 :            :           level(0),
    1552                 :            :           root(0),
    1553                 :            :           kt(0),
    1554                 :            :           buffer(0),
    1555                 :            :           base(),
    1556                 :            :           name(path_),
    1557                 :            :           seq_count(0),
    1558                 :            :           changed_n(0),
    1559                 :            :           changed_c(0),
    1560                 :            :           max_item_size(0),
    1561                 :            :           Btree_modified(false),
    1562                 :            :           full_compaction(false),
    1563                 :            :           writable(!readonly_),
    1564                 :            :           cursor_created_since_last_modification(false),
    1565                 :            :           cursor_version(0),
    1566                 :            :           split_p(0),
    1567                 :            :           compress_strategy(compress_strategy_),
    1568                 :            :           deflate_zstream(NULL),
    1569                 :            :           inflate_zstream(NULL),
    1570 [ +  + ][ +  + ]:     170973 :           lazy(lazy_)
    1571                 :            : {
    1572                 :            :     LOGCALL_VOID(DB, "FlintTable::FlintTable", tablename_ | path_ | readonly_ | compress_strategy_ | lazy_);
    1573                 :      15543 : }
    1574                 :            : 
    1575                 :            : bool
    1576                 :        886 : FlintTable::really_empty() const
    1577                 :            : {
    1578         [ +  + ]:        886 :     if (handle < 0) {
    1579         [ -  + ]:        400 :         if (handle == -2) {
    1580                 :          0 :             FlintTable::throw_database_closed();
    1581                 :            :         }
    1582                 :        400 :         return true;
    1583                 :            :     }
    1584                 :        486 :     FlintCursor cur(const_cast<FlintTable*>(this));
    1585                 :        486 :     cur.find_entry(string());
    1586                 :        886 :     return !cur.next();
    1587                 :            : }
    1588                 :            : 
    1589                 :            : void
    1590                 :      51529 : FlintTable::lazy_alloc_deflate_zstream() const {
    1591         [ +  + ]:      51529 :     if (usual(deflate_zstream)) {
    1592         [ +  - ]:      51175 :         if (usual(deflateReset(deflate_zstream) == Z_OK)) return;
    1593                 :            :         // Try to recover by deleting the stream and starting from scratch.
    1594                 :          0 :         delete deflate_zstream;
    1595                 :            :     }
    1596                 :            : 
    1597                 :        354 :     deflate_zstream = new z_stream;
    1598                 :            : 
    1599                 :        354 :     deflate_zstream->zalloc = Z_NULL;
    1600                 :        354 :     deflate_zstream->zfree = Z_NULL;
    1601                 :        354 :     deflate_zstream->opaque = Z_NULL;
    1602                 :            : 
    1603                 :            :     // -15 means raw deflate with 32K LZ77 window (largest)
    1604                 :            :     // memLevel 9 is the highest (8 is default)
    1605                 :            :     int err;
    1606                 :            :     err = deflateInit2(deflate_zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
    1607                 :        354 :                        -15, 9, compress_strategy);
    1608         [ -  + ]:        354 :     if (rare(err != Z_OK)) {
    1609         [ #  # ]:          0 :         if (err == Z_MEM_ERROR) {
    1610                 :          0 :             delete deflate_zstream;
    1611                 :          0 :             deflate_zstream = 0;
    1612                 :          0 :             throw std::bad_alloc();
    1613                 :            :         }
    1614                 :          0 :         string msg = "deflateInit2 failed (";
    1615         [ #  # ]:          0 :         if (deflate_zstream->msg) {
    1616                 :          0 :             msg += deflate_zstream->msg;
    1617                 :            :         } else {
    1618                 :          0 :             msg += str(err);
    1619                 :            :         }
    1620                 :          0 :         msg += ')';
    1621                 :          0 :         delete deflate_zstream;
    1622                 :          0 :         deflate_zstream = 0;
    1623                 :      51529 :         throw Xapian::DatabaseError(msg);
    1624                 :            :     }
    1625                 :            : }
    1626                 :            : 
    1627                 :            : void
    1628                 :     216758 : FlintTable::lazy_alloc_inflate_zstream() const {
    1629         [ +  + ]:     216758 :     if (usual(inflate_zstream)) {
    1630         [ +  - ]:     216564 :         if (usual(inflateReset(inflate_zstream) == Z_OK)) return;
    1631                 :            :         // Try to recover by deleting the stream and starting from scratch.
    1632                 :          0 :         delete inflate_zstream;
    1633                 :            :     }
    1634                 :            : 
    1635                 :        194 :     inflate_zstream = new z_stream;
    1636                 :            : 
    1637                 :        194 :     inflate_zstream->zalloc = Z_NULL;
    1638                 :        194 :     inflate_zstream->zfree = Z_NULL;
    1639                 :        194 :     inflate_zstream->opaque = Z_NULL;
    1640                 :            : 
    1641                 :        194 :     inflate_zstream->next_in = Z_NULL;
    1642                 :        194 :     inflate_zstream->avail_in = 0;
    1643                 :            : 
    1644                 :        194 :     int err = inflateInit2(inflate_zstream, -15);
    1645         [ -  + ]:        194 :     if (rare(err != Z_OK)) {
    1646         [ #  # ]:          0 :         if (err == Z_MEM_ERROR) {
    1647                 :          0 :             delete inflate_zstream;
    1648                 :          0 :             inflate_zstream = 0;
    1649                 :          0 :             throw std::bad_alloc();
    1650                 :            :         }
    1651                 :          0 :         string msg = "inflateInit2 failed (";
    1652         [ #  # ]:          0 :         if (inflate_zstream->msg) {
    1653                 :          0 :             msg += inflate_zstream->msg;
    1654                 :            :         } else {
    1655                 :          0 :             msg += str(err);
    1656                 :            :         }
    1657                 :          0 :         msg += ')';
    1658                 :          0 :         delete inflate_zstream;
    1659                 :          0 :         inflate_zstream = 0;
    1660                 :     216758 :         throw Xapian::DatabaseError(msg);
    1661                 :            :     }
    1662                 :            : }
    1663                 :            : 
    1664                 :            : bool
    1665                 :        735 : FlintTable::exists() const {
    1666                 :            :     LOGCALL(DB, bool, "FlintTable::exists", NO_ARGS);
    1667                 :            :     return (file_exists(name + "DB") &&
    1668 [ +  + ][ -  + ]:        735 :             (file_exists(name + "baseA") || file_exists(name + "baseB")));
         [ #  # ][ -  + ]
         [ #  # ][ #  # ]
         [ +  + ][ #  # ]
                 [ +  - ]
    1669                 :            : }
    1670                 :            : 
    1671                 :            : void
    1672                 :       1238 : FlintTable::erase()
    1673                 :            : {
    1674                 :            :     LOGCALL_VOID(DB, "FlintTable::erase", NO_ARGS);
    1675                 :       1238 :     close();
    1676                 :            : 
    1677                 :       1238 :     (void)io_unlink(name + "baseA");
    1678                 :       1238 :     (void)io_unlink(name + "baseB");
    1679                 :       1238 :     (void)io_unlink(name + "DB");
    1680                 :       1238 : }
    1681                 :            : 
    1682                 :            : void
    1683                 :      10195 : FlintTable::set_block_size(unsigned int block_size_)
    1684                 :            : {
    1685                 :            :     LOGCALL_VOID(DB, "FlintTable::set_block_size", block_size_);
    1686                 :            :     // Block size must in the range 2048..BYTE_PAIR_RANGE, and a power of two.
    1687 [ +  - ][ +  - ]:      10195 :     if (block_size_ < 2048 || block_size_ > BYTE_PAIR_RANGE ||
                 [ -  + ]
    1688                 :            :         (block_size_ & (block_size_ - 1)) != 0) {
    1689                 :          0 :         block_size_ = FLINT_DEFAULT_BLOCK_SIZE;
    1690                 :            :     }
    1691                 :      10195 :     block_size = block_size_;
    1692                 :      10195 : }
    1693                 :            : 
    1694                 :            : void
    1695                 :       1393 : FlintTable::create_and_open(unsigned int block_size_)
    1696                 :            : {
    1697                 :            :     LOGCALL_VOID(DB, "FlintTable::create_and_open", block_size_);
    1698         [ -  + ]:       1393 :     if (handle == -2) {
    1699                 :          0 :         FlintTable::throw_database_closed();
    1700                 :            :     }
    1701                 :            :     Assert(writable);
    1702                 :       1393 :     close();
    1703                 :            : 
    1704         [ -  + ]:       1393 :     if (block_size_ == 0) abort();
    1705                 :       1393 :     set_block_size(block_size_);
    1706                 :            : 
    1707                 :            :     // FIXME: it would be good to arrange that this works such that there's
    1708                 :            :     // always a valid table in place if you run create_and_open() on an
    1709                 :            :     // existing table.
    1710                 :            : 
    1711                 :            :     /* write initial values to files */
    1712                 :            : 
    1713                 :            :     /* create the base file */
    1714                 :       1393 :     FlintTable_base base_;
    1715                 :       1393 :     base_.set_revision(revision_number);
    1716                 :       1393 :     base_.set_block_size(block_size_);
    1717                 :       1393 :     base_.set_have_fakeroot(true);
    1718                 :       1393 :     base_.set_sequential(true);
    1719                 :       1393 :     base_.write_to_file(name + "baseA", 'A', "", -1, NULL);
    1720                 :            : 
    1721                 :            :     /* remove the alternative base file, if any */
    1722                 :       1393 :     (void)io_unlink(name + "baseB");
    1723                 :            : 
    1724                 :            :     // Any errors are thrown if revision_supplied is false.
    1725                 :       1393 :     (void)do_open_to_write(false, 0, true);
    1726                 :       1393 : }
    1727                 :            : 
    1728                 :      15543 : FlintTable::~FlintTable() {
    1729                 :            :     LOGCALL_VOID(DB, "FlintTable::~FlintTable", NO_ARGS);
    1730                 :      15543 :     FlintTable::close();
    1731                 :            : 
    1732   [ +  +  +  + ]:      15543 :     if (deflate_zstream) {
    1733                 :            :         // Errors which we care about have already been handled, so just ignore
    1734                 :            :         // any which get returned here.
    1735                 :        354 :         (void) deflateEnd(deflate_zstream);
    1736                 :        354 :         delete deflate_zstream;
    1737                 :            :     }
    1738                 :            : 
    1739 [ -  + ][ +  + ]:      15543 :     if (inflate_zstream) {
    1740                 :            :         // Errors which we care about have already been handled, so just ignore
    1741                 :            :         // any which get returned here.
    1742                 :        194 :         (void) inflateEnd(inflate_zstream);
    1743                 :        194 :         delete inflate_zstream;
    1744                 :            :     }
    1745                 :      15543 : }
    1746                 :            : 
    1747                 :      32025 : void FlintTable::close(bool permanent) {
    1748                 :            :     LOGCALL_VOID(DB, "FlintTable::close", NO_ARGS);
    1749                 :            : 
    1750         [ +  + ]:      32025 :     if (handle >= 0) {
    1751                 :            :         // If an error occurs here, we just ignore it, since we're just
    1752                 :            :         // trying to free everything.
    1753                 :      10447 :         (void)::close(handle);
    1754                 :      10447 :         handle = -1;
    1755                 :            :     }
    1756                 :            : 
    1757         [ +  + ]:      32025 :     if (permanent) {
    1758                 :        105 :         handle = -2;
    1759                 :            :         // Don't delete the resources in the table, since they may
    1760                 :            :         // still be used to look up cached content.
    1761                 :        105 :         return;
    1762                 :            :     }
    1763                 :            : 
    1764         [ +  + ]:      66820 :     for (int j = level; j >= 0; j--) {
    1765         [ +  + ]:      34900 :         delete [] C[j].p;
    1766                 :      34900 :         C[j].p = 0;
    1767                 :            :     }
    1768         [ +  + ]:      31920 :     delete [] split_p;
    1769                 :      31920 :     split_p = 0;
    1770                 :            : 
    1771         [ +  + ]:      31920 :     delete [] kt.get_address();
    1772                 :      31920 :     kt = 0;
    1773         [ +  + ]:      31920 :     delete [] buffer;
    1774                 :      32025 :     buffer = 0;
    1775                 :            : }
    1776                 :            : 
    1777                 :            : void
    1778                 :       3271 : FlintTable::flush_db()
    1779                 :            : {
    1780                 :            :     LOGCALL_VOID(DB, "FlintTable::flush_db", NO_ARGS);
    1781                 :            :     Assert(writable);
    1782         [ +  + ]:       3271 :     if (handle < 0) {
    1783         [ -  + ]:       1138 :         if (handle == -2) {
    1784                 :          0 :             FlintTable::throw_database_closed();
    1785                 :            :         }
    1786                 :       1138 :         return;
    1787                 :            :     }
    1788                 :            : 
    1789         [ +  + ]:       4659 :     for (int j = level; j >= 0; j--) {
    1790         [ +  + ]:       2526 :         if (C[j].rewrite) {
    1791                 :       2288 :             write_block(C[j].n, C[j].p);
    1792                 :            :         }
    1793                 :            :     }
    1794                 :            : 
    1795         [ +  + ]:       2133 :     if (Btree_modified) {
    1796                 :       3271 :         faked_root_block = false;
    1797                 :            :     }
    1798                 :            : }
    1799                 :            : 
    1800                 :            : void
    1801                 :       3271 : FlintTable::commit(flint_revision_number_t revision, int changes_fd,
    1802                 :            :                    const string * changes_tail)
    1803                 :            : {
    1804                 :            :     LOGCALL_VOID(DB, "FlintTable::commit", revision | changes_fd | changes_tail);
    1805                 :            :     Assert(writable);
    1806                 :            : 
    1807         [ -  + ]:       3271 :     if (revision <= revision_number) {
    1808                 :          0 :         throw Xapian::DatabaseError("New revision too low");
    1809                 :            :     }
    1810                 :            : 
    1811         [ +  + ]:       3271 :     if (handle < 0) {
    1812         [ -  + ]:       1138 :         if (handle == -2) {
    1813                 :          0 :             FlintTable::throw_database_closed();
    1814                 :            :         }
    1815                 :       1138 :         latest_revision_number = revision_number = revision;
    1816                 :       1138 :         return;
    1817                 :            :     }
    1818                 :            : 
    1819                 :            :     try {
    1820         [ +  + ]:       2133 :         if (faked_root_block) {
    1821                 :            :             /* We will use a dummy bitmap. */
    1822                 :        102 :             base.clear_bit_map();
    1823                 :            :         }
    1824                 :            : 
    1825                 :       2133 :         base.set_revision(revision);
    1826                 :       2133 :         base.set_root(C[level].n);
    1827                 :       2133 :         base.set_level(level);
    1828                 :       2133 :         base.set_item_count(item_count);
    1829                 :       2133 :         base.set_have_fakeroot(faked_root_block);
    1830                 :       2133 :         base.set_sequential(sequential);
    1831                 :            : 
    1832                 :       2133 :         base_letter = other_base_letter();
    1833                 :            : 
    1834                 :       2133 :         both_bases = true;
    1835                 :       2133 :         latest_revision_number = revision_number = revision;
    1836                 :       2133 :         root = C[level].n;
    1837                 :            : 
    1838                 :       2133 :         Btree_modified = false;
    1839                 :            : 
    1840         [ +  + ]:      23463 :         for (int i = 0; i < BTREE_CURSOR_LEVELS; ++i) {
    1841                 :      21330 :             C[i].n = BLK_UNUSED;
    1842                 :      21330 :             C[i].c = -1;
    1843                 :      21330 :             C[i].rewrite = false;
    1844                 :            :         }
    1845                 :            : 
    1846                 :            :         // Do this as late as possible to allow maximum time for writes to be
    1847                 :            :         // committed.
    1848         [ -  + ]:       2133 :         if (!io_sync(handle)) {
    1849                 :          0 :             (void)::close(handle);
    1850                 :          0 :             handle = -1;
    1851                 :          0 :             throw Xapian::DatabaseError("Can't commit new revision - failed to flush DB to disk");
    1852                 :            :         }
    1853                 :            : 
    1854                 :            :         // Save to "<table>.tmp" and then rename to "<table>.base<letter>" so
    1855                 :            :         // that a reader can't try to read a partially written base file.
    1856                 :       2133 :         string tmp = name;
    1857                 :       2133 :         tmp += "tmp";
    1858                 :       2133 :         string basefile = name;
    1859                 :       2133 :         basefile += "base";
    1860                 :       2133 :         basefile += char(base_letter);
    1861                 :       2133 :         base.write_to_file(tmp, base_letter, tablename, changes_fd, changes_tail);
    1862                 :            : #if defined __WIN32__
    1863                 :            :         if (msvc_posix_rename(tmp.c_str(), basefile.c_str()) < 0)
    1864                 :            : #else
    1865         [ -  + ]:       2133 :         if (rename(tmp.c_str(), basefile.c_str()) < 0)
    1866                 :            : #endif
    1867                 :            :         {
    1868                 :            :             // With NFS, rename() failing may just mean that the server crashed
    1869                 :            :             // after successfully renaming, but before reporting this, and then
    1870                 :            :             // the retried operation fails.  So we need to check if the source
    1871                 :            :             // file still exists, which we do by calling unlink(), since we want
    1872                 :            :             // to remove the temporary file anyway.
    1873                 :          0 :             int saved_errno = errno;
    1874   [ #  #  #  # ]:          0 :             if (unlink(tmp) == 0 || errno != ENOENT) {
                 [ #  # ]
    1875                 :          0 :                 string msg("Couldn't update base file ");
    1876                 :          0 :                 msg += basefile;
    1877                 :          0 :                 msg += ": ";
    1878                 :          0 :                 msg += strerror(saved_errno);
    1879                 :          0 :                 throw Xapian::DatabaseError(msg);
    1880                 :            :             }
    1881                 :            :         }
    1882                 :       2133 :         base.commit();
    1883                 :            : 
    1884                 :       2133 :         read_root();
    1885                 :            : 
    1886                 :       2133 :         changed_n = 0;
    1887                 :       2133 :         changed_c = DIR_START;
    1888                 :       2133 :         seq_count = SEQ_START_POINT;
    1889                 :       3271 :     } catch (...) {
    1890                 :          0 :         FlintTable::close();
    1891                 :          0 :         throw;
    1892                 :            :     }
    1893                 :            : }
    1894                 :            : 
    1895                 :            : void
    1896                 :         70 : FlintTable::write_changed_blocks(int changes_fd)
    1897                 :            : {
    1898                 :            :     Assert(changes_fd >= 0);
    1899         [ +  + ]:         70 :     if (handle < 0) return;
    1900         [ -  + ]:         50 :     if (faked_root_block) return;
    1901                 :            : 
    1902                 :         50 :     string buf;
    1903                 :         50 :     buf += F_pack_uint(2u); // Indicate the item is a list of blocks
    1904                 :         50 :     buf += F_pack_uint(strlen(tablename));
    1905                 :         50 :     buf += tablename;
    1906                 :         50 :     buf += F_pack_uint(block_size);
    1907                 :         50 :     io_write(changes_fd, buf.data(), buf.size());
    1908                 :            : 
    1909                 :            :     // Compare the old and new bitmaps to find blocks which have changed, and
    1910                 :            :     // write them to the file descriptor.
    1911                 :         50 :     uint4 n = 0;
    1912                 :         50 :     byte * p = new byte[block_size];
    1913                 :            :     try {
    1914                 :         50 :         base.calculate_last_block();
    1915         [ +  + ]:         99 :         while (base.find_changed_block(&n)) {
    1916                 :         49 :             buf = F_pack_uint(n + 1);
    1917                 :         49 :             io_write(changes_fd, buf.data(), buf.size());
    1918                 :            : 
    1919                 :            :             // Read block n.
    1920                 :         49 :             read_block(n, p);
    1921                 :            : 
    1922                 :            :             // Write block n to the file.
    1923                 :         49 :             io_write(changes_fd, reinterpret_cast<const char *>(p), block_size);
    1924                 :         49 :             ++n;
    1925                 :            :         }
    1926         [ +  - ]:         50 :         delete[] p;
    1927                 :         50 :         p = 0;
    1928                 :          0 :     } catch (...) {
    1929         [ #  # ]:          0 :         delete[] p;
    1930                 :          0 :         throw;
    1931                 :            :     }
    1932                 :         50 :     buf = F_pack_uint(0u);
    1933                 :         70 :     io_write(changes_fd, buf.data(), buf.size());
    1934                 :            : }
    1935                 :            : 
    1936                 :            : void
    1937                 :       1045 : FlintTable::cancel()
    1938                 :            : {
    1939                 :            :     LOGCALL_VOID(DB, "FlintTable::cancel", NO_ARGS);
    1940                 :            :     Assert(writable);
    1941                 :            : 
    1942         [ +  + ]:       1045 :     if (handle < 0) {
    1943         [ +  + ]:        436 :         if (handle == -2) {
    1944                 :          2 :             FlintTable::throw_database_closed();
    1945                 :            :         }
    1946                 :        434 :         latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
    1947                 :        434 :         return;
    1948                 :            :     }
    1949                 :            : 
    1950                 :            :     // This causes problems: if (!Btree_modified) return;
    1951                 :            : 
    1952                 :        609 :     string err_msg;
    1953         [ -  + ]:        609 :     if (!base.read(name, base_letter, writable, err_msg)) {
    1954                 :          0 :         throw Xapian::DatabaseCorruptError(string("Couldn't reread base ") + base_letter);
    1955                 :            :     }
    1956                 :            : 
    1957                 :        609 :     revision_number =  base.get_revision();
    1958                 :        609 :     block_size =       base.get_block_size();
    1959                 :        609 :     root =             base.get_root();
    1960                 :        609 :     level =            base.get_level();
    1961                 :            :     //bit_map_size =     basep->get_bit_map_size();
    1962                 :        609 :     item_count =       base.get_item_count();
    1963                 :        609 :     faked_root_block = base.get_have_fakeroot();
    1964                 :        609 :     sequential =       base.get_sequential();
    1965                 :            : 
    1966                 :        609 :     latest_revision_number = revision_number; // FIXME: we can end up reusing a revision if we opened a btree at an older revision, start to modify it, then cancel...
    1967                 :            : 
    1968                 :        609 :     Btree_modified = false;
    1969                 :            : 
    1970         [ +  + ]:       1257 :     for (int j = 0; j <= level; j++) {
    1971                 :        648 :         C[j].n = BLK_UNUSED;
    1972                 :        648 :         C[j].rewrite = false;
    1973                 :            :     }
    1974                 :        609 :     read_root();
    1975                 :            : 
    1976                 :        609 :     changed_n = 0;
    1977                 :        609 :     changed_c = DIR_START;
    1978                 :       1043 :     seq_count = SEQ_START_POINT;
    1979                 :            : }
    1980                 :            : 
    1981                 :            : /************ B-tree reading ************/
    1982                 :            : 
    1983                 :            : bool
    1984                 :      11383 : FlintTable::do_open_to_read(bool revision_supplied, flint_revision_number_t revision_)
    1985                 :            : {
    1986         [ +  + ]:      11383 :     if (handle == -2) {
    1987                 :          4 :         FlintTable::throw_database_closed();
    1988                 :            :     }
    1989                 :      11379 :     handle = ::open((name + "DB").c_str(), O_RDONLY | O_BINARY);
    1990         [ +  + ]:      11379 :     if (handle < 0) {
    1991         [ +  - ]:       3534 :         if (lazy) {
    1992                 :            :             // This table is optional when reading!
    1993                 :       3534 :             revision_number = revision_;
    1994                 :       3534 :             return true;
    1995                 :            :         }
    1996                 :          0 :         string message("Couldn't open ");
    1997                 :          0 :         message += name;
    1998                 :          0 :         message += "DB to read: ";
    1999                 :          0 :         message += strerror(errno);
    2000                 :          0 :         throw Xapian::DatabaseOpeningError(message);
    2001                 :            :     }
    2002                 :            : 
    2003         [ -  + ]:       7845 :     if (!basic_open(revision_supplied, revision_)) {
    2004                 :          0 :         ::close(handle);
    2005                 :          0 :         handle = -1;
    2006         [ #  # ]:          0 :         if (revision_supplied) {
    2007                 :            :             // The requested revision was not available.
    2008                 :            :             // This could be because the database was modified underneath us, or
    2009                 :            :             // because a base file is missing.  Return false, and work out what
    2010                 :            :             // the problem was at a higher level.
    2011                 :          0 :             return false;
    2012                 :            :         }
    2013                 :          0 :         throw Xapian::DatabaseOpeningError("Failed to open table for reading");
    2014                 :            :     }
    2015                 :            : 
    2016         [ +  + ]:      18376 :     for (int j = 0; j <= level; j++) {
    2017                 :      10531 :         C[j].n = BLK_UNUSED;
    2018                 :      10531 :         C[j].p = new byte[block_size];
    2019         [ -  + ]:      10531 :         if (C[j].p == 0) {
    2020                 :          0 :             throw std::bad_alloc();
    2021                 :            :         }
    2022                 :            :     }
    2023                 :            : 
    2024                 :       7845 :     read_root();
    2025                 :      11379 :     return true;
    2026                 :            : }
    2027                 :            : 
    2028                 :            : void
    2029                 :       2400 : FlintTable::open()
    2030                 :            : {
    2031                 :            :     LOGCALL_VOID(DB, "FlintTable::open", NO_ARGS);
    2032                 :            :     LOGLINE(DB, "opening at path " << name);
    2033                 :       2400 :     close();
    2034                 :            : 
    2035         [ +  + ]:       2400 :     if (!writable) {
    2036                 :            :         // Any errors are thrown if revision_supplied is false
    2037                 :       2059 :         (void)do_open_to_read(false, 0);
    2038                 :       2055 :         return;
    2039                 :            :     }
    2040                 :            : 
    2041                 :            :     // Any errors are thrown if revision_supplied is false.
    2042                 :       2396 :     (void)do_open_to_write(false, 0);
    2043                 :            : }
    2044                 :            : 
    2045                 :            : bool
    2046                 :      11346 : FlintTable::open(flint_revision_number_t revision)
    2047                 :            : {
    2048                 :            :     LOGCALL(DB, bool, "FlintTable::open", revision);
    2049                 :            :     LOGLINE(DB, "opening for particular revision at path " << name);
    2050                 :      11346 :     close();
    2051                 :            : 
    2052         [ +  + ]:      11346 :     if (!writable) {
    2053         [ +  - ]:       9324 :         if (do_open_to_read(true, revision)) {
    2054                 :            :             AssertEq(revision_number, revision);
    2055                 :       9324 :             RETURN(true);
    2056                 :            :         } else {
    2057                 :          0 :             close();
    2058                 :          0 :             RETURN(false);
    2059                 :            :         }
    2060                 :            :     }
    2061                 :            : 
    2062         [ -  + ]:       2022 :     if (!do_open_to_write(true, revision)) {
    2063                 :            :         // Can't open at the requested revision.
    2064                 :          0 :         close();
    2065                 :          0 :         RETURN(false);
    2066                 :            :     }
    2067                 :            : 
    2068                 :            :     AssertEq(revision_number, revision);
    2069                 :      11346 :     RETURN(true);
    2070                 :            : }
    2071                 :            : 
    2072                 :            : bool
    2073                 :          2 : FlintTable::prev_for_sequential(Cursor_ * C_, int /*dummy*/) const
    2074                 :            : {
    2075                 :          2 :     int c = C_[0].c;
    2076         [ -  + ]:          2 :     if (c == DIR_START) {
    2077                 :          0 :         byte * p = C_[0].p;
    2078                 :            :         Assert(p);
    2079                 :          0 :         uint4 n = C_[0].n;
    2080                 :          0 :         while (true) {
    2081         [ #  # ]:          0 :             if (n == 0) return false;
    2082                 :          0 :             n--;
    2083         [ #  # ]:          0 :             if (writable) {
    2084         [ #  # ]:          0 :                 if (n == C[0].n) {
    2085                 :            :                     // Block is a leaf block in the built-in cursor
    2086                 :            :                     // (potentially in modified form).
    2087                 :          0 :                     memcpy(p, C[0].p, block_size);
    2088                 :            :                 } else {
    2089                 :            :                     // Blocks in the built-in cursor may not have been written
    2090                 :            :                     // to disk yet, so we have to check that the block number
    2091                 :            :                     // isn't in the built-in cursor or we'll read an
    2092                 :            :                     // uninitialised block (for which GET_LEVEL(p) will
    2093                 :            :                     // probably return 0).
    2094                 :            :                     int j;
    2095         [ #  # ]:          0 :                     for (j = 1; j <= level; ++j) {
    2096         [ #  # ]:          0 :                         if (n == C[j].n) break;
    2097                 :            :                     }
    2098         [ #  # ]:          0 :                     if (j <= level) continue;
    2099                 :            : 
    2100                 :            :                     // Block isn't in the built-in cursor, so the form on disk
    2101                 :            :                     // is valid, so read it to check if it's the next level 0
    2102                 :            :                     // block.
    2103                 :          0 :                     read_block(n, p);
    2104                 :            :                 }
    2105                 :            :             } else {
    2106                 :          0 :                 read_block(n, p);
    2107                 :            :             }
    2108                 :            :             
    2109                 :          0 :             if (writable) AssertEq(revision_number, latest_revision_number);
    2110         [ #  # ]:          0 :             if (REVISION(p) > revision_number + writable) {
    2111                 :          0 :                 set_overwritten();
    2112                 :            :                 return false;
    2113                 :            :             }
    2114         [ #  # ]:          0 :             if (GET_LEVEL(p) == 0) break;
    2115                 :            :         }
    2116                 :          0 :         c = DIR_END(p);
    2117                 :          0 :         C_[0].n = n;
    2118                 :            :     }
    2119                 :          2 :     c -= D2;
    2120                 :          2 :     C_[0].c = c;
    2121                 :          2 :     return true;
    2122                 :            : }
    2123                 :            : 
    2124                 :            : bool
    2125                 :     419517 : FlintTable::next_for_sequential(Cursor_ * C_, int /*dummy*/) const
    2126                 :            : {
    2127                 :     419517 :     byte * p = C_[0].p;
    2128                 :            :     Assert(p);
    2129                 :     419517 :     int c = C_[0].c;
    2130                 :     419517 :     c += D2;
    2131                 :            :     Assert((unsigned)c < block_size);
    2132         [ +  + ]:     419517 :     if (c == DIR_END(p)) {
    2133                 :     101731 :         uint4 n = C_[0].n;
    2134                 :        714 :         while (true) {
    2135                 :     102445 :             n++;
    2136         [ +  + ]:     102445 :             if (n > base.get_last_block()) return false;
    2137         [ +  + ]:     102129 :             if (writable) {
    2138         [ -  + ]:         48 :                 if (n == C[0].n) {
    2139                 :            :                     // Block is a leaf block in the built-in cursor
    2140                 :            :                     // (potentially in modified form).
    2141                 :          0 :                     memcpy(p, C[0].p, block_size);
    2142                 :            :                 } else {
    2143                 :            :                     // Blocks in the built-in cursor may not have been written
    2144                 :            :                     // to disk yet, so we have to check that the block number
    2145                 :            :                     // isn't in the built-in cursor or we'll read an
    2146                 :            :                     // uninitialised block (for which GET_LEVEL(p) will
    2147                 :            :                     // probably return 0).
    2148                 :            :                     int j;
    2149         [ +  + ]:         90 :                     for (j = 1; j <= level; ++j) {
    2150         [ +  + ]:         48 :                         if (n == C[j].n) break;
    2151                 :            :                     }
    2152         [ +  + ]:         48 :                     if (j <= level) continue;
    2153                 :            : 
    2154                 :            :                     // Block isn't in the built-in cursor, so the form on disk
    2155                 :            :                     // is valid, so read it to check if it's the next level 0
    2156                 :            :                     // block.
    2157                 :         42 :                     read_block(n, p);
    2158                 :            :                 }
    2159                 :            :             } else {
    2160                 :     102081 :                 read_block(n, p);
    2161                 :            :             }
    2162                 :     102123 :             if (writable) AssertEq(revision_number, latest_revision_number);
    2163         [ -  + ]:     102123 :             if (REVISION(p) > revision_number + writable) {
    2164                 :          0 :                 set_overwritten();
    2165                 :            :                 return false;
    2166                 :            :             }
    2167         [ +  + ]:     102123 :             if (GET_LEVEL(p) == 0) break;
    2168                 :            :         }
    2169                 :     101415 :         c = DIR_START;
    2170                 :     101415 :         C_[0].n = n;
    2171                 :            :     }
    2172                 :     419201 :     C_[0].c = c;
    2173                 :     419517 :     return true;
    2174                 :            : }
    2175                 :            : 
    2176                 :            : bool
    2177                 :        129 : FlintTable::prev_default(Cursor_ * C_, int j) const
    2178                 :            : {
    2179                 :        129 :     byte * p = C_[j].p;
    2180                 :        129 :     int c = C_[j].c;
    2181                 :            :     Assert(c >= DIR_START);
    2182                 :            :     Assert((unsigned)c < block_size);
    2183                 :            :     Assert(c <= DIR_END(p));
    2184         [ +  + ]:        129 :     if (c == DIR_START) {
    2185         [ -  + ]:         24 :         if (j == level) return false;
    2186         [ -  + ]:         24 :         if (!prev_default(C_, j + 1)) return false;
    2187                 :         24 :         c = DIR_END(p);
    2188                 :            :     }
    2189                 :        129 :     c -= D2;
    2190                 :        129 :     C_[j].c = c;
    2191         [ +  + ]:        129 :     if (j > 0) {
    2192                 :         24 :         block_to_cursor(C_, j - 1, Item_(p, c).block_given_by());
    2193                 :            :     }
    2194                 :        129 :     return true;
    2195                 :            : }
    2196                 :            : 
    2197                 :            : bool
    2198                 :     376336 : FlintTable::next_default(Cursor_ * C_, int j) const
    2199                 :            : {
    2200                 :     376336 :     byte * p = C_[j].p;
    2201                 :     376336 :     int c = C_[j].c;
    2202                 :            :     Assert(c >= DIR_START);
    2203                 :     376336 :     c += D2;
    2204                 :            :     Assert((unsigned)c < block_size);
    2205                 :            :     // Sometimes c can be DIR_END(p) + 2 here it appears...
    2206         [ +  + ]:     376336 :     if (c >= DIR_END(p)) {
    2207         [ +  + ]:     113095 :         if (j == level) return false;
    2208         [ +  + ]:      91395 :         if (!next_default(C_, j + 1)) return false;
    2209                 :      69379 :         c = DIR_START;
    2210                 :            :     }
    2211                 :     332620 :     C_[j].c = c;
    2212         [ +  + ]:     332620 :     if (j > 0) {
    2213                 :      69380 :         block_to_cursor(C_, j - 1, Item_(p, c).block_given_by());
    2214                 :            : #ifdef BTREE_DEBUG_FULL
    2215                 :            :         printf("Block in FlintTable:next_default");
    2216                 :            :         report_block_full(j - 1, C_[j - 1].n, C_[j - 1].p);
    2217                 :            : #endif /* BTREE_DEBUG_FULL */
    2218                 :            :     }
    2219                 :     376334 :     return true;
    2220                 :            : }
    2221                 :            : 
    2222                 :            : void
    2223                 :         19 : FlintTable::throw_database_closed()
    2224                 :            : {
    2225                 :         19 :     throw Xapian::DatabaseError("Database has been closed");
    2226                 :            : }
    2227                 :            : 
    2228                 :            : /** Compares this key with key2.
    2229                 :            : 
    2230                 :            :    The result is true if this key precedes key2. The comparison is for byte
    2231                 :            :    sequence collating order, taking lengths into account. So if the keys are
    2232                 :            :    made up of lower case ASCII letters we get alphabetical ordering.
    2233                 :            : 
    2234                 :            :    Now remember that items are added into the B-tree in fastest time
    2235                 :            :    when they are preordered by their keys. This is therefore the piece
    2236                 :            :    of code that needs to be followed to arrange for the preordering.
    2237                 :            : 
    2238                 :            :    This is complicated by the fact that keys have two parts - a value
    2239                 :            :    and then a count.  We first compare the values, and only if they
    2240                 :            :    are equal do we compare the counts.
    2241                 :            : */
    2242                 :            : 
    2243                 :   14569123 : bool Key_::operator<(Key_ key2) const
    2244                 :            : {
    2245                 :            :     LOGCALL(DB, bool, "Key_::operator<", static_cast<const void*>(key2.p));
    2246                 :   14569123 :     int key1_len = length();
    2247                 :   14569123 :     int key2_len = key2.length();
    2248         [ +  + ]:   14569123 :     if (key1_len == key2_len) {
    2249                 :            :         // The keys are the same length, so we can compare the counts
    2250                 :            :         // in the same operation since they're stored as 2 byte
    2251                 :            :         // bigendian numbers.
    2252                 :    8944009 :         RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) < 0);
    2253                 :            :     }
    2254                 :            : 
    2255         [ +  + ]:    5625114 :     int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
    2256                 :            : 
    2257                 :            :     // Compare the common part of the keys
    2258                 :    5625114 :     int diff = memcmp(p + K1, key2.p + K1, k_smaller);
    2259         [ +  + ]:    5625114 :     if (diff != 0) RETURN(diff < 0);
    2260                 :            : 
    2261                 :            :     // We dealt with the "same length" case above so we never need to check
    2262                 :            :     // the count here.
    2263                 :   14569123 :     RETURN(key1_len < key2_len);
    2264                 :            : }
    2265                 :            : 
    2266                 :    2436010 : bool Key_::operator==(Key_ key2) const
    2267                 :            : {
    2268                 :            :     LOGCALL(DB, bool, "Key_::operator==", static_cast<const void*>(key2.p));
    2269                 :    2436010 :     int key1_len = length();
    2270         [ +  + ]:    2436010 :     if (key1_len != key2.length()) return false;
    2271                 :            :     // The keys are the same length, so we can compare the counts
    2272                 :            :     // in the same operation since they're stored as 2 byte
    2273                 :            :     // bigendian numbers.
    2274                 :    2436010 :     RETURN(memcmp(p + K1, key2.p + K1, key1_len + C2) == 0);
    2275                 :            : }

Generated by: LCOV version 1.8