LCOV - code coverage report
Current view: top level - backends/chert - chert_table.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 106 106 100.0 %
Date: 2011-08-21 Functions: 45 45 100.0 %
Branches: 15 16 93.8 %

           Branch data     Line data    Source code
       1                 :            : /* chert_table.h: Btree implementation
       2                 :            :  *
       3                 :            :  * Copyright 1999,2000,2001 BrightStation PLC
       4                 :            :  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
       5                 :            :  * Copyright 2008 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU General Public License as
       9                 :            :  * published by the Free Software Foundation; either version 2 of the
      10                 :            :  * License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      20                 :            :  * USA
      21                 :            :  */
      22                 :            : 
      23                 :            : #ifndef OM_HGUARD_CHERT_TABLE_H
      24                 :            : #define OM_HGUARD_CHERT_TABLE_H
      25                 :            : 
      26                 :            : #include <xapian/error.h>
      27                 :            : #include <xapian/visibility.h>
      28                 :            : 
      29                 :            : #include "chert_types.h"
      30                 :            : #include "chert_btreebase.h"
      31                 :            : #include "chert_cursor.h"
      32                 :            : 
      33                 :            : #include "noreturn.h"
      34                 :            : #include "omassert.h"
      35                 :            : #include "str.h"
      36                 :            : #include "stringutils.h"
      37                 :            : #include "unaligned.h"
      38                 :            : 
      39                 :            : #include <algorithm>
      40                 :            : #include <string>
      41                 :            : 
      42                 :            : #include <zlib.h>
      43                 :            : 
      44                 :            : #define DONT_COMPRESS -1
      45                 :            : 
      46                 :            : /** The largest possible value of a key_len.
      47                 :            :  *
      48                 :            :  *  This gives the upper limit of the size of a key that may be stored in the
      49                 :            :  *  B-tree (252 bytes with the present implementation).
      50                 :            :  */
      51                 :            : #define CHERT_BTREE_MAX_KEY_LEN 252
      52                 :            : 
      53                 :            : /** Even for items of at maximum size, it must be possible to get this number of
      54                 :            :  *  items in a block */
      55                 :            : #define BLOCK_CAPACITY 4
      56                 :            : 
      57                 :            : // FIXME: This named constant probably isn't used everywhere it should be...
      58                 :            : #define BYTES_PER_BLOCK_NUMBER 4
      59                 :            : 
      60                 :            : /*  The B-tree blocks have a number of internal lengths and offsets held in 1, 2
      61                 :            :     or 4 bytes. To make the coding a little clearer,
      62                 :            :        we use  for
      63                 :            :        ------  ---
      64                 :            :        K1      the 1 byte length of key
      65                 :            :        I2      the 2 byte length of an item (key-tag pair)
      66                 :            :        D2      the 2 byte offset to the item from the directory
      67                 :            :        C2      the 2 byte counter that ends each key and begins each tag
      68                 :            : */
      69                 :            : 
      70                 :            : #define K1 1
      71                 :            : #define I2 2
      72                 :            : #define D2 2
      73                 :            : #define C2 2
      74                 :            : 
      75                 :            : /*  and when getting K1 or setting D2, we use getK, setD defined as: */
      76                 :            : 
      77                 :            : #define getK(p, c)    getint1(p, c)
      78                 :            : #define setD(p, c, x) setint2(p, c, x)
      79                 :            : 
      80                 :            : /* if you've been reading the comments from the top, the next four procedures
      81                 :            :    will not cause any headaches.
      82                 :            : 
      83                 :            :    Recall that item has this form:
      84                 :            : 
      85                 :            :            i k
      86                 :            :            | |
      87                 :            :            I K key x C tag
      88                 :            :              <--K-->
      89                 :            :            <------I------>
      90                 :            : 
      91                 :            : 
      92                 :            :    item_of(p, c) returns i, the address of the item at block address p,
      93                 :            :    directory offset c,
      94                 :            : 
      95                 :            :    component_of(p, c) returns the number marked 'x' above,
      96                 :            : 
      97                 :            :    components_of(p, c) returns the number marked 'C' above,
      98                 :            : */
      99                 :            : 
     100                 :            : #define REVISION(b)      static_cast<unsigned int>(getint4(b, 0))
     101                 :            : #define GET_LEVEL(b)     getint1(b, 4)
     102                 :            : #define MAX_FREE(b)      getint2(b, 5)
     103                 :            : #define TOTAL_FREE(b)    getint2(b, 7)
     104                 :            : #define DIR_END(b)       getint2(b, 9)
     105                 :            : #define DIR_START        11
     106                 :            : 
     107                 :            : #define SET_REVISION(b, x)      setint4(b, 0, x)
     108                 :            : #define SET_LEVEL(b, x)         setint1(b, 4, x)
     109                 :            : #define SET_MAX_FREE(b, x)      setint2(b, 5, x)
     110                 :            : #define SET_TOTAL_FREE(b, x)    setint2(b, 7, x)
     111                 :            : #define SET_DIR_END(b, x)       setint2(b, 9, x)
     112                 :            : 
     113                 :            : class XAPIAN_VISIBILITY_DEFAULT Key {
     114                 :            :     const byte *p;
     115                 :            : public:
     116                 :   51181534 :     explicit Key(const byte * p_) : p(p_) { }
     117                 :      33698 :     const byte * get_address() const { return p; }
     118                 :    2124502 :     void read(std::string * key) const {
     119                 :    2124502 :         key->assign(reinterpret_cast<const char *>(p + K1), length());
     120                 :    2124502 :     }
     121                 :            :     bool operator==(Key key2) const;
     122                 :            :     bool operator!=(Key key2) const { return !(*this == key2); }
     123                 :            :     bool operator<(Key key2) const;
     124                 :          1 :     bool operator>=(Key key2) const { return !(*this < key2); }
     125                 :            :     bool operator>(Key key2) const { return key2 < *this; }
     126                 :    5705316 :     bool operator<=(Key key2) const { return !(key2 < *this); }
     127                 :   89637144 :     int length() const {
     128                 :            :         AssertRel(getK(p, 0),>=,3);
     129                 :   89637144 :         return getK(p, 0) - C2 - K1;
     130                 :            :     }
     131                 :     172724 :     char operator[](size_t i) const {
     132                 :            :         AssertRel(i,<,(size_t)length());
     133                 :     172724 :         return p[i + K1];
     134                 :            :     }
     135                 :            : };
     136                 :            : 
     137                 :            : // Item_wr wants to be "Item with non-const p and more methods" - we can't
     138                 :            : // achieve that nicely with inheritance, so we use a template base class.
     139                 :            : template <class T> class Item_base {
     140                 :            : protected:
     141                 :            :     T p;
     142                 :            : public:
     143                 :            :     /* Item from block address and offset to item pointer */
     144                 :   69806851 :     Item_base(T p_, int c) : p(p_ + getint2(p_, c)) { }
     145                 :      71629 :     Item_base(T p_) : p(p_) { }
     146                 :    3141610 :     T get_address() const { return p; }
     147                 :            :     /** I in diagram above. */
     148                 :   21328279 :     int size() const {
     149                 :   21328279 :         int item_size = getint2(p, 0) & 0x7fff;
     150                 :            :         AssertRel(item_size,>=,5);
     151                 :   21328279 :         return item_size;
     152                 :            :     }
     153                 :    2698122 :     bool get_compressed() const { return *p & 0x80; }
     154                 :    5937266 :     int component_of() const {
     155                 :    5937266 :         return getint2(p, getK(p, I2) + I2 - C2);
     156                 :            :     }
     157                 :    2783206 :     int components_of() const {
     158                 :    2783206 :         return getint2(p, getK(p, I2) + I2);
     159                 :            :     }
     160                 :   51181534 :     Key key() const { return Key(p + I2); }
     161                 :    6965416 :     void append_chunk(std::string * tag) const {
     162                 :            :         /* number of bytes to extract from current component */
     163                 :    6965416 :         int cd = getK(p, I2) + I2 + C2;
     164                 :    6965416 :         int l = size() - cd;
     165                 :    6965416 :         tag->append(reinterpret_cast<const char *>(p + cd), l);
     166                 :    6965416 :     }
     167                 :            :     /** Get this item's tag as a block number (this block should not be at
     168                 :            :      *  level 0).
     169                 :            :      */
     170                 :    9514946 :     uint4 block_given_by() const {
     171                 :            :         AssertRel(size(),>=,BYTES_PER_BLOCK_NUMBER);
     172                 :    9514946 :         return getint4(p, size() - BYTES_PER_BLOCK_NUMBER);
     173                 :            :     }
     174                 :            : };
     175                 :            : 
     176                 :            : class Item : public Item_base<const byte *> {
     177                 :            : public:
     178                 :            :     /* Item from block address and offset to item pointer */
     179                 :   69805954 :     Item(const byte * p_, int c) : Item_base<const byte *>(p_, c) { }
     180                 :            :     Item(const byte * p_) : Item_base<const byte *>(p_) { }
     181                 :            : };
     182                 :            : 
     183                 :            : class Item_wr : public Item_base<byte *> {
     184                 :    4594259 :     void set_key_len(int x) { setint1(p, I2, x); }
     185                 :            : public:
     186                 :            :     /* Item_wr from block address and offset to item pointer */
     187                 :        897 :     Item_wr(byte * p_, int c) : Item_base<byte *>(p_, c) { }
     188                 :      71629 :     Item_wr(byte * p_) : Item_base<byte *>(p_) { }
     189                 :    5954653 :     void set_component_of(int i) {
     190                 :    5954653 :         setint2(p, getK(p, I2) + I2 - C2, i);
     191                 :    5954653 :     }
     192                 :    1355102 :     void set_components_of(int m) {
     193                 :    1355102 :         setint2(p, getK(p, I2) + I2, m);
     194                 :    1355102 :     }
     195                 :            :     // Takes size as we may be truncating newkey.
     196                 :      16725 :     void set_key_and_block(Key newkey, int truncate_size, uint4 n) {
     197                 :      16725 :         int i = truncate_size;
     198                 :            :         // Read the length now because we may be copying the key over itself.
     199                 :            :         // FIXME that's stupid!  sort this out
     200                 :      16725 :         int newkey_len = newkey.length();
     201                 :            :         AssertRel(i,<=,newkey_len);
     202                 :      16725 :         int newsize = I2 + K1 + i + C2;
     203                 :            :         // Item size (BYTES_PER_BLOCK_NUMBER since tag contains block number)
     204                 :      16725 :         setint2(p, 0, newsize + BYTES_PER_BLOCK_NUMBER);
     205                 :            :         // Key size
     206                 :      16725 :         setint1(p, I2, newsize - I2);
     207                 :            :         // Copy the main part of the key, possibly truncating.
     208                 :      16725 :         std::memmove(p + I2 + K1, newkey.get_address() + K1, i);
     209                 :            :         // Copy the count part.
     210                 :      16725 :         std::memmove(p + I2 + K1 + i, newkey.get_address() + K1 + newkey_len, C2);
     211                 :            :         // Set tag contents to block number
     212                 :            : //      set_block_given_by(n);
     213                 :      16725 :         setint4(p, newsize, n);
     214                 :      16725 :     }
     215                 :            : 
     216                 :            :     /** Set this item's tag to point to block n (this block should not be at
     217                 :            :      *  level 0).
     218                 :            :      */
     219                 :        897 :     void set_block_given_by(uint4 n) {
     220                 :        897 :         setint4(p, size() - BYTES_PER_BLOCK_NUMBER, n);
     221                 :        897 :     }
     222                 :    1363770 :     void set_size(int l) {
     223                 :            :         AssertRel(l,>=,5);
     224                 :    1363770 :         setint2(p, 0, l);
     225                 :    1363770 :     }
     226                 :            :     /** Form an item with a null key and with block number n in the tag.
     227                 :            :      */
     228                 :        330 :     void form_null_key(uint4 n) {
     229                 :        330 :         setint4(p, I2 + K1, n);
     230                 :        330 :         set_key_len(K1);        /* null key */
     231                 :        330 :         set_size(I2 + K1 + BYTES_PER_BLOCK_NUMBER);  /* total length */
     232                 :        330 :     }
     233                 :    4590854 :     void form_key(const std::string & key_) {
     234                 :    4590854 :         std::string::size_type key_len = key_.length();
     235         [ +  + ]:    4590854 :         if (key_len > CHERT_BTREE_MAX_KEY_LEN) {
     236                 :            :             // We check term length when a term is added to a document but
     237                 :            :             // chert doubles zero bytes, so this can still happen for terms
     238                 :            :             // which contain one or more zero bytes.
     239                 :          8 :             std::string msg("Key too long: length was ");
     240                 :          8 :             msg += str(key_len);
     241                 :            :             msg += " bytes, maximum length of a key is "
     242                 :          8 :                    STRINGIZE(CHERT_BTREE_MAX_KEY_LEN) " bytes";
     243                 :         16 :             throw Xapian::InvalidArgumentError(msg);
     244                 :            :         }
     245                 :            : 
     246                 :    4590846 :         set_key_len(key_len + K1 + C2);
     247                 :    4590846 :         std::memmove(p + I2 + K1, key_.data(), key_len);
     248                 :    4590846 :         set_component_of(1);
     249                 :    4590846 :     }
     250                 :            :     // FIXME passing cd here is icky
     251                 :    1360357 :     void set_tag(int cd, const char *start, int len, bool compressed) {
     252                 :    1360357 :         std::memmove(p + cd, start, len);
     253                 :    1360357 :         set_size(cd + len);
     254         [ +  + ]:    1360357 :         if (compressed) *p |= 0x80;
     255                 :    1360357 :     }
     256                 :       3083 :     void fake_root_item() {
     257                 :       3083 :         set_key_len(K1 + C2);   // null key length
     258                 :       3083 :         set_size(I2 + K1 + 2 * C2);   // length of the item
     259                 :       3083 :         set_component_of(1);
     260                 :       3083 :         set_components_of(1);
     261                 :       3083 :     }
     262                 :            : };
     263                 :            : 
     264                 :            : // Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
     265                 :            : // With 10, overflow is practically impossible
     266                 :            : // FIXME: but we want it to be completely impossible...
     267                 :            : #define BTREE_CURSOR_LEVELS 10
     268                 :            : 
     269                 :            : /** Class managing a Btree table in a Chert database.
     270                 :            :  *
     271                 :            :  *  A table is a store holding a set of key/tag pairs.
     272                 :            :  *
     273                 :            :  *  A key is used to access a block of data in a chert table.
     274                 :            :  * 
     275                 :            :  *  Keys are of limited length.
     276                 :            :  *
     277                 :            :  *  Keys may not be empty (each Btree has a special empty key for internal use).
     278                 :            :  *
     279                 :            :  *  A tag is a piece of data associated with a given key.  The contents
     280                 :            :  *  of the tag are opaque to the Btree.
     281                 :            :  *
     282                 :            :  *  Tags may be of arbitrary length (the Btree imposes a very large limit).
     283                 :            :  *  Note though that they will be loaded into memory in their entirety, so
     284                 :            :  *  should not be permitted to grow without bound in normal usage.
     285                 :            :  *
     286                 :            :  *  Tags which are null strings _are_ valid, and are different from a
     287                 :            :  *  tag simply not being in the table.
     288                 :            :  */
     289                 :            : class XAPIAN_VISIBILITY_DEFAULT ChertTable {
     290                 :            :     friend class ChertCursor; /* Should probably fix this. */
     291                 :            :     private:
     292                 :            :         /// Copying not allowed
     293                 :            :         ChertTable(const ChertTable &);
     294                 :            : 
     295                 :            :         /// Assignment not allowed
     296                 :            :         ChertTable & operator=(const ChertTable &);
     297                 :            : 
     298                 :            :         /// Return true if there are no entries in the table.
     299                 :            :         bool really_empty() const;
     300                 :            : 
     301                 :            :     public:
     302                 :            :         /** Create a new Btree object.
     303                 :            :          *
     304                 :            :          *  This does not create the table on disk - the create_and_open()
     305                 :            :          *  method must be called to create the table on disk.
     306                 :            :          *
     307                 :            :          *  This also does not open the table - either the create_and_open()
     308                 :            :          *  or open() methods must be called before use is made of the table.
     309                 :            :          *
     310                 :            :          *  @param tablename_   The name of the table (used in changesets).
     311                 :            :          *  @param path_        Path at which the table is stored.
     312                 :            :          *  @param readonly_    whether to open the table for read only access.
     313                 :            :          *  @param compress_strategy_   DONT_COMPRESS, Z_DEFAULT_STRATEGY,
     314                 :            :          *                              Z_FILTERED, Z_HUFFMAN_ONLY, or Z_RLE.
     315                 :            :          *  @param lazy         If true, don't create the table until it's
     316                 :            :          *                      needed.
     317                 :            :          */
     318                 :            :         ChertTable(const char * tablename_, const std::string & path_,
     319                 :            :                    bool readonly_, int compress_strategy_ = DONT_COMPRESS,
     320                 :            :                    bool lazy = false);
     321                 :            : 
     322                 :            :         /** Close the Btree.
     323                 :            :          *
     324                 :            :          *  Any outstanding changes (ie, changes made without commit() having
     325                 :            :          *  subsequently been called) will be lost.
     326                 :            :          */
     327                 :            :         ~ChertTable();
     328                 :            : 
     329                 :            :         /** Close the Btree.  This closes and frees any of the btree
     330                 :            :          *  structures which have been created and opened.
     331                 :            :          *
     332                 :            :          *  @param permanent If true, the Btree will not reopen on demand.
     333                 :            :          */
     334                 :            :         void close(bool permanent=false);
     335                 :            : 
     336                 :            :         /** Determine whether the btree exists on disk.
     337                 :            :          */
     338                 :            :         bool exists() const;
     339                 :            : 
     340                 :            :         /** Open the btree at the latest revision.
     341                 :            :          *
     342                 :            :          *  @exception Xapian::DatabaseCorruptError will be thrown if the table
     343                 :            :          *      is in a corrupt state.
     344                 :            :          *  @exception Xapian::DatabaseOpeningError will be thrown if the table
     345                 :            :          *      cannot be opened (but is not corrupt - eg, permission problems,
     346                 :            :          *      not present, etc).
     347                 :            :          */
     348                 :            :         void open();
     349                 :            : 
     350                 :            :         /** Open the btree at a given revision.
     351                 :            :          *
     352                 :            :          *  Like Btree::open, but try to open at the given revision number
     353                 :            :          *  and fail if that isn't possible.
     354                 :            :          *
     355                 :            :          *  @param revision_      - revision number to open.
     356                 :            :          *
     357                 :            :          *  @return true if table is successfully opened at desired revision;
     358                 :            :          *          false if table cannot be opened at desired revision (but
     359                 :            :          *          table is otherwise consistent).
     360                 :            :          *
     361                 :            :          *  @exception Xapian::DatabaseCorruptError will be thrown if the table
     362                 :            :          *      is in a corrupt state.
     363                 :            :          *  @exception Xapian::DatabaseOpeningError will be thrown if the table
     364                 :            :          *      cannot be opened (but is not corrupt - eg, permission problems,
     365                 :            :          *      not present, etc).
     366                 :            :          */
     367                 :            :         bool open(chert_revision_number_t revision_);
     368                 :            : 
     369                 :            :         /** Return true if this table is open.
     370                 :            :          *
     371                 :            :          *  NB If the table is lazy and doesn't yet exist, returns false.
     372                 :            :          */
     373                 :     611948 :         bool is_open() const { return handle >= 0; }
     374                 :            : 
     375                 :            :         /** Flush any outstanding changes to the DB file of the table.
     376                 :            :          *
     377                 :            :          *  This must be called before commit, to ensure that the DB file is
     378                 :            :          *  ready to be switched to a new version by the commit.
     379                 :            :          */
     380                 :            :         void flush_db();
     381                 :            : 
     382                 :            :         /** Commit any outstanding changes to the table.
     383                 :            :          *
     384                 :            :          *  Commit changes made by calling add() and del() to the Btree.
     385                 :            :          *
     386                 :            :          *  If an error occurs during the operation, this will be signalled
     387                 :            :          *  by an exception.  In case of error, changes made will not be
     388                 :            :          *  committed to the Btree - they will be discarded.
     389                 :            :          *
     390                 :            :          *  @param new_revision  The new revision number to store.  This must
     391                 :            :          *          be greater than the latest revision number (see
     392                 :            :          *          get_latest_revision_number()), or an exception will be
     393                 :            :          *          thrown.
     394                 :            :          *
     395                 :            :          *  @param changes_fd  The file descriptor to write changes to.
     396                 :            :          *          Defaults to -1, meaning no changes will be written.
     397                 :            :          */
     398                 :            :         void commit(chert_revision_number_t revision, int changes_fd = -1,
     399                 :            :                     const std::string * changes_tail = NULL);
     400                 :            : 
     401                 :            :         /** Append the list of blocks changed to a changeset file.
     402                 :            :          *
     403                 :            :          *  @param changes_fd  The file descriptor to write changes to.
     404                 :            :          */
     405                 :            :         void write_changed_blocks(int changes_fd);
     406                 :            : 
     407                 :            :         /** Cancel any outstanding changes.
     408                 :            :          *
     409                 :            :          *  This will discard any modifications which haven't been committed
     410                 :            :          *  by calling commit().
     411                 :            :          */
     412                 :            :         void cancel();
     413                 :            : 
     414                 :            :         /** Read an entry from the table, if and only if it is exactly that
     415                 :            :          *  being asked for.
     416                 :            :          *
     417                 :            :          *  If the key is found in the table, then the tag is copied to @a
     418                 :            :          *  tag.  If the key is not found tag is left unchanged.
     419                 :            :          * 
     420                 :            :          *  The result is true iff the specified key is found in the Btree.
     421                 :            :          *
     422                 :            :          *  @param key  The key to look for in the table.
     423                 :            :          *  @param tag  A tag object to fill with the value if found.
     424                 :            :          *
     425                 :            :          *  @return true if key is found in table,
     426                 :            :          *          false if key is not found in table.
     427                 :            :          */
     428                 :            :         bool get_exact_entry(const std::string & key, std::string & tag) const;
     429                 :            : 
     430                 :            :         /** Check if a key exists in the Btree.
     431                 :            :          *
     432                 :            :          *  This is just like get_exact_entry() except it doesn't read the tag
     433                 :            :          *  value so is more efficient if you only want to check that the key
     434                 :            :          *  exists.
     435                 :            :          *
     436                 :            :          *  @param key  The key to look for in the table.
     437                 :            :          *
     438                 :            :          *  @return true if key is found in table,
     439                 :            :          *          false if key is not found in table.
     440                 :            :          */
     441                 :            :         bool key_exists(const std::string &key) const;
     442                 :            : 
     443                 :            :         /** Read the tag value for the key pointed to by cursor C_.
     444                 :            :          *
     445                 :            :          *  @param keep_compressed  Don't uncompress the tag - e.g. useful
     446                 :            :          *                          if it's just being opaquely copied.
     447                 :            :          *
     448                 :            :          *  @return     true if current_tag holds compressed data (always
     449                 :            :          *              false if keep_compressed was false).
     450                 :            :          */
     451                 :            :         bool read_tag(Cursor * C_, std::string *tag, bool keep_compressed) const;
     452                 :            : 
     453                 :            :         /** Add a key/tag pair to the table, replacing any existing pair with
     454                 :            :          *  the same key.
     455                 :            :          *
     456                 :            :          *  If an error occurs during the operation, an exception will be
     457                 :            :          *  thrown.
     458                 :            :          *
     459                 :            :          *  If key is empty, then the null item is replaced.
     460                 :            :          *
     461                 :            :          *  e.g.    btree.add("TODAY", "Mon 9 Oct 2000");
     462                 :            :          *
     463                 :            :          *  @param key   The key to store in the table.
     464                 :            :          *  @param tag   The tag to store in the table.
     465                 :            :          *  @param already_compressed   true if tag is already compressed,
     466                 :            :          *              for example because it is being opaquely copied
     467                 :            :          *              (default: false).
     468                 :            :          */
     469                 :            :         void add(const std::string &key, std::string tag, bool already_compressed = false);
     470                 :            : 
     471                 :            :         /** Delete an entry from the table.
     472                 :            :          *
     473                 :            :          *  The entry will be removed from the table, if it exists.  If
     474                 :            :          *  it does not exist, no action will be taken.  The item with
     475                 :            :          *  an empty key can't be removed, and false is returned.
     476                 :            :          *
     477                 :            :          *  If an error occurs during the operation, this will be signalled
     478                 :            :          *  by an exception.
     479                 :            :          *
     480                 :            :          *  e.g.    bool deleted = btree.del("TODAY")
     481                 :            :          *
     482                 :            :          *  @param key   The key to remove from the table.
     483                 :            :          *
     484                 :            :          *  @return true if an entry was removed; false if it did not exist.
     485                 :            :          */
     486                 :            :         bool del(const std::string &key);
     487                 :            : 
     488                 :            :         /// Erase this table from disk.
     489                 :            :         void erase();
     490                 :            : 
     491                 :            :         /** Set the block size.
     492                 :            :          *
     493                 :            :          *  It's only safe to do this before the table is created.
     494                 :            :          */
     495                 :            :         void set_block_size(unsigned int block_size_);
     496                 :            : 
     497                 :            :         /** Get the block size.
     498                 :            :          */
     499                 :       1979 :         unsigned int get_block_size() const { return block_size; }
     500                 :            : 
     501                 :            :         /** Create a new empty btree structure on disk and open it at the
     502                 :            :          *  initial revision.
     503                 :            :          *
     504                 :            :          *  The table must be writable - it doesn't make sense to create
     505                 :            :          *  a table that is read-only!
     506                 :            :          *
     507                 :            :          *  The block size must be less than 64K, where K = 1024. It is unwise
     508                 :            :          *  to use a small block size (less than 1024 perhaps), so we enforce a
     509                 :            :          *  minimum block size of 2K.
     510                 :            :          *
     511                 :            :          *  Example:
     512                 :            :          *
     513                 :            :          *    Btree btree("X-");
     514                 :            :          *    btree.create_and_open(8192);
     515                 :            :          *    // Files will be X-DB, X-baseA (and X-baseB).
     516                 :            :          *
     517                 :            :          *  @param blocksize     - Size of blocks to use.
     518                 :            :          *
     519                 :            :          *  @exception Xapian::DatabaseCreateError if the table can't be
     520                 :            :          *      created.
     521                 :            :          *  @exception Xapian::InvalidArgumentError if the requested blocksize
     522                 :            :          *      is unsuitable.
     523                 :            :          */
     524                 :            :         void create_and_open(unsigned int blocksize);
     525                 :            : 
     526                 :            :         void set_full_compaction(bool parity);
     527                 :            : 
     528                 :            :         /** Get the latest revision number stored in this table.
     529                 :            :          *
     530                 :            :          *  This gives the higher of the revision numbers held in the base
     531                 :            :          *  files of the B-tree, or just the revision number if there's only
     532                 :            :          *  one base file.
     533                 :            :          *
     534                 :            :          *  It is possible that there are other, older, revisions of this
     535                 :            :          *  table available, and indeed that the revision currently open
     536                 :            :          *  is one of these older revisions.
     537                 :            :          */
     538                 :        829 :         chert_revision_number_t get_latest_revision_number() const {
     539                 :        829 :             return latest_revision_number;
     540                 :            :         }
     541                 :            : 
     542                 :            :         /** Get the revision number at which this table
     543                 :            :          *  is currently open.
     544                 :            :          *
     545                 :            :          *  It is possible that there are other, more recent or older
     546                 :            :          *  revisions available.
     547                 :            :          *
     548                 :            :          *  @return the current revision number.
     549                 :            :          */
     550                 :       6434 :         chert_revision_number_t get_open_revision_number() const {
     551                 :       6434 :             return revision_number;
     552                 :            :         }
     553                 :            : 
     554                 :            :         /** Return a count of the number of entries in the table.
     555                 :            :          *
     556                 :            :          *  The count does not include the ever-present item with null key.
     557                 :            :          *
     558                 :            :          *  Use @a empty() if you only want to know if the table is empty or
     559                 :            :          *  not.
     560                 :            :          *
     561                 :            :          *  @return The number of entries in the table.
     562                 :            :          */
     563                 :     226095 :         chert_tablesize_t get_entry_count() const {
     564                 :     226095 :             return item_count;
     565                 :            :         }
     566                 :            : 
     567                 :            :         /// Return true if there are no entries in the table.
     568                 :       1674 :         bool empty() const {
     569                 :            :             // Prior to 1.1.4/1.0.18, item_count was stored in 32 bits, so we
     570                 :            :             // can't trust it as there could be more than 1<<32 entries.
     571                 :            :             //
     572                 :            :             // In theory it should wrap, so if non-zero the table isn't empty,
     573                 :            :             // but the table this was first noticed in wasn't off by a multiple
     574                 :            :             // of 1<<32.
     575                 :            : 
     576                 :            :             // An empty table will always have level == 0, and most non-empty
     577                 :            :             // tables will have more levels, so use that as a short-cut.
     578 [ +  + ][ +  + ]:       1674 :             return (level == 0) && really_empty();
     579                 :            :         }
     580                 :            : 
     581                 :            :         /** Get a cursor for reading from the table.
     582                 :            :          *
     583                 :            :          *  The cursor is owned by the caller - it is the caller's
     584                 :            :          *  responsibility to ensure that it is deleted.
     585                 :            :          */
     586                 :            :         ChertCursor * cursor_get() const;
     587                 :            : 
     588                 :            :         /** Determine whether the object contains uncommitted modifications.
     589                 :            :          *
     590                 :            :          *  @return true if there have been modifications since the last
     591                 :            :          *          the last call to commit().
     592                 :            :          */
     593                 :       4752 :         bool is_modified() const { return Btree_modified; }
     594                 :            : 
     595                 :            :         /** Set the maximum item size given the block capacity.
     596                 :            :          *
     597                 :            :          *  At least this many items of maximum size must fit into a block.
     598                 :            :          *  The default is BLOCK_CAPACITY (which is currently 4).
     599                 :            :          */
     600                 :       9113 :         void set_max_item_size(size_t block_capacity) {
     601         [ -  + ]:       9113 :             if (block_capacity > BLOCK_CAPACITY) block_capacity = BLOCK_CAPACITY;
     602                 :            :             max_item_size = (block_size - DIR_START - block_capacity * D2)
     603                 :       9113 :                 / block_capacity;
     604                 :       9113 :         }
     605                 :            : 
     606                 :            :     protected:
     607                 :            : 
     608                 :            :         /** Perform the opening operation to read.
     609                 :            :          *
     610                 :            :          *  Return true iff the open succeeded.
     611                 :            :          */
     612                 :            :         bool do_open_to_read(bool revision_supplied, chert_revision_number_t revision_);
     613                 :            : 
     614                 :            :         /** Perform the opening operation to write.
     615                 :            :          *
     616                 :            :          *  Return true iff the open succeeded.
     617                 :            :          */
     618                 :            :         bool do_open_to_write(bool revision_supplied,
     619                 :            :                               chert_revision_number_t revision_,
     620                 :            :                               bool create_db = false);
     621                 :            :         bool basic_open(bool revision_supplied, chert_revision_number_t revision);
     622                 :            : 
     623                 :            :         bool find(Cursor *) const;
     624                 :            :         int delete_kt();
     625                 :            :         void read_block(uint4 n, byte *p) const;
     626                 :            :         void write_block(uint4 n, const byte *p) const;
     627                 :            :         XAPIAN_NORETURN(void set_overwritten() const);
     628                 :            :         void block_to_cursor(Cursor *C_, int j, uint4 n) const;
     629                 :            :         void alter();
     630                 :            :         void compact(byte *p);
     631                 :            :         void enter_key(int j, Key prevkey, Key newkey);
     632                 :            :         int mid_point(byte *p);
     633                 :            :         void add_item_to_block(byte *p, Item_wr kt, int c);
     634                 :            :         void add_item(Item_wr kt, int j);
     635                 :            :         void delete_item(int j, bool repeatedly);
     636                 :            :         int add_kt(bool found);
     637                 :            :         void read_root();
     638                 :            :         void split_root(uint4 split_n);
     639                 :            :         void form_key(const std::string & key) const;
     640                 :            : 
     641                 :       2384 :         char other_base_letter() const {
     642         [ +  + ]:       2384 :            return (base_letter == 'A') ? 'B' : 'A';
     643                 :            :         }
     644                 :            : 
     645                 :            :         /// The name of the table (used when writing changesets).
     646                 :            :         const char * tablename;
     647                 :            : 
     648                 :            :         /// Allocate the zstream for deflating, if not already allocated.
     649                 :            :         void lazy_alloc_deflate_zstream() const;
     650                 :            : 
     651                 :            :         /// Allocate the zstream for inflating, if not already allocated.
     652                 :            :         void lazy_alloc_inflate_zstream() const;
     653                 :            : 
     654                 :            :         /** revision number of the opened B-tree. */
     655                 :            :         chert_revision_number_t revision_number;
     656                 :            : 
     657                 :            :         /** keeps a count of the number of items in the B-tree. */
     658                 :            :         chert_tablesize_t item_count;
     659                 :            : 
     660                 :            :         /** block size of the B tree in bytes */
     661                 :            :         unsigned int block_size;
     662                 :            : 
     663                 :            :         /** Revision number of the other base, or zero if there is only one
     664                 :            :          *  base file.
     665                 :            :          */
     666                 :            :         mutable chert_revision_number_t latest_revision_number;
     667                 :            : 
     668                 :            :         /** set to true if baseA and baseB both exist as valid bases.
     669                 :            :          *
     670                 :            :          *  The unused base is deleted as soon as a write to the Btree takes
     671                 :            :          *  place. */
     672                 :            :         mutable bool both_bases;
     673                 :            : 
     674                 :            :         /** the value 'A' or 'B' of the current base */
     675                 :            :         char base_letter;
     676                 :            : 
     677                 :            :         /** true if the root block is faked (not written to disk).
     678                 :            :          * false otherwise.  This is true when the btree hasn't been
     679                 :            :          * modified yet.
     680                 :            :          */
     681                 :            :         bool faked_root_block;
     682                 :            : 
     683                 :            :         /** true iff the data has been written in a single write in
     684                 :            :          * sequential order.
     685                 :            :          */
     686                 :            :         bool sequential;
     687                 :            : 
     688                 :            :         /** File descriptor of the table.
     689                 :            :          *
     690                 :            :          *  If the table is lazily created and doesn't yet exist, this will be
     691                 :            :          *  -1.
     692                 :            :          *
     693                 :            :          *  If close() has been called, this will be -2.
     694                 :            :          */
     695                 :            :         int handle;
     696                 :            : 
     697                 :            :         /// number of levels, counting from 0
     698                 :            :         int level;
     699                 :            : 
     700                 :            :         /// the root block of the B-tree
     701                 :            :         uint4 root;
     702                 :            : 
     703                 :            :         /// buffer of size block_size for making up key-tag items
     704                 :            :         mutable Item_wr kt;
     705                 :            : 
     706                 :            :         /// buffer of size block_size for reforming blocks
     707                 :            :         byte * buffer;
     708                 :            : 
     709                 :            :         /// For writing back as file baseA or baseB.
     710                 :            :         ChertTable_base base;
     711                 :            : 
     712                 :            :         /// The path name of the B tree.
     713                 :            :         std::string name;
     714                 :            : 
     715                 :            :         /** count of the number of successive instances of purely
     716                 :            :          * sequential addition, starting at SEQ_START_POINT (neg) and
     717                 :            :          * going up to zero. */
     718                 :            :         int seq_count;
     719                 :            : 
     720                 :            :         /** the last block to be changed by an addition */
     721                 :            :         uint4 changed_n;
     722                 :            : 
     723                 :            :         /** directory offset corresponding to last block to be changed
     724                 :            :          *  by an addition */
     725                 :            :         int changed_c;
     726                 :            : 
     727                 :            :         /// maximum size of an item (key-tag pair)
     728                 :            :         size_t max_item_size;
     729                 :            : 
     730                 :            :         /// Set to true the first time the B-tree is modified.
     731                 :            :         mutable bool Btree_modified;
     732                 :            : 
     733                 :            :         /// set to true when full compaction is to be achieved
     734                 :            :         bool full_compaction;
     735                 :            : 
     736                 :            :         /// Set to true when the database is opened to write.
     737                 :            :         bool writable;
     738                 :            : 
     739                 :            :         /// Flag for tracking when cursors need to rebuild.
     740                 :            :         mutable bool cursor_created_since_last_modification;
     741                 :            : 
     742                 :            :         /// Version count for tracking when cursors need to rebuild.
     743                 :            :         unsigned long cursor_version;
     744                 :            : 
     745                 :            :         /* B-tree navigation functions */
     746                 :    3886481 :         bool prev(Cursor *C_, int j) const {
     747         [ +  + ]:    3886481 :             if (sequential) return prev_for_sequential(C_, j);
     748                 :    3886481 :             return prev_default(C_, j);
     749                 :            :         }
     750                 :            : 
     751                 :    6388116 :         bool next(Cursor *C_, int j) const {
     752         [ +  + ]:    6388116 :             if (sequential) return next_for_sequential(C_, j);
     753                 :    6388116 :             return next_default(C_, j);
     754                 :            :         }
     755                 :            : 
     756                 :            :         /* Default implementations. */
     757                 :            :         bool prev_default(Cursor *C_, int j) const;
     758                 :            :         bool next_default(Cursor *C_, int j) const;
     759                 :            : 
     760                 :            :         /* Implementations for sequential mode. */
     761                 :            :         bool prev_for_sequential(Cursor *C_, int dummy) const;
     762                 :            :         bool next_for_sequential(Cursor *C_, int dummy) const;
     763                 :            : 
     764                 :            :         static int find_in_block(const byte * p, Key key, bool leaf, int c);
     765                 :            : 
     766                 :            :         /** block_given_by(p, c) finds the item at block address p, directory
     767                 :            :          *  offset c, and returns its tag value as an integer.
     768                 :            :          */
     769                 :            :         static uint4 block_given_by(const byte * p, int c);
     770                 :            : 
     771                 :            :         mutable Cursor C[BTREE_CURSOR_LEVELS];
     772                 :            : 
     773                 :            :         /** Buffer used when splitting a block.
     774                 :            :          *
     775                 :            :          *  This buffer holds the split off part of the block.  It's only used
     776                 :            :          *  when updating (in ChertTable::add_item().
     777                 :            :          */
     778                 :            :         byte * split_p;
     779                 :            : 
     780                 :            :         /** DONT_COMPRESS or Z_DEFAULT_STRATEGY, Z_FILTERED, Z_HUFFMAN_ONLY,
     781                 :            :          *  Z_RLE. */
     782                 :            :         int compress_strategy;
     783                 :            : 
     784                 :            :         /// Zlib state object for deflating
     785                 :            :         mutable z_stream *deflate_zstream;
     786                 :            : 
     787                 :            :         /// Zlib state object for inflating
     788                 :            :         mutable z_stream *inflate_zstream;
     789                 :            : 
     790                 :            :         /// If true, don't create the table until it's needed.
     791                 :            :         bool lazy;
     792                 :            : 
     793                 :            :         /* Debugging methods */
     794                 :            : //      void report_block_full(int m, int n, const byte * p);
     795                 :            : 
     796                 :            :         /// Throw an exception indicating that the database is closed.
     797                 :            :         XAPIAN_NORETURN(static void throw_database_closed());
     798                 :            : };
     799                 :            : 
     800                 :            : #endif /* OM_HGUARD_CHERT_TABLE_H */

Generated by: LCOV version 1.8