LCOV - code coverage report
Current view: top level - backends/chert - chert_database.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 2 2 100.0 %
Date: 2011-08-21 Functions: 1 1 100.0 %
Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /* chert_database.h: C++ class definition for chert database
       2                 :            :  *
       3                 :            :  * Copyright 1999,2000,2001 BrightStation PLC
       4                 :            :  * Copyright 2002 Ananova Ltd
       5                 :            :  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
       6                 :            :  * Copyright 2008 Lemur Consulting Ltd
       7                 :            :  *
       8                 :            :  * This program is free software; you can redistribute it and/or
       9                 :            :  * modify it under the terms of the GNU General Public License as
      10                 :            :  * published by the Free Software Foundation; either version 2 of the
      11                 :            :  * License, or (at your option) any later version.
      12                 :            :  *
      13                 :            :  * This program is distributed in the hope that it will be useful,
      14                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      15                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16                 :            :  * GNU General Public License for more details.
      17                 :            :  *
      18                 :            :  * You should have received a copy of the GNU General Public License
      19                 :            :  * along with this program; if not, write to the Free Software
      20                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
      21                 :            :  * USA
      22                 :            :  */
      23                 :            : 
      24                 :            : #ifndef OM_HGUARD_CHERT_DATABASE_H
      25                 :            : #define OM_HGUARD_CHERT_DATABASE_H
      26                 :            : 
      27                 :            : #include "database.h"
      28                 :            : #include "chert_dbstats.h"
      29                 :            : #include "chert_positionlist.h"
      30                 :            : #include "chert_postlist.h"
      31                 :            : #include "chert_record.h"
      32                 :            : #include "chert_spelling.h"
      33                 :            : #include "chert_synonym.h"
      34                 :            : #include "chert_termlisttable.h"
      35                 :            : #include "chert_values.h"
      36                 :            : #include "chert_version.h"
      37                 :            : #include "../flint_lock.h"
      38                 :            : #include "chert_types.h"
      39                 :            : #include "valuestats.h"
      40                 :            : 
      41                 :            : #include <map>
      42                 :            : 
      43                 :            : class ChertTermList;
      44                 :            : class ChertAllDocsPostList;
      45                 :            : class RemoteConnection;
      46                 :            : 
      47                 :            : /** A backend designed for efficient indexing and retrieval, using
      48                 :            :  *  compressed posting lists and a btree storage scheme.
      49                 :            :  */
      50                 :            : class ChertDatabase : public Xapian::Database::Internal {
      51                 :            :     friend class ChertWritableDatabase;
      52                 :            :     friend class ChertTermList;
      53                 :            :     friend class ChertPostList;
      54                 :            :     friend class ChertAllTermsList;
      55                 :            :     friend class ChertAllDocsPostList;
      56                 :            :     private:
      57                 :            :         /** Directory to store databases in.
      58                 :            :          */
      59                 :            :         std::string db_dir;
      60                 :            : 
      61                 :            :         /** Whether the database is readonly.
      62                 :            :          */
      63                 :            :         bool readonly;
      64                 :            : 
      65                 :            :         /** The file describing the Chert database.
      66                 :            :          *  This file has information about the format of the database
      67                 :            :          *  which can't easily be stored in any of the individual tables.
      68                 :            :          */
      69                 :            :         ChertVersion version_file;
      70                 :            : 
      71                 :            :         /** Table storing posting lists.
      72                 :            :          *
      73                 :            :          *  Whenever an update is performed, this table is the first to be
      74                 :            :          *  updated: therefore, its most recent revision number is the most
      75                 :            :          *  recent anywhere in the database.
      76                 :            :          */
      77                 :            :         mutable ChertPostListTable postlist_table;
      78                 :            : 
      79                 :            :         /** Table storing position lists.
      80                 :            :          */
      81                 :            :         ChertPositionListTable position_table;
      82                 :            : 
      83                 :            :         /** Table storing term lists.
      84                 :            :          */
      85                 :            :         ChertTermListTable termlist_table;
      86                 :            : 
      87                 :            :         /** Value manager. */
      88                 :            :         mutable ChertValueManager value_manager;
      89                 :            : 
      90                 :            :         /** Table storing synonym data.
      91                 :            :          */
      92                 :            :         mutable ChertSynonymTable synonym_table;
      93                 :            : 
      94                 :            :         /** Table storing spelling correction data.
      95                 :            :          */
      96                 :            :         mutable ChertSpellingTable spelling_table;
      97                 :            : 
      98                 :            :         /** Table storing records.
      99                 :            :          *
     100                 :            :          *  Whenever an update is performed, this table is the last to be
     101                 :            :          *  updated: therefore, its most recent revision number is the most
     102                 :            :          *  recent consistent revision available.  If this table's most
     103                 :            :          *  recent revision number is not available for all tables, there
     104                 :            :          *  is no consistent revision available, and the database is corrupt.
     105                 :            :          */
     106                 :            :         ChertRecordTable record_table;
     107                 :            : 
     108                 :            :         /// Lock object.
     109                 :            :         FlintLock lock;
     110                 :            : 
     111                 :            :         /** The maximum number of changesets which should be kept in the
     112                 :            :          *  database. */
     113                 :            :         unsigned int max_changesets;
     114                 :            : 
     115                 :            :         /// Database statistics.
     116                 :            :         ChertDatabaseStats stats;
     117                 :            : 
     118                 :            :         /** Return true if a database exists at the path specified for this
     119                 :            :          *  database.
     120                 :            :          */
     121                 :            :         bool database_exists();
     122                 :            : 
     123                 :            :         /** Create new tables, and open them.
     124                 :            :          *  Any existing tables will be removed first.
     125                 :            :          */
     126                 :            :         void create_and_open_tables(unsigned int blocksize);
     127                 :            : 
     128                 :            :         /** Open all tables at most recent consistent revision.
     129                 :            :          *
     130                 :            :          *  @exception Xapian::DatabaseCorruptError is thrown if there is no
     131                 :            :          *  consistent revision available.
     132                 :            :          */
     133                 :            :         void open_tables_consistent();
     134                 :            : 
     135                 :            :         /** Get a write lock on the database, or throw an
     136                 :            :          *  Xapian::DatabaseLockError if failure.
     137                 :            :          *
     138                 :            :          *  @param creating true if the database is in the process of being
     139                 :            :          *  created - if false, will throw Xapian::DatabaseOpeningError if the lock
     140                 :            :          *  can't be acquired and the database doesn't exist.
     141                 :            :          */
     142                 :            :         void get_database_write_lock(bool creating);
     143                 :            : 
     144                 :            :         /** Open tables at specified revision number.
     145                 :            :          *
     146                 :            :          *  @exception Xapian::InvalidArgumentError is thrown if the specified
     147                 :            :          *  revision is not available.
     148                 :            :          */
     149                 :            :         void open_tables(chert_revision_number_t revision);
     150                 :            : 
     151                 :            :         /** Get the revision number which the tables are currently
     152                 :            :          *  opened at.
     153                 :            :          *
     154                 :            :          *  @return the current revision number.
     155                 :            :          */
     156                 :            :         chert_revision_number_t get_revision_number() const;
     157                 :            : 
     158                 :            :         /** Get the next revision number which should be
     159                 :            :          *  used in the tables.
     160                 :            :          *
     161                 :            :          *  @return the next revision number.
     162                 :            :          */
     163                 :            :         chert_revision_number_t get_next_revision_number() const;
     164                 :            : 
     165                 :            :         /** Set the revision number in the tables.
     166                 :            :          *
     167                 :            :          *  This updates the disk tables so that the currently open revision
     168                 :            :          *  becomes the specified revision number.
     169                 :            :          *
     170                 :            :          *  @param new_revision The new revision number to store.  This must
     171                 :            :          *          be greater than the latest revision number (see
     172                 :            :          *          get_latest_revision_number()), or undefined behaviour will
     173                 :            :          *          result.
     174                 :            :          */
     175                 :            :         void set_revision_number(chert_revision_number_t new_revision);
     176                 :            : 
     177                 :            :         /** Re-open tables to recover from an overwritten condition,
     178                 :            :          *  or just get most up-to-date version.
     179                 :            :          */
     180                 :            :         void reopen();
     181                 :            : 
     182                 :            :         /** Close all the tables permanently.
     183                 :            :          */
     184                 :            :         void close();
     185                 :            : 
     186                 :            :         /** Called if modifications fail.
     187                 :            :          *
     188                 :            :          *  @param msg is a string description of the exception that was
     189                 :            :          *  raised when the modifications failed.
     190                 :            :          */
     191                 :            :         void modifications_failed(chert_revision_number_t old_revision,
     192                 :            :                                   chert_revision_number_t new_revision,
     193                 :            :                                   const std::string & msg);
     194                 :            : 
     195                 :            :         /** Apply any outstanding changes to the tables.
     196                 :            :          *
     197                 :            :          *  If an error occurs during this operation, this will be signalled
     198                 :            :          *  by an exception being thrown.  In this case the contents of the
     199                 :            :          *  tables on disk will be left in an unmodified state (though possibly
     200                 :            :          *  with increased revision numbers), and the outstanding changes will
     201                 :            :          *  be lost.
     202                 :            :          */
     203                 :            :         void apply();
     204                 :            : 
     205                 :            :         /** Cancel any outstanding changes to the tables.
     206                 :            :          */
     207                 :            :         void cancel();
     208                 :            : 
     209                 :            :         /** Send a set of messages which transfer the whole database.
     210                 :            :          */
     211                 :            :         void send_whole_database(RemoteConnection & conn, double end_time);
     212                 :            : 
     213                 :            :         /** Get the start and end revisions stored in a changeset.
     214                 :            :          */
     215                 :            :         void get_changeset_revisions(const string & path,
     216                 :            :                                      chert_revision_number_t * startrev,
     217                 :            :                                      chert_revision_number_t * endrev) const;
     218                 :            :     public:
     219                 :            :         /** Create and open a chert database.
     220                 :            :          *
     221                 :            :          *  @exception Xapian::DatabaseCorruptError is thrown if there is no
     222                 :            :          *             consistent revision available.
     223                 :            :          *
     224                 :            :          *  @exception Xapian::DatabaseOpeningError thrown if database can't
     225                 :            :          *             be opened.
     226                 :            :          *
     227                 :            :          *  @exception Xapian::DatabaseVersionError thrown if database is in an
     228                 :            :          *             unsupported format.  This implies that the database was
     229                 :            :          *             created by an older or newer version of Xapian.
     230                 :            :          *
     231                 :            :          *  @param db_dir_ directory holding chert tables
     232                 :            :          *
     233                 :            :          *  @param block_size Block size, in bytes, to use when creating
     234                 :            :          *                    tables.  This is only important, and has the
     235                 :            :          *                    correct value, when the database is being
     236                 :            :          *                    created.
     237                 :            :          */
     238                 :            :         ChertDatabase(const string &db_dir_, int action = XAPIAN_DB_READONLY,
     239                 :            :                        unsigned int block_size = 0u);
     240                 :            : 
     241                 :            :         ~ChertDatabase();
     242                 :            : 
     243                 :            :         /// Get a postlist table cursor (used by ChertValueList).
     244                 :       2688 :         ChertCursor * get_postlist_cursor() const {
     245                 :       2688 :             return postlist_table.cursor_get();
     246                 :            :         }
     247                 :            : 
     248                 :            :         /** Virtual methods of Database::Internal. */
     249                 :            :         //@{
     250                 :            :         Xapian::doccount  get_doccount() const;
     251                 :            :         Xapian::docid get_lastdocid() const;
     252                 :            :         totlen_t get_total_length() const;
     253                 :            :         Xapian::doclength get_avlength() const;
     254                 :            :         Xapian::termcount get_doclength(Xapian::docid did) const;
     255                 :            :         Xapian::doccount get_termfreq(const string & tname) const;
     256                 :            :         Xapian::termcount get_collection_freq(const string & tname) const;
     257                 :            :         Xapian::doccount get_value_freq(Xapian::valueno valno) const;
     258                 :            :         std::string get_value_lower_bound(Xapian::valueno valno) const;
     259                 :            :         std::string get_value_upper_bound(Xapian::valueno valno) const;
     260                 :            :         Xapian::termcount get_doclength_lower_bound() const;
     261                 :            :         Xapian::termcount get_doclength_upper_bound() const;
     262                 :            :         Xapian::termcount get_wdf_upper_bound(const string & term) const;
     263                 :            :         bool term_exists(const string & tname) const;
     264                 :            :         bool has_positions() const;
     265                 :            : 
     266                 :            :         LeafPostList * open_post_list(const string & tname) const;
     267                 :            :         ValueList * open_value_list(Xapian::valueno slot) const;
     268                 :            :         Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
     269                 :            : 
     270                 :            :         PositionList * open_position_list(Xapian::docid did, const string & term) const;
     271                 :            :         TermList * open_term_list(Xapian::docid did) const;
     272                 :            :         TermList * open_allterms(const string & prefix) const;
     273                 :            : 
     274                 :            :         TermList * open_spelling_termlist(const string & word) const;
     275                 :            :         TermList * open_spelling_wordlist() const;
     276                 :            :         Xapian::doccount get_spelling_frequency(const string & word) const;
     277                 :            : 
     278                 :            :         TermList * open_synonym_termlist(const string & term) const;
     279                 :            :         TermList * open_synonym_keylist(const string & prefix) const;
     280                 :            : 
     281                 :            :         string get_metadata(const string & key) const;
     282                 :            :         TermList * open_metadata_keylist(const std::string &prefix) const;
     283                 :            :         void write_changesets_to_fd(int fd,
     284                 :            :                                     const string & start_revision,
     285                 :            :                                     bool need_whole_db,
     286                 :            :                                     Xapian::ReplicationInfo * info);
     287                 :            :         string get_revision_info() const;
     288                 :            :         string get_uuid() const;
     289                 :            :         //@}
     290                 :            : 
     291                 :            : };
     292                 :            : 
     293                 :            : /** A writable chert database.
     294                 :            :  */
     295                 :            : class ChertWritableDatabase : public ChertDatabase {
     296                 :            :         /** Unflushed changes to term frequencies and collection frequencies. */
     297                 :            :         mutable map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >
     298                 :            :                 freq_deltas;
     299                 :            : 
     300                 :            :         /** Document lengths of new and modified documents which haven't been flushed yet. */
     301                 :            :         mutable map<Xapian::docid, Xapian::termcount> doclens;
     302                 :            : 
     303                 :            :         /// Modifications to posting lists.
     304                 :            :         mutable map<string, map<Xapian::docid,
     305                 :            :                                 pair<char, Xapian::termcount> > > mod_plists;
     306                 :            : 
     307                 :            :         mutable map<Xapian::valueno, ValueStats> value_stats;
     308                 :            : 
     309                 :            :         /** The number of documents added, deleted, or replaced since the last
     310                 :            :          *  flush.
     311                 :            :          */
     312                 :            :         mutable Xapian::doccount change_count;
     313                 :            : 
     314                 :            :         /// If change_count reaches this threshold we automatically flush.
     315                 :            :         Xapian::doccount flush_threshold;
     316                 :            : 
     317                 :            :         /** A pointer to the last document which was returned by
     318                 :            :          *  open_document(), or NULL if there is no such valid document.  This
     319                 :            :          *  is used purely for comparing with a supplied document to help with
     320                 :            :          *  optimising replace_document.  When the document internals are
     321                 :            :          *  deleted, this pointer gets set to NULL.
     322                 :            :          */
     323                 :            :         mutable Xapian::Document::Internal * modify_shortcut_document;
     324                 :            : 
     325                 :            :         /** The document ID for the last document returned by open_document().
     326                 :            :          */
     327                 :            :         mutable Xapian::docid modify_shortcut_docid;
     328                 :            : 
     329                 :            :         /// Flush any unflushed postlist changes, but don't commit them.
     330                 :            :         void flush_postlist_changes() const;
     331                 :            : 
     332                 :            :         /// Close all the tables permanently.
     333                 :            :         void close();
     334                 :            : 
     335                 :            :         /// Apply changes.
     336                 :            :         void apply();
     337                 :            : 
     338                 :            :         /** Add or modify an entry in freq_deltas.
     339                 :            :          *
     340                 :            :          *  @param tname The term to modify the entry for.
     341                 :            :          *  @param tf_delta The change in the term frequency delta.
     342                 :            :          *  @param cf_delta The change in the collection frequency delta.
     343                 :            :          */
     344                 :            :         void add_freq_delta(const string & tname,
     345                 :            :                             Xapian::termcount_diff tf_delta,
     346                 :            :                             Xapian::termcount_diff cf_delta);
     347                 :            : 
     348                 :            :         /** Insert modifications for a new document to the postlists.
     349                 :            :          *
     350                 :            :          *  @param did The document ID to insert the entry for.
     351                 :            :          *  @param tname The term to insert the entry for.
     352                 :            :          *  @param wdf The new wdf value to store.
     353                 :            :          */
     354                 :            :         void insert_mod_plist(Xapian::docid did,
     355                 :            :                               const string & tname,
     356                 :            :                               Xapian::termcount wdf);
     357                 :            : 
     358                 :            :         /** Update the stored modifications to the postlists.
     359                 :            :          *
     360                 :            :          *  @param did The document ID to modify the entry for.
     361                 :            :          *  @param tname The term to modify the entry for.
     362                 :            :          *  @param type The type of change to the postlist.
     363                 :            :          *  @param wdf The new wdf value to store.
     364                 :            :          *
     365                 :            :          *  If type is 'A', and an existing entry is in the stored
     366                 :            :          *  modifications, the stored type will be set to 'M'.  In all other
     367                 :            :          *  cases, the stored type is simply the value supplied.
     368                 :            :          */
     369                 :            :         void update_mod_plist(Xapian::docid did,
     370                 :            :                               const string & tname,
     371                 :            :                               char type,
     372                 :            :                               Xapian::termcount wdf);
     373                 :            : 
     374                 :            :         //@{
     375                 :            :         /** Implementation of virtual methods: see Database::Internal for
     376                 :            :          *  details.
     377                 :            :          */
     378                 :            :         void commit();
     379                 :            : 
     380                 :            :         /** Cancel pending modifications to the database. */
     381                 :            :         void cancel();
     382                 :            : 
     383                 :            :         Xapian::docid add_document(const Xapian::Document & document);
     384                 :            :         Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document);
     385                 :            :         // Stop the default implementation of delete_document(term) and
     386                 :            :         // replace_document(term) from being hidden.  This isn't really
     387                 :            :         // a problem as we only try to call them through the base class
     388                 :            :         // (where they aren't hidden) but some compilers generate a warning
     389                 :            :         // about the hiding.
     390                 :            : #ifndef _MSC_VER
     391                 :            :         using Xapian::Database::Internal::delete_document;
     392                 :            :         using Xapian::Database::Internal::replace_document;
     393                 :            : #endif
     394                 :            :         void delete_document(Xapian::docid did);
     395                 :            :         void replace_document(Xapian::docid did, const Xapian::Document & document);
     396                 :            : 
     397                 :            :         Xapian::Document::Internal * open_document(Xapian::docid did,
     398                 :            :                                                    bool lazy) const;
     399                 :            : 
     400                 :            :         //@}
     401                 :            : 
     402                 :            :     public:
     403                 :            :         /** Create and open a writable chert database.
     404                 :            :          *
     405                 :            :          *  @exception Xapian::DatabaseOpeningError thrown if database can't
     406                 :            :          *             be opened.
     407                 :            :          *
     408                 :            :          *  @exception Xapian::DatabaseVersionError thrown if database is in an
     409                 :            :          *             unsupported format.  This implies that the database was
     410                 :            :          *             created by an older or newer version of Xapian.
     411                 :            :          *
     412                 :            :          *  @param dir directory holding chert tables
     413                 :            :          */
     414                 :            :         ChertWritableDatabase(const string &dir, int action, int block_size);
     415                 :            : 
     416                 :            :         ~ChertWritableDatabase();
     417                 :            : 
     418                 :            :         /** Virtual methods of Database::Internal. */
     419                 :            :         //@{
     420                 :            :         Xapian::termcount get_doclength(Xapian::docid did) const;
     421                 :            :         Xapian::doccount get_termfreq(const string & tname) const;
     422                 :            :         Xapian::termcount get_collection_freq(const string & tname) const;
     423                 :            :         Xapian::doccount get_value_freq(Xapian::valueno valno) const;
     424                 :            :         std::string get_value_lower_bound(Xapian::valueno valno) const;
     425                 :            :         std::string get_value_upper_bound(Xapian::valueno valno) const;
     426                 :            :         bool term_exists(const string & tname) const;
     427                 :            : 
     428                 :            :         LeafPostList * open_post_list(const string & tname) const;
     429                 :            :         ValueList * open_value_list(Xapian::valueno slot) const;
     430                 :            :         TermList * open_allterms(const string & prefix) const;
     431                 :            : 
     432                 :            :         void add_spelling(const string & word, Xapian::termcount freqinc) const;
     433                 :            :         void remove_spelling(const string & word, Xapian::termcount freqdec) const;
     434                 :            :         TermList * open_spelling_wordlist() const;
     435                 :            : 
     436                 :            :         TermList * open_synonym_keylist(const string & prefix) const;
     437                 :            :         void add_synonym(const string & word, const string & synonym) const;
     438                 :            :         void remove_synonym(const string & word, const string & synonym) const;
     439                 :            :         void clear_synonyms(const string & word) const;
     440                 :            : 
     441                 :            :         void set_metadata(const string & key, const string & value);
     442                 :            :         void invalidate_doc_object(Xapian::Document::Internal * obj) const;
     443                 :            :         //@}
     444                 :            : };
     445                 :            : 
     446                 :            : #endif /* OM_HGUARD_CHERT_DATABASE_H */

Generated by: LCOV version 1.8