Branch data Line data Source code
1 : : /* brass_database.h: C++ class definition for brass database
2 : : *
3 : : * Copyright 1999,2000,2001 BrightStation PLC
4 : : * Copyright 2002 Ananova Ltd
5 : : * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
6 : : * Copyright 2008 Lemur Consulting Ltd
7 : : *
8 : : * This program is free software; you can redistribute it and/or
9 : : * modify it under the terms of the GNU General Public License as
10 : : * published by the Free Software Foundation; either version 2 of the
11 : : * License, or (at your option) any later version.
12 : : *
13 : : * This program is distributed in the hope that it will be useful,
14 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : : * GNU General Public License for more details.
17 : : *
18 : : * You should have received a copy of the GNU General Public License
19 : : * along with this program; if not, write to the Free Software
20 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 : : * USA
22 : : */
23 : :
24 : : #ifndef OM_HGUARD_BRASS_DATABASE_H
25 : : #define OM_HGUARD_BRASS_DATABASE_H
26 : :
27 : : #include "database.h"
28 : : #include "brass_dbstats.h"
29 : : #include "brass_inverter.h"
30 : : #include "brass_positionlist.h"
31 : : #include "brass_postlist.h"
32 : : #include "brass_record.h"
33 : : #include "brass_spelling.h"
34 : : #include "brass_synonym.h"
35 : : #include "brass_termlisttable.h"
36 : : #include "brass_values.h"
37 : : #include "brass_version.h"
38 : : #include "../flint_lock.h"
39 : : #include "brass_types.h"
40 : : #include "valuestats.h"
41 : :
42 : : #include <map>
43 : :
44 : : class BrassTermList;
45 : : class BrassAllDocsPostList;
46 : : class RemoteConnection;
47 : :
48 : : /** A backend designed for efficient indexing and retrieval, using
49 : : * compressed posting lists and a btree storage scheme.
50 : : */
51 : : class BrassDatabase : public Xapian::Database::Internal {
52 : : friend class BrassWritableDatabase;
53 : : friend class BrassTermList;
54 : : friend class BrassPostList;
55 : : friend class BrassAllTermsList;
56 : : friend class BrassAllDocsPostList;
57 : : private:
58 : : /** Directory to store databases in.
59 : : */
60 : : std::string db_dir;
61 : :
62 : : /** Whether the database is readonly.
63 : : */
64 : : bool readonly;
65 : :
66 : : /** The file describing the Brass database.
67 : : * This file has information about the format of the database
68 : : * which can't easily be stored in any of the individual tables.
69 : : */
70 : : BrassVersion version_file;
71 : :
72 : : /** Table storing posting lists.
73 : : *
74 : : * Whenever an update is performed, this table is the first to be
75 : : * updated: therefore, its most recent revision number is the most
76 : : * recent anywhere in the database.
77 : : */
78 : : mutable BrassPostListTable postlist_table;
79 : :
80 : : /** Table storing position lists.
81 : : */
82 : : BrassPositionListTable position_table;
83 : :
84 : : /** Table storing term lists.
85 : : */
86 : : BrassTermListTable termlist_table;
87 : :
88 : : /** Value manager. */
89 : : mutable BrassValueManager value_manager;
90 : :
91 : : /** Table storing synonym data.
92 : : */
93 : : mutable BrassSynonymTable synonym_table;
94 : :
95 : : /** Table storing spelling correction data.
96 : : */
97 : : mutable BrassSpellingTable spelling_table;
98 : :
99 : : /** Table storing records.
100 : : *
101 : : * Whenever an update is performed, this table is the last to be
102 : : * updated: therefore, its most recent revision number is the most
103 : : * recent consistent revision available. If this table's most
104 : : * recent revision number is not available for all tables, there
105 : : * is no consistent revision available, and the database is corrupt.
106 : : */
107 : : BrassRecordTable record_table;
108 : :
109 : : /// Lock object.
110 : : FlintLock lock;
111 : :
112 : : /** The maximum number of changesets which should be kept in the
113 : : * database. */
114 : : unsigned int max_changesets;
115 : :
116 : : /// Database statistics.
117 : : BrassDatabaseStats stats;
118 : :
119 : : /** Return true if a database exists at the path specified for this
120 : : * database.
121 : : */
122 : : bool database_exists();
123 : :
124 : : /** Create new tables, and open them.
125 : : * Any existing tables will be removed first.
126 : : */
127 : : void create_and_open_tables(unsigned int blocksize);
128 : :
129 : : /** Open all tables at most recent consistent revision.
130 : : *
131 : : * @exception Xapian::DatabaseCorruptError is thrown if there is no
132 : : * consistent revision available.
133 : : */
134 : : void open_tables_consistent();
135 : :
136 : : /** Get a write lock on the database, or throw an
137 : : * Xapian::DatabaseLockError if failure.
138 : : *
139 : : * @param creating true if the database is in the process of being
140 : : * created - if false, will throw Xapian::DatabaseOpeningError if the lock
141 : : * can't be acquired and the database doesn't exist.
142 : : */
143 : : void get_database_write_lock(bool creating);
144 : :
145 : : /** Open tables at specified revision number.
146 : : *
147 : : * @exception Xapian::InvalidArgumentError is thrown if the specified
148 : : * revision is not available.
149 : : */
150 : : void open_tables(brass_revision_number_t revision);
151 : :
152 : : /** Get the revision number which the tables are
153 : : * opened at.
154 : : *
155 : : * @return the current revision number.
156 : : */
157 : : brass_revision_number_t get_revision_number() const;
158 : :
159 : : /** Get the next revision number which should be
160 : : * used in the tables.
161 : : *
162 : : * @return the next revision number.
163 : : */
164 : : brass_revision_number_t get_next_revision_number() const;
165 : :
166 : : /** Set the revision number in the tables.
167 : : *
168 : : * This updates the disk tables so that the currently open revision
169 : : * becomes the specified revision number.
170 : : *
171 : : * @param new_revision The new revision number to store. This must
172 : : * be greater than the latest revision number (see
173 : : * get_latest_revision_number()), or undefined behaviour will
174 : : * result.
175 : : */
176 : : void set_revision_number(brass_revision_number_t new_revision);
177 : :
178 : : /** Re-open tables to recover from an overwritten condition,
179 : : * or just get most up-to-date version.
180 : : */
181 : : void reopen();
182 : :
183 : : /** Close all the tables permanently.
184 : : */
185 : : void close();
186 : :
187 : : /** Called if modifications fail.
188 : : *
189 : : * @param msg is a string description of the exception that was
190 : : * raised when the modifications failed.
191 : : */
192 : : void modifications_failed(brass_revision_number_t old_revision,
193 : : brass_revision_number_t new_revision,
194 : : const std::string & msg);
195 : :
196 : : /** Apply any outstanding changes to the tables.
197 : : *
198 : : * If an error occurs during this operation, this will be signalled
199 : : * by an exception being thrown. In this case the contents of the
200 : : * tables on disk will be left in an unmodified state (though possibly
201 : : * with increased revision numbers), and the outstanding changes will
202 : : * be lost.
203 : : */
204 : : void apply();
205 : :
206 : : /** Cancel any outstanding changes to the tables.
207 : : */
208 : : void cancel();
209 : :
210 : : /** Send a set of messages which transfer the whole database.
211 : : */
212 : : void send_whole_database(RemoteConnection & conn, double end_time);
213 : :
214 : : /** Get the start and end revisions stored in a changeset.
215 : : */
216 : : void get_changeset_revisions(const string & path,
217 : : brass_revision_number_t * startrev,
218 : : brass_revision_number_t * endrev) const;
219 : :
220 : : public:
221 : : /** Create and open a brass database.
222 : : *
223 : : * @exception Xapian::DatabaseCorruptError is thrown if there is no
224 : : * consistent revision available.
225 : : *
226 : : * @exception Xapian::DatabaseOpeningError thrown if database can't
227 : : * be opened.
228 : : *
229 : : * @exception Xapian::DatabaseVersionError thrown if database is in an
230 : : * unsupported format. This implies that the database was
231 : : * created by an older or newer version of Xapian.
232 : : *
233 : : * @param db_dir_ directory holding brass tables
234 : : *
235 : : * @param block_size Block size, in bytes, to use when creating
236 : : * tables. This is only important, and has the
237 : : * correct value, when the database is being
238 : : * created.
239 : : */
240 : : BrassDatabase(const string &db_dir_, int action = XAPIAN_DB_READONLY,
241 : : unsigned int block_size = 0u);
242 : :
243 : : ~BrassDatabase();
244 : :
245 : : /// Get a postlist table cursor (used by BrassValueList).
246 : 2682 : BrassCursor * get_postlist_cursor() const {
247 : 2682 : return postlist_table.cursor_get();
248 : : }
249 : :
250 : : /** Virtual methods of Database::Internal. */
251 : : //@{
252 : : Xapian::doccount get_doccount() const;
253 : : Xapian::docid get_lastdocid() const;
254 : : totlen_t get_total_length() const;
255 : : Xapian::doclength get_avlength() const;
256 : : Xapian::termcount get_doclength(Xapian::docid did) const;
257 : : Xapian::doccount get_termfreq(const string & tname) const;
258 : : Xapian::termcount get_collection_freq(const string & tname) const;
259 : : Xapian::doccount get_value_freq(Xapian::valueno valno) const;
260 : : std::string get_value_lower_bound(Xapian::valueno valno) const;
261 : : std::string get_value_upper_bound(Xapian::valueno valno) const;
262 : : Xapian::termcount get_doclength_lower_bound() const;
263 : : Xapian::termcount get_doclength_upper_bound() const;
264 : : Xapian::termcount get_wdf_upper_bound(const string & term) const;
265 : : bool term_exists(const string & tname) const;
266 : : bool has_positions() const;
267 : :
268 : : LeafPostList * open_post_list(const string & tname) const;
269 : : ValueList * open_value_list(Xapian::valueno slot) const;
270 : : Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
271 : :
272 : : PositionList * open_position_list(Xapian::docid did, const string & term) const;
273 : : TermList * open_term_list(Xapian::docid did) const;
274 : : TermList * open_allterms(const string & prefix) const;
275 : :
276 : : TermList * open_spelling_termlist(const string & word) const;
277 : : TermList * open_spelling_wordlist() const;
278 : : Xapian::doccount get_spelling_frequency(const string & word) const;
279 : :
280 : : TermList * open_synonym_termlist(const string & term) const;
281 : : TermList * open_synonym_keylist(const string & prefix) const;
282 : :
283 : : string get_metadata(const string & key) const;
284 : : TermList * open_metadata_keylist(const std::string &prefix) const;
285 : : void write_changesets_to_fd(int fd,
286 : : const string & start_revision,
287 : : bool need_whole_db,
288 : : Xapian::ReplicationInfo * info);
289 : : string get_revision_info() const;
290 : : string get_uuid() const;
291 : : //@}
292 : :
293 : : };
294 : :
295 : : /** A writable brass database.
296 : : */
297 : : class BrassWritableDatabase : public BrassDatabase {
298 : : mutable Inverter inverter;
299 : :
300 : : mutable map<Xapian::valueno, ValueStats> value_stats; // ValueStats entries keyed by value slot number.
301 : :
302 : : /** The number of documents added, deleted, or replaced since the last
303 : : * flush.
304 : : */
305 : : mutable Xapian::doccount change_count;
306 : :
307 : : /// If change_count reaches this threshold we automatically flush.
308 : : Xapian::doccount flush_threshold;
309 : :
310 : : /** A pointer to the last document which was returned by
311 : : * open_document(), or NULL if there is no such valid document. This
312 : : * is used purely for comparing with a supplied document to help with
313 : : * optimising replace_document. When the document internals are
314 : : * deleted, this pointer gets set to NULL.
315 : : */
316 : : mutable Xapian::Document::Internal * modify_shortcut_document;
317 : :
318 : : /** The document ID for the last document returned by open_document().
319 : : */
320 : : mutable Xapian::docid modify_shortcut_docid;
321 : :
322 : : /// Flush any unflushed postlist changes, but don't commit them.
323 : : void flush_postlist_changes() const;
324 : :
325 : : /// Close all the tables permanently.
326 : : void close();
327 : :
328 : : /// Apply changes.
329 : : void apply();
330 : :
331 : : //@{
332 : : /** Implementation of virtual methods: see Database::Internal for
333 : : * details.
334 : : */
335 : : void commit();
336 : :
337 : : /** Cancel pending modifications to the database. */
338 : : void cancel();
339 : :
340 : : Xapian::docid add_document(const Xapian::Document & document);
341 : : Xapian::docid add_document_(Xapian::docid did, const Xapian::Document & document);
342 : : // Stop the default implementation of delete_document(term) and
343 : : // replace_document(term) from being hidden. This isn't really
344 : : // a problem as we only try to call them through the base class
345 : : // (where they aren't hidden) but some compilers generate a warning
346 : : // about the hiding.
347 : : #ifndef _MSC_VER
348 : : using Xapian::Database::Internal::delete_document;
349 : : using Xapian::Database::Internal::replace_document;
350 : : #endif
351 : : void delete_document(Xapian::docid did);
352 : : void replace_document(Xapian::docid did, const Xapian::Document & document);
353 : :
354 : : Xapian::Document::Internal * open_document(Xapian::docid did,
355 : : bool lazy) const;
356 : :
357 : : //@}
358 : :
359 : : public:
360 : : /** Create and open a writable brass database.
361 : : *
362 : : * @exception Xapian::DatabaseOpeningError thrown if database can't
363 : : * be opened.
364 : : *
365 : : * @exception Xapian::DatabaseVersionError thrown if database is in an
366 : : * unsupported format. This implies that the database was
367 : : * created by an older or newer version of Xapian.
368 : : *
369 : : * @param dir directory holding brass tables
370 : : */
371 : : BrassWritableDatabase(const string &dir, int action, int block_size);
372 : :
373 : : ~BrassWritableDatabase();
374 : :
375 : : /** Virtual methods of Database::Internal. */
376 : : //@{
377 : : Xapian::termcount get_doclength(Xapian::docid did) const;
378 : : Xapian::doccount get_termfreq(const string & tname) const;
379 : : Xapian::termcount get_collection_freq(const string & tname) const;
380 : : Xapian::doccount get_value_freq(Xapian::valueno valno) const;
381 : : std::string get_value_lower_bound(Xapian::valueno valno) const;
382 : : std::string get_value_upper_bound(Xapian::valueno valno) const;
383 : : bool term_exists(const string & tname) const;
384 : :
385 : : LeafPostList * open_post_list(const string & tname) const;
386 : : ValueList * open_value_list(Xapian::valueno slot) const;
387 : : TermList * open_allterms(const string & prefix) const;
388 : :
389 : : void add_spelling(const string & word, Xapian::termcount freqinc) const;
390 : : void remove_spelling(const string & word, Xapian::termcount freqdec) const;
391 : : TermList * open_spelling_wordlist() const;
392 : :
393 : : TermList * open_synonym_keylist(const string & prefix) const;
394 : : void add_synonym(const string & word, const string & synonym) const;
395 : : void remove_synonym(const string & word, const string & synonym) const;
396 : : void clear_synonyms(const string & word) const;
397 : :
398 : : void set_metadata(const string & key, const string & value);
399 : : void invalidate_doc_object(Xapian::Document::Internal * obj) const;
400 : : //@}
401 : : };
402 : :
403 : : #endif /* OM_HGUARD_BRASS_DATABASE_H */
|