Branch data Line data Source code
1 : : /* database.h: database class declarations
2 : : *
3 : : * Copyright 1999,2000,2001 BrightStation PLC
4 : : * Copyright 2002 Ananova Ltd
5 : : * Copyright 2002,2003,2004,2005,2006,2007,2008,2009 Olly Betts
6 : : * Copyright 2006,2008 Lemur Consulting Ltd
7 : : *
8 : : * This program is free software; you can redistribute it and/or
9 : : * modify it under the terms of the GNU General Public License as
10 : : * published by the Free Software Foundation; either version 2 of the
11 : : * License, or (at your option) any later version.
12 : : *
13 : : * This program is distributed in the hope that it will be useful,
14 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : : * GNU General Public License for more details.
17 : : *
18 : : * You should have received a copy of the GNU General Public License
19 : : * along with this program; if not, write to the Free Software
20 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 : : * USA
22 : : */
23 : :
24 : : #ifndef OM_HGUARD_DATABASE_H
25 : : #define OM_HGUARD_DATABASE_H
26 : :
27 : : #include <string>
28 : :
29 : : #include "internaltypes.h"
30 : :
31 : : #include <xapian/base.h>
32 : : #include <xapian/types.h>
33 : : #include <xapian/database.h>
34 : : #include <xapian/document.h>
35 : : #include <xapian/positioniterator.h>
36 : : #include <xapian/termiterator.h>
37 : : #include <xapian/valueiterator.h>
38 : :
39 : : using namespace std; // NOTE(review): using-directive at header scope affects every file that includes this header.
40 : :
41 : : class LeafPostList; // Forward declaration; returned by pointer from open_post_list().
42 : : class RemoteDatabase; // Forward declaration; returned by pointer from as_remotedatabase().
43 : :
44 : : typedef Xapian::TermIterator::Internal TermList; // Short name for the term iterator's internal class.
45 : : typedef Xapian::PositionIterator::Internal PositionList; // Short name for the position iterator's internal class.
46 : : typedef Xapian::ValueIterator::Internal ValueList; // Short name for the value iterator's internal class.
47 : :
48 : : // Used by the flint and chert backends.
49 : : const int XAPIAN_DB_READONLY = 0;
50 : :
51 : : namespace Xapian {
52 : :
53 : : struct ReplicationInfo;
54 : :
55 : : /** Abstract, reference-counted base class for database backends.
56 : : */
57 : : class Database::Internal : public Xapian::Internal::RefCntBase {
58 : : private:
59 : : /// Copying is not allowed (copy constructor is private).
60 : : Internal(const Internal &);
61 : :
62 : : /// Assignment is not allowed (assignment operator is private).
63 : : void operator=(const Internal &);
64 : :
65 : : protected:
66 : : /// Transaction state; values greater than zero mean a transaction is active.
67 : : enum {
68 : : TRANSACTION_UNIMPLEMENTED = -1, // Used by InMemory.
69 : : TRANSACTION_NONE = 0,
70 : : TRANSACTION_UNFLUSHED = 1,
71 : : TRANSACTION_FLUSHED = 2
72 : : } transaction_state;
73 : :
74 : : bool transaction_active() const { return int(transaction_state) > 0; } // True only for TRANSACTION_UNFLUSHED and TRANSACTION_FLUSHED.
75 : :
76 : : /** Create a database with no transaction active - called only by derived classes. */
77 : 0 : Internal() : transaction_state(TRANSACTION_NONE) { }
78 : :
79 : : /** Internal method to perform cleanup when a writable database is
80 : : * destroyed with uncommitted changes.
81 : : *
82 : : * A derived class' destructor should call this method before
83 : : * destroying the database to ensure that no sessions or
84 : : * transactions are in progress at destruction time.
85 : : *
86 : : * Note that it is not safe to throw exceptions from destructors,
87 : : * so this method will catch and discard any exceptions.
88 : : */
89 : : void dtor_called();
90 : :
91 : : public:
92 : : /** Destroy the database.
93 : : *
94 : : * This method should not be called until all objects using the
95 : : * database have been cleaned up.
96 : : *
97 : : * If any transactions are in progress, they should
98 : : * be finished by cancel_transaction() or
99 : : * commit_transaction() - if this is not done, the destructor
100 : : * will attempt to clean things up by cancelling the transaction,
101 : : * but any errors produced by these operations will not be reported.
102 : : */
103 : : virtual ~Internal();
104 : :
105 : : /** Send a keep-alive signal to a remote database, to stop
106 : : * it from timing out.
107 : : */
108 : : virtual void keep_alive();
109 : :
110 : : //////////////////////////////////////////////////////////////////
111 : : // Database statistics:
112 : : // ====================
113 : :
114 : : /** Return the number of docs in this (sub) database.
115 : : */
116 : : virtual Xapian::doccount get_doccount() const = 0;
117 : :
118 : : /** Return the last used document id of this (sub) database.
119 : : */
120 : : virtual Xapian::docid get_lastdocid() const = 0;
121 : :
122 : : /** Return the total length of all documents in this database. */
123 : : virtual totlen_t get_total_length() const = 0;
124 : :
125 : : /** Return the average length of a document in this (sub) database.
126 : : *
127 : : * See Database::Internal::get_doclength() for the meaning of document
128 : : * length within Xapian.
129 : : */
130 : : virtual Xapian::doclength get_avlength() const = 0;
131 : :
132 : : /** Get the length of a given document.
133 : : *
134 : : * Document length, for the purposes of Xapian, is defined to be
135 : : * the number of instances of terms within a document. Expressed
136 : : * differently, the sum of the within document frequencies over
137 : : * all the terms in the document.
138 : : *
139 : : * @param did The document id of the document whose length is
140 : : * being requested.
141 : : */
142 : : virtual Xapian::termcount get_doclength(Xapian::docid did) const = 0;
143 : :
144 : : /** Return the number of documents indexed by a given term. This
145 : : * may be an approximation, but must be an upper bound (i.e.,
146 : : * greater than or equal to the true value), and should be as accurate
147 : : * as possible.
148 : : *
149 : : * @param tname The term whose term frequency is being requested.
150 : : */
151 : : virtual Xapian::doccount get_termfreq(const string & tname) const = 0;
152 : :
153 : : /** Return the total number of occurrences of the given term. This
154 : : * is the sum of the number of occurrences of the term in each
155 : : * document: i.e., the sum of the within document frequencies of the
156 : : * term.
157 : : *
158 : : * @param tname The term whose collection frequency is being
159 : : * requested.
160 : : */
161 : : virtual Xapian::termcount get_collection_freq(const string & tname) const = 0;
162 : :
163 : : /** Return the frequency of a given value slot.
164 : : *
165 : : * This is the number of documents which have a (non-empty) value
166 : : * stored in the slot.
167 : : *
168 : : * @param valno The value slot to examine.
169 : : *
170 : : * @exception UnimplementedError The frequency of the value isn't
171 : : * available for this database type.
172 : : */
173 : : virtual Xapian::doccount get_value_freq(Xapian::valueno valno) const;
174 : :
175 : : /** Get a lower bound on the values stored in the given value slot.
176 : : *
177 : : * If the lower bound isn't available for the given database type,
178 : : * this will return the lowest possible bound - the empty string.
179 : : *
180 : : * @param valno The value slot to examine.
181 : : */
182 : : virtual std::string get_value_lower_bound(Xapian::valueno valno) const;
183 : :
184 : : /** Get an upper bound on the values stored in the given value slot.
185 : : *
186 : : * @param valno The value slot to examine.
187 : : *
188 : : * @exception UnimplementedError The upper bound of the values isn't
189 : : * available for this database type.
190 : : */
191 : : virtual std::string get_value_upper_bound(Xapian::valueno valno) const;
192 : :
193 : : /// Get a lower bound on the length of a document in this DB.
194 : : virtual Xapian::termcount get_doclength_lower_bound() const;
195 : :
196 : : /// Get an upper bound on the length of a document in this DB.
197 : : virtual Xapian::termcount get_doclength_upper_bound() const;
198 : :
199 : : /// Get an upper bound on the wdf (within document frequency) of term @a term.
200 : : virtual Xapian::termcount get_wdf_upper_bound(const std::string & term) const;
201 : :
202 : : /** Check whether a given term is in the database.
203 : : *
204 : : * @param tname The term whose presence is being checked.
205 : : */
206 : : virtual bool term_exists(const string & tname) const = 0;
207 : :
208 : : /** Check whether this database contains any positional information.
209 : : */
210 : : virtual bool has_positions() const = 0;
211 : :
212 : : //////////////////////////////////////////////////////////////////
213 : : // Data item access methods:
214 : : // =========================
215 : :
216 : : /** Open a posting list.
217 : : *
218 : : * Method defined by subclass to open a posting list.
219 : : * This is a list of all the documents which contain a given term.
220 : : *
221 : : * @param tname The term whose posting list is being requested.
222 : : *
223 : : * @return A pointer to the newly created posting list.
224 : : * If the term doesn't exist, a LeafPostList object
225 : : * returning no documents is returned, which makes it
226 : : * easier to implement a search over multiple databases.
227 : : * This object must be deleted by the caller after
228 : : * use.
229 : : */
230 : : virtual LeafPostList * open_post_list(const string & tname) const = 0;
231 : :
232 : : /** Open a value stream.
233 : : *
234 : : * This returns the value in a particular slot for each document.
235 : : *
236 : : * @param slot The value slot.
237 : : *
238 : : * @return Pointer to a new ValueList object which should be
239 : : * deleted by the caller once it is no longer needed.
240 : : */
241 : : virtual ValueList * open_value_list(Xapian::valueno slot) const;
242 : :
243 : : /** Open a term list.
244 : : *
245 : : * This is a list of all the terms contained by a given document.
246 : : *
247 : : * @param did The document id whose term list is being requested.
248 : : *
249 : : * @return A pointer to the newly created term list.
250 : : * This object must be deleted by the caller after
251 : : * use.
252 : : */
253 : : virtual TermList * open_term_list(Xapian::docid did) const = 0;
254 : :
255 : : /** Open an allterms list.
256 : : *
257 : : * This is a list of all the terms in the database.
258 : : *
259 : : * @param prefix The prefix to restrict the terms to.
260 : : * @return A pointer to the newly created allterms list.
261 : : * This object must be deleted by the caller after
262 : : * use.
263 : : */
264 : : virtual TermList * open_allterms(const string & prefix) const = 0;
265 : :
266 : : /** Open a position list for the given term in the given document.
267 : : *
268 : : * @param did The document id for which a position list is being
269 : : * requested.
270 : : * @param tname The term for which a position list is being
271 : : * requested.
272 : : *
273 : : * @return A pointer to the newly created position list.
274 : : * This object must be deleted by the caller after
275 : : * use.
276 : : */
277 : : virtual PositionList * open_position_list(Xapian::docid did,
278 : : const string & tname) const = 0;
279 : :
280 : : /** Open a document.
281 : : *
282 : : * This is used to access the values and data associated with a
283 : : * document. See class Xapian::Document::Internal for further details.
284 : : *
285 : : * @param did The document id which is being requested.
286 : : *
287 : : * @param lazy No need to check that this document actually exists.
288 : : * Used when we already know that this document exists
289 : : * (only a hint - the backend may still check).
290 : : *
291 : : * @return A pointer to the newly created document object.
292 : : * This object must be deleted by the caller after
293 : : * use.
294 : : */
295 : : virtual Xapian::Document::Internal *
296 : : open_document(Xapian::docid did, bool lazy) const = 0;
297 : :
298 : : /** Create a termlist tree from trigrams of @a word.
299 : : *
300 : : * You can assume word.size() > 1.
301 : : *
302 : : * If there are no trigrams, returns NULL.
303 : : */
304 : : virtual TermList * open_spelling_termlist(const string & word) const;
305 : :
306 : : /** Return a termlist which returns the words which are spelling
307 : : * correction targets.
308 : : *
309 : : * If there are no spelling correction targets, returns NULL.
310 : : */
311 : : virtual TermList * open_spelling_wordlist() const;
312 : :
313 : : /** Return the number of times @a word was added as a spelling. */
314 : : virtual Xapian::doccount get_spelling_frequency(const string & word) const;
315 : :
316 : : /** Add a word to the spelling dictionary.
317 : : *
318 : : * If the word is already present, its frequency is increased.
319 : : *
320 : : * @param word The word to add.
321 : : * @param freqinc How much to increase its frequency by.
322 : : */
323 : : virtual void add_spelling(const string & word,
324 : : Xapian::termcount freqinc) const;
325 : :
326 : : /** Remove a word from the spelling dictionary.
327 : : *
328 : : * The word's frequency is decreased, and if it would become zero or less
329 : : * then the word is removed completely.
330 : : *
331 : : * @param word The word to remove.
332 : : * @param freqdec How much to decrease its frequency by.
333 : : */
334 : : virtual void remove_spelling(const string & word,
335 : : Xapian::termcount freqdec) const;
336 : :
337 : : /** Open a termlist returning synonyms for a term.
338 : : *
339 : : * If @a term has no synonyms, returns NULL.
340 : : */
341 : : virtual TermList * open_synonym_termlist(const string & term) const;
342 : :
343 : : /** Open a termlist returning each term which has synonyms.
344 : : *
345 : : * @param prefix If non-empty, only terms with this prefix are
346 : : * returned.
347 : : */
348 : : virtual TermList * open_synonym_keylist(const string & prefix) const;
349 : :
350 : : /** Add a synonym for a term.
351 : : *
352 : : * If @a synonym is already a synonym for @a term, then no action is
353 : : * taken.
354 : : */
355 : : virtual void add_synonym(const string & term, const string & synonym) const;
356 : :
357 : : /** Remove a synonym for a term.
358 : : *
359 : : * If @a synonym isn't a synonym for @a term, then no action is taken.
360 : : */
361 : : virtual void remove_synonym(const string & term, const string & synonym) const;
362 : :
363 : : /** Clear all synonyms for a term.
364 : : *
365 : : * If @a term has no synonyms, no action is taken.
366 : : */
367 : : virtual void clear_synonyms(const string & term) const;
368 : :
369 : : /** Get the metadata associated with a given key.
370 : : *
371 : : * See Database::get_metadata() for more information.
372 : : */
373 : : virtual string get_metadata(const string & key) const;
374 : :
375 : : /** Open a termlist returning each metadata key.
376 : : *
377 : : * Only metadata keys which are associated with a non-empty value will
378 : : * be returned.
379 : : *
380 : : * @param prefix If non-empty, only keys with this prefix are
381 : : * returned.
382 : : */
383 : : virtual TermList * open_metadata_keylist(const std::string &prefix) const;
384 : :
385 : : /** Set the metadata associated with a given key.
386 : : *
387 : : * See WritableDatabase::set_metadata() for more information.
388 : : */
389 : : virtual void set_metadata(const string & key, const string & value);
390 : :
391 : : /** Reopen the database to the latest available revision.
392 : : *
393 : : * Database backends which don't support simultaneous update and
394 : : * reading probably don't need to do anything here.
395 : : */
396 : : virtual void reopen();
397 : :
398 : : /** Close the database.
399 : : */
400 : : virtual void close() = 0;
401 : :
402 : : //////////////////////////////////////////////////////////////////
403 : : // Modifying the database:
404 : : // =======================
405 : :
406 : : /** Commit pending modifications to the database.
407 : : *
408 : : * See WritableDatabase::commit() for more information.
409 : : */
410 : : virtual void commit();
411 : :
412 : : /** Cancel pending modifications to the database. */
413 : : virtual void cancel();
414 : :
415 : : /** Begin a transaction.
416 : : *
417 : : * See WritableDatabase::begin_transaction() for more information.
418 : : */
419 : : void begin_transaction(bool flushed);
420 : :
421 : : /** Commit a transaction.
422 : : *
423 : : * See WritableDatabase::commit_transaction() for more information.
424 : : */
425 : : void commit_transaction();
426 : :
427 : : /** Cancel a transaction.
428 : : *
429 : : * See WritableDatabase::cancel_transaction() for more information.
430 : : */
431 : : void cancel_transaction();
432 : :
433 : : /** Add a new document to the database.
434 : : *
435 : : * See WritableDatabase::add_document() for more information.
436 : : */
437 : : virtual Xapian::docid add_document(const Xapian::Document & document);
438 : :
439 : : /** Delete a document in the database.
440 : : *
441 : : * See WritableDatabase::delete_document() for more information.
442 : : */
443 : : virtual void delete_document(Xapian::docid did);
444 : :
445 : : /** Delete any documents indexed by a term from the database.
446 : : *
447 : : * See WritableDatabase::delete_document() for more information.
448 : : */
449 : : virtual void delete_document(const string & unique_term);
450 : :
451 : : /** Replace a given document in the database.
452 : : *
453 : : * See WritableDatabase::replace_document() for more information.
454 : : */
455 : : virtual void replace_document(Xapian::docid did,
456 : : const Xapian::Document & document);
457 : :
458 : : /** Replace any documents matching a term.
459 : : *
460 : : * See WritableDatabase::replace_document() for more information.
461 : : */
462 : : virtual Xapian::docid replace_document(const string & unique_term,
463 : : const Xapian::Document & document);
464 : :
465 : : /** Request and later collect a document from the database.
466 : : * Multiple documents can be requested with request_document(),
467 : : * and then collected with collect_document(). Allows the backend
468 : : * to optimise (e.g. the remote backend can start requests for all
469 : : * the documents so they fetch in parallel).
470 : : *
471 : : * If a backend doesn't support this, request_document() can be a
472 : : * no-op and collect_document() the same as open_document().
473 : : */
474 : : //@{
475 : : virtual void request_document(Xapian::docid /*did*/) const;
476 : :
477 : : virtual Xapian::Document::Internal * collect_document(Xapian::docid did) const;
478 : : //@}
479 : :
480 : : /** Write a set of changesets to a file descriptor.
481 : : *
482 : : * This call may reopen the database, leaving it pointing to a more
483 : : * recent version of the database.
484 : : */
485 : : virtual void write_changesets_to_fd(int fd,
486 : : const std::string & start_revision,
487 : : bool need_whole_db,
488 : : Xapian::ReplicationInfo * info);
489 : :
490 : : /// Get a string describing the current revision of the database.
491 : : virtual string get_revision_info() const;
492 : :
493 : : /** Get a UUID for the database.
494 : : *
495 : : * The UUID will persist for the lifetime of the database.
496 : : *
497 : : * Replicas (e.g., made with the replication protocol, or by copying all
498 : : * the database files) will have the same UUID. However, copies (made
499 : : * with copydatabase, or xapian-compact) will have different UUIDs.
500 : : *
501 : : * If the backend does not support UUIDs the empty string is returned.
502 : : */
503 : : virtual string get_uuid() const;
504 : :
505 : : /** Notify the database that a document object is no longer valid.
506 : : *
507 : : * This is used to invalidate references to a document kept by a
508 : : * database for doing lazy updates. If we moved to using a weak_ptr
509 : : * instead we wouldn't need a special method for this, but it would
510 : : * involve a fair bit of reorganising of other parts of the code.
511 : : */
512 : : virtual void invalidate_doc_object(Xapian::Document::Internal * obj) const;
513 : :
514 : : //////////////////////////////////////////////////////////////////
515 : : // Introspection methods:
516 : : // ======================
517 : :
518 : : /** Return a pointer to this object as a RemoteDatabase, or NULL.
519 : : *
520 : : * This method is used by MultiMatch to decide whether to use a
521 : : * LocalSubMatch or a RemoteSubMatch to perform a search over the
522 : : * database.
523 : : */
524 : : virtual RemoteDatabase * as_remotedatabase();
525 : : };
526 : :
527 : : }
528 : :
529 : : #endif /* OM_HGUARD_DATABASE_H */
|