Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

/include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* ----START-LICENCE----
00005  * Copyright 1999,2000,2001 BrightStation PLC
00006  * Copyright 2001,2002 Ananova Ltd
00007  * Copyright 2002,2003,2004 Olly Betts
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00022  * USA
00023  * -----END-LICENCE-----
00024  */
00025 
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028 
00029 #include <string>
00030 #include <time.h> // for time_t
00031 
00032 #include <xapian/base.h>
00033 #include <xapian/types.h>
00034 
00035 namespace Xapian {
00036 
00037 class Database; // Forward declarations: types named in signatures below before, or without, a definition in this header.
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class TermIterator;
00043 class Weight;
00044 
00048 class MSet {
      // Interface for a set of match results; Enquire::get_mset() returns one
      // by value.  Every member function is only declared here (definitions
      // live elsewhere), so behavioural details cannot be read off this header.
00049     public:
00050         class Internal;
              // Handle to the implementation object.
              // NOTE(review): presumably reference-counted (going by the
              // RefCntPtr name), which would make MSet copies share state --
              // confirm in xapian/base.h.
00052         Xapian::Internal::RefCntPtr<Internal> internal;
00053 
00054     public:
00055         // FIXME: public for now, private would be better
              // Wraps an existing implementation object.  Marked explicit, so
              // an Internal* never converts to an MSet implicitly.
00057         explicit MSet(MSet::Internal * internal_);
00058 
              // Default construction, destruction and copy construction.
00060         MSet();
00061 
00063         ~MSet();
00064 
00066         MSet(const MSet & other);
00067 
              // Returns void, so assignments cannot be chained (a = b = c).
00069         void operator=(const MSet &other);
00070 
              // Three const fetch() overloads: an iterator range, a single
              // item, and no arguments.
              // NOTE(review): what gets fetched is not visible from this
              // header -- see the implementation.
00086         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087 
00090         void fetch(const MSetIterator &item) const;
00091 
00094         void fetch() const;
00095 
              // Two overloads producing a Xapian::percent: one from a weight
              // value, one from an MSetIterator.
00100         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101 
00103         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104 
              // Per-term lookups keyed by term name.
00112         Xapian::doccount get_termfreq(const std::string &tname) const;
00113 
00121         Xapian::weight get_termweight(const std::string &tname) const;
00122 
              // MSetIterator::get_rank() adds this value to the iterator's
              // index to form a rank.
00130         Xapian::doccount get_firstitem() const;
00131 
              // Match-count accessors: lower bound, estimate and upper bound.
00141         Xapian::doccount get_matches_lower_bound() const;
00142 
00155         Xapian::doccount get_matches_estimated() const;
00156 
00166         Xapian::doccount get_matches_upper_bound() const;
00167 
              // Weight accessors.
00173         Xapian::weight get_max_possible() const;
00174 
00188         Xapian::weight get_max_attained() const;
00189 
              // NOTE(review): size() returns Xapian::termcount, whereas
              // max_size() and the size_type typedef below use
              // Xapian::doccount -- looks inconsistent; confirm which type is
              // intended.
00191         Xapian::termcount size() const;
00192 
00193         Xapian::doccount max_size() const;
00194 
00196         bool empty() const;
00197 
00199         void swap(MSet & other);
00200 
              // Iterator access.  All of these are const and return an
              // MSetIterator by value.
00202         MSetIterator begin() const;
00203 
00205         MSetIterator end() const;
00206 
00208         MSetIterator back() const;
00209         
              // Indexed access; returns an iterator rather than an element.
00219         MSetIterator operator[](Xapian::doccount i) const;
00220 
00222 
              // Container-style typedefs.
              // NOTE(review): const_reference is declared identically to
              // reference (MSetIterator &, with no const).
00223         typedef MSetIterator value_type; // FIXME: not assignable...
00224         typedef MSetIterator iterator;
00225         typedef MSetIterator const_iterator;
00226         typedef MSetIterator & reference; // Hmm
00227         typedef MSetIterator & const_reference;
00228         typedef MSetIterator * pointer; // Hmm
00229         typedef Xapian::doccount_diff difference_type;
00230         typedef Xapian::doccount size_type;
00232         
              // Returns a std::string describing this object.
00236         std::string get_description() const;
00237 };
00238 
00242 class MSetIterator {
      // Iterator over an MSet.  Its state is a position (index) plus its own
      // MSet object held by value; stepping the iterator changes the index
      // only.
00243     private:
00244         friend class MSet;
00245         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00246         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00247 
              // Private constructor (reachable from the friend class MSet):
              // stores the position and copy-constructs the MSet member.
00248         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00249             : index(index_), mset(mset_) { }
00250 
00251         Xapian::doccount index;
00252         MSet mset;
00253 
00254     public:
              // Default constructor: index 0 and a default-constructed MSet.
00258         MSetIterator() : index(0), mset() { }
00259 
00260         ~MSetIterator() { }
00261 
              // Copy constructor.  The members are assigned in the body, so
              // mset is default-constructed first and then overwritten via
              // MSet::operator=.
00263         MSetIterator(const MSetIterator &other) {
00264             index = other.index;
00265             mset = other.mset;
00266         }
00267 
              // Copy assignment.  Returns void, so assignments cannot be
              // chained.
00269         void operator=(const MSetIterator &other) {
00270             index = other.index;
00271             mset = other.mset;
00272         }
00273 
              // Pre-increment: advances the index and returns *this.  No
              // bounds check is performed here.
00275         MSetIterator & operator++() {
00276             ++index;
00277             return *this;
00278         }
00279 
              // Post-increment: returns a copy made before advancing.
00281         MSetIterator operator++(int) {
00282             MSetIterator tmp = *this;
00283             ++index;
00284             return tmp;
00285         }
00286 
              // Pre-decrement: steps the index back and returns *this.  No
              // bounds check is performed here.
00288         MSetIterator & operator--() {
00289             --index;
00290             return *this;
00291         }
00292 
              // Post-decrement: returns a copy made before stepping back.
00294         MSetIterator operator--(int) {
00295             MSetIterator tmp = *this;
00296             --index;
00297             return tmp;
00298         }
00299 
              // Dereference yields a Xapian::docid by value (defined out of
              // line).
00301         Xapian::docid operator*() const;
00302 
00321         Xapian::Document get_document() const;
00322 
              // Rank = the owning MSet's get_firstitem() plus this iterator's
              // index.
00329         Xapian::doccount get_rank() const {
00330             return mset.get_firstitem() + index;
00331         }
00332 
00334         Xapian::weight get_weight() const;
00335 
00352         Xapian::doccount get_collapse_count() const;
00353 
00359         Xapian::percent get_percent() const;
00360 
              // Returns a std::string describing this object.
00364         std::string get_description() const;
00365 
00367 
              // Iterator-traits typedefs.
              // NOTE(review): std::bidirectional_iterator_tag is declared in
              // <iterator>, which is not among this file's direct includes
              // (<string>, <time.h>, xapian/base.h, xapian/types.h); this
              // relies on a transitive include -- confirm.
              // NOTE(review): operator*() returns docid by value while
              // `reference` is docid & -- the two do not agree.
00368         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00369         typedef Xapian::docid value_type;
00370         typedef Xapian::doccount_diff difference_type;
00371         typedef Xapian::docid * pointer;
00372         typedef Xapian::docid & reference;
00374 };
00375 
00376 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00377 {
          // Equality looks at the stored position only.  The MSet each
          // iterator holds is not compared, so iterators taken from different
          // MSets compare equal whenever their indices match.
00378     return (a.index == b.index);
00379 }
00380 
00381 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00382 {
          // Inequality of the stored positions only; the MSet each iterator
          // holds is not taken into account.
00383     return (a.index != b.index);
00384 }
00385 
00386 class ESetIterator; // Forward declaration: ESet's members return ESetIterator before the class is defined.
00387 
00392 class ESet {
      // Interface for the set returned by Enquire::get_eset().  Every member
      // function is only declared here (definitions live elsewhere).
00393     public:
00394         class Internal;
              // Handle to the implementation object.
              // NOTE(review): presumably reference-counted (going by the
              // RefCntPtr name) -- confirm in xapian/base.h.
00396         Xapian::Internal::RefCntPtr<Internal> internal;
00397 
              // Default construction, destruction and copy construction.
00399         ESet();
00400 
00402         ~ESet();
00403 
00405         ESet(const ESet & other);
00406 
              // Returns void, so assignments cannot be chained.
00408         void operator=(const ESet &other);
00409 
00414         Xapian::termcount get_ebound() const;
00415 
00417         Xapian::termcount size() const;
00418 
00420         bool empty() const;
00421 
00423         void swap(ESet & other);
00424 
              // Iterator access.  All of these are const and return an
              // ESetIterator by value.
00426         ESetIterator begin() const;
00427 
00429         ESetIterator end() const;
00430 
00432         ESetIterator back() const;
00433 
              // NOTE(review): the index parameter is Xapian::doccount, whereas
              // size() and ESetIterator's stored index use Xapian::termcount
              // -- confirm which type is intended.
00435         ESetIterator operator[](Xapian::doccount i) const;
00436 
              // Returns a std::string describing this object.
00441         std::string get_description() const;
00442 };
00443 
00445 class ESetIterator {
      // Iterator over an ESet.  Its state is a position (index) plus its own
      // ESet object held by value; stepping the iterator changes the index
      // only.
00446     private:
00447         friend class ESet;
00448         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00449         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00450 
              // Private constructor (reachable from the friend class ESet):
              // stores the position and copy-constructs the ESet member.
00451         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00452             : index(index_), eset(eset_) { }
00453 
00454         Xapian::termcount index;
00455         ESet eset;
00456 
00457     public:
              // Default constructor: index 0 and a default-constructed ESet.
00461         ESetIterator() : index(0), eset() { }
00462 
00463         ~ESetIterator() { }
00464 
              // Copy constructor.  The members are assigned in the body, so
              // eset is default-constructed first and then overwritten via
              // ESet::operator=.
00466         ESetIterator(const ESetIterator &other) {
00467             index = other.index;
00468             eset = other.eset;
00469         }
00470 
              // Copy assignment.  Returns void, so assignments cannot be
              // chained.
00472         void operator=(const ESetIterator &other) {
00473             index = other.index;
00474             eset = other.eset;
00475         }
00476 
              // Pre-increment: advances the index and returns *this.  No
              // bounds check is performed here.
00478         ESetIterator & operator++() {
00479             ++index;
00480             return *this;
00481         }
00482 
              // Post-increment: returns a copy made before advancing.
00484         ESetIterator operator++(int) {
00485             ESetIterator tmp = *this;
00486             ++index;
00487             return tmp;
00488         }
00489 
              // Pre-decrement: steps the index back and returns *this.  No
              // bounds check is performed here.
00491         ESetIterator & operator--() {
00492             --index;
00493             return *this;
00494         }
00495 
              // Post-decrement: returns a copy made before stepping back.
00497         ESetIterator operator--(int) {
00498             ESetIterator tmp = *this;
00499             --index;
00500             return tmp;
00501         }
00502 
              // Dereference yields a const std::string reference (defined out
              // of line).
00504         const std::string & operator *() const;
00505 
00507         Xapian::weight get_weight() const;
00508 
              // Returns a std::string describing this object.
00512         std::string get_description() const;
00513 
00515 
              // Iterator-traits typedefs.
              // NOTE(review): std::bidirectional_iterator_tag is declared in
              // <iterator>, which is not among this file's direct includes;
              // this relies on a transitive include -- confirm.
              // NOTE(review): operator*() returns const std::string & while
              // `reference` is a non-const std::string & -- the two do not
              // agree.
00516         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00517         typedef std::string value_type;
00518         typedef Xapian::termcount_diff difference_type;
00519         typedef std::string * pointer;
00520         typedef std::string & reference;
00522 };
00523 
00524 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00525 {
          // Equality looks at the stored position only.  The ESet each
          // iterator holds is not compared, so iterators taken from different
          // ESets compare equal whenever their indices match.
00526     return (a.index == b.index);
00527 }
00528 
00529 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00530 {
          // Inequality of the stored positions only; the ESet each iterator
          // holds is not taken into account.
00531     return (a.index != b.index);
00532 }
00533 
00538 class RSet {
      // A set of documents identified by Xapian::docid; Enquire::get_mset()
      // and Enquire::get_eset() accept one.  Most members are only declared
      // here; the MSetIterator overloads are thin inline wrappers that
      // dereference the iterator and forward to the docid overload.
00539     public:
00541         class Internal;
00542 
              // Raw pointer to the implementation object.
              // NOTE(review): ownership is presumably handled by the copy
              // constructor, operator= and destructor declared below (all
              // defined out of line) -- confirm.
00544         Internal *internal;
00545 
00547         RSet(const RSet &rset);
00548 
              // Returns void, so assignments cannot be chained.
00550         void operator=(const RSet &rset);
00551 
00553         RSet();
00554 
00556         ~RSet();
00557 
00559         Xapian::doccount size() const;
00560 
00562         bool empty() const;
00563 
00565         void add_document(Xapian::docid did);
00566         
              // Adds the document id the iterator dereferences to.
00568         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00569 
00571         void remove_document(Xapian::docid did);
00572 
              // Removes the document id the iterator dereferences to.
00574         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00575 
00577         bool contains(Xapian::docid did) const;
00578 
              // Membership test for the document id the iterator dereferences
              // to.  Const, like the docid overload it forwards to, so it can
              // be called on a const RSet (for example through the
              // `const RSet *` / `const RSet &` parameters Enquire takes).
00580         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00581 
              // Returns a std::string describing this object.
00586         std::string get_description() const;
00587 };
00588 
00591 class MatchDecider {
      // Abstract function-object interface taking a Xapian::Document.
      // Enquire::get_mset() accepts an optional pointer to one.
00592     public:
              // Pure virtual call operator; subclasses must implement it.
              // NOTE(review): the int result is presumably used as a yes/no
              // answer -- confirm against the matcher.
00595         virtual int operator()(const Xapian::Document &doc) const = 0;
00596 
              // Virtual destructor, so deleting through a MatchDecider pointer
              // destroys the derived object.
00598         virtual ~MatchDecider() {}
00599 };
00600 
00603 class ExpandDecider {
      // Abstract function-object interface taking a term name.
      // Enquire::get_eset() accepts an optional pointer to one.
00604     public:
              // Pure virtual call operator; subclasses must implement it.
              // NOTE(review): the int result is presumably used as a yes/no
              // answer -- confirm against the expand code.
00607         virtual int operator()(const std::string & tname) const = 0;
00608 
              // Virtual destructor, so deleting through an ExpandDecider
              // pointer destroys the derived object.
00610         virtual ~ExpandDecider() {}
00611 };
00612 
00623 class Enquire {
      // Query interface: constructed from a Database, configured through the
      // set_*() members, and asked for results through get_mset() and
      // get_eset().  Apart from the two inline forwarding overloads below,
      // everything is only declared here.
00624     private:
              // Copy constructor and assignment are declared private, so code
              // outside the class cannot copy an Enquire.
00626         Enquire(const Enquire &);
00627 
00629         void operator=(const Enquire &);
00630 
00631     public:
00632         class Internal;
              // Handle to the implementation object.
              // NOTE(review): presumably reference-counted (going by the
              // RefCntPtr name) -- confirm in xapian/base.h.
00634         Xapian::Internal::RefCntPtr<Internal> internal;
00635 
              // Takes the database by const reference and an optional
              // ErrorHandler pointer that defaults to 0.  Not marked explicit,
              // so a Database converts implicitly to an Enquire.
00651         Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00652 
00655         ~Enquire();
00656 
00663         void set_query(const Xapian::Query & query_);
00664 
              // NOTE(review): not a const member even though it returns a
              // const reference -- it cannot be called on a const Enquire.
00671         const Xapian::Query & get_query();
00672 
00679         void set_weighting_scheme(const Weight &weight_);
00680 
00707         void set_collapse_key(Xapian::valueno collapse_key);
00708 
00715         void set_sort_forward(bool sort_forward);
00716 
              // weight_cutoff is optional and defaults to 0.
00734         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00735 
              // sort_by_relevance is optional and defaults to false.
00752         void set_sorting(Xapian::valueno sort_key, int sort_bands,
00753                          bool sort_by_relevance = false);
00754 
00766         void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00767 
              // Main get_mset() overload.  checkatleast defaults to 0, and the
              // RSet and MatchDecider pointers default to 0.
00793         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00794                       Xapian::doccount checkatleast = 0,
00795                       const RSet * omrset = 0,
00796                       const MatchDecider * mdecider = 0) const;
              // Convenience overload without checkatleast: forwards to the
              // overload above, passing 0 for checkatleast.
00797         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00798                       const RSet * omrset,
00799                       const MatchDecider * mdecider = 0) const {
00800             return get_mset(first, maxitems, 0, omrset, mdecider);
00801         }
00802 
              // NOTE(review): the values 1 and 2 suggest these are bit flags
              // meant for get_eset()'s `flags` argument -- confirm.
00803         static const int include_query_terms = 1;
00804         static const int use_exact_termfreq = 2;
              // Main get_eset() overload.  flags defaults to 0, k to 1.0 and
              // the ExpandDecider pointer to 0.
00827         ESet get_eset(Xapian::termcount maxitems,
00828                         const RSet & omrset,
00829                         int flags = 0,
00830                         double k = 1.0,
00831                         const Xapian::ExpandDecider * edecider = 0) const;
00832 
              // Convenience overload taking only a decider: forwards to the
              // overload above with flags = 0 and k = 1.0.
00846         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00847                                const Xapian::ExpandDecider * edecider) const {
00848             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00849         }
00850 
              // Matching-term iterators, addressed by document id ...
00879         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00880 
00882         TermIterator get_matching_terms_end(Xapian::docid did) const;
00883 
              // ... or by MSetIterator.
00906         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00907 
00909         TermIterator get_matching_terms_end(const MSetIterator &it) const;
00910 
              // Takes a name and an optional MatchDecider pointer that
              // defaults to NULL.
00913         void register_match_decider(const std::string &name,
00914                                     const MatchDecider *mdecider = NULL);
00915 
              // Returns a std::string describing this object.
00919         std::string get_description() const;
00920 };
00921 
00922 }
00923 
00924 class SocketServer; // Global-namespace forward declaration, so that Xapian::Weight can name ::SocketServer as a friend.
00925 
00926 namespace Xapian {
00927 
00929 class Weight {
      // Abstract base class for weighting schemes.  Concrete schemes implement
      // clone(), name(), serialise(), unserialise() and the get_*() weight
      // functions.  create() produces a configured copy from a prototype
      // object.
00930     friend class Enquire; // So Enquire can clone us
00931     friend class ::SocketServer; // So SocketServer can clone us - FIXME
00932     public:
00933         class Internal;
00934     protected:
              // Copy constructor: declared only, and reachable from derived
              // classes and friends only.
00935         Weight(const Weight &);
00936     private:
              // Assignment is declared private, so code outside the class and
              // its friends cannot assign Weight objects.
00937         void operator=(Weight &);
00938 
              // Returns a newly created object of the same concrete type.
              // Private, so only Weight itself and its friends can call it
              // through the base interface.
00946         virtual Weight * clone() const = 0;
00947 
00948     protected:
              // Per-use state filled in by create().
00949         const Internal * internal; // Weight::Internal == StatsSource
00950         Xapian::doclength querysize;
00951         Xapian::termcount wqf;
00952         std::string tname;
00953 
00954     public:
              // Gives the state that create() fills in later a defined value
              // (null pointer / zero), so that a prototype object that has not
              // been through create() never holds indeterminate members.
00955         Weight() : internal(0), querysize(0), wqf(0) { }
00956         virtual ~Weight() { }
00957 
              // Clones this object and stores the supplied statistics source,
              // query size, within-query frequency and term name in the clone.
              // Returns the pointer obtained from clone() without a null
              // check.
              // NOTE(review): the clone() implementations in this header
              // allocate with `new`, so the caller is presumably responsible
              // for deleting the result -- confirm.
00970         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00971                           Xapian::termcount wqf_, std::string tname_) const {
00972             Weight * wt = clone();
00973             wt->internal = internal_;
00974             wt->querysize = querysize_;
00975             wt->wqf = wqf_;
00976             wt->tname = tname_;
00977             return wt;
00978         }
00979 
              // Name of the scheme (for example BoolWeight returns "Bool").
00984         virtual std::string name() const = 0;
00985 
              // Scheme parameters as a string.
00987         virtual std::string serialise() const = 0;
00988 
              // Builds an object of this scheme from a serialised string.
00990         virtual Weight * unserialise(const std::string &s) const = 0;
00991 
              // Weight functions implemented by each scheme.
00999         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01000                                       Xapian::doclength len) const = 0;
01001 
01007         virtual Xapian::weight get_maxpart() const = 0;
01008 
01017         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01018 
01022         virtual Xapian::weight get_maxextra() const = 0;
01023 
              // Defaults to true; schemes override it (BoolWeight returns
              // false).
01025         virtual bool get_sumpart_needs_doclength() const { return true; }
01026 };
01027 
01029 class BoolWeight : public Weight {
      // Weighting scheme in which every weight function returns 0.  It has no
      // parameters: serialise() yields an empty string and unserialise()
      // ignores its argument.
01030     public:
              // Returns a fresh, heap-allocated BoolWeight.
01031         Weight * clone() const {
01032             return new BoolWeight;
01033         }
01034         BoolWeight() { }
01035         ~BoolWeight() { }
01036         std::string name() const { return "Bool"; }
01037         std::string serialise() const { return ""; }
              // The serialised string is unused; always returns a new
              // BoolWeight.
01038         Weight * unserialise(const std::string & /*s*/) const {
01039             return new BoolWeight;
01040         }
              // Both arguments are ignored; the result is always 0.
01041         Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; }
01042         Xapian::weight get_maxpart() const { return 0; }
01043 
01044         Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; }
01045         Xapian::weight get_maxextra() const { return 0; }
01046 
              // get_sumpart() ignores the document length, so none is needed.
01047         bool get_sumpart_needs_doclength() const { return false; }      
01048 };
01049 
01062 class BM25Weight : public Weight {
      // Weighting scheme named "BM25", configured by four parameters A, B, C
      // and D plus a minimum normalised length.  The weight functions are
      // defined out of line.
01063     private:
              // Mutable values that const member functions may write.
              // NOTE(review): presumably a cache filled in by
              // calc_termweight(), with weight_calculated as its "done" flag
              // -- confirm in the implementation.
01064         mutable Xapian::weight termweight;
01065         mutable Xapian::doclength lenpart;
01066         mutable double BD;
01067 
01068         double A, B, C, D;
01069         Xapian::doclength min_normlen;
01070 
01071         mutable bool weight_calculated;
01072 
01073         void calc_termweight() const;
01074 
01075     public:
              // Stores the parameters, then clamps them: negative A, B and C
              // become 0, and D is limited to the range [0, 1].  min_normlen_
              // is taken as a double and stored unclamped in a
              // Xapian::doclength member.
              // NOTE(review): termweight, lenpart and BD are not initialised
              // by either constructor.
01094         BM25Weight(double A_, double B_, double C_, double D_,
01095                    double min_normlen_)
01096                 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
01097                   weight_calculated(false)
01098         {
01099             if (A < 0) A = 0;
01100             if (B < 0) B = 0;
01101             if (C < 0) C = 0;
01102             if (D < 0) D = 0; else if (D > 1) D = 1;
01103         }
              // Default parameters: A = 1, B = 1, C = 0, D = 0.5,
              // min_normlen = 0.5.
01104         BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
01105                        weight_calculated(false) { }
01106 
              // Builds a new object from the five parameters only; the mutable
              // values are not copied and the clone starts with
              // weight_calculated == false.
01107         Weight * clone() const {
01108             return new BM25Weight(A, B, C, D, min_normlen);
01109         }
01110         ~BM25Weight() { }
01111         std::string name() const { return "BM25"; }
01112         std::string serialise() const;
01113         Weight * unserialise(const std::string & s) const;
01114         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01115         Xapian::weight get_maxpart() const;
01116 
01117         Xapian::weight get_sumextra(Xapian::doclength len) const;
01118         Xapian::weight get_maxextra() const;
01119 
              // NOTE(review): reads lenpart directly, without consulting
              // weight_calculated.  Since no constructor initialises lenpart,
              // the value read is indeterminate unless something has already
              // written it -- confirm that callers guarantee this.
01120         bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01121 };
01122 
01135 class TradWeight : public Weight {
      // Weighting scheme named "Trad", configured by a single parameter k.
      // The weight functions are defined out of line.
01136     private:
              // Mutable values that const member functions may write.
              // NOTE(review): presumably a cache filled in by
              // calc_termweight(), with weight_calculated as its "done" flag
              // -- confirm in the implementation.
01137         mutable Xapian::weight termweight;
01138         mutable Xapian::doclength lenpart;
01139 
01140         double param_k;
01141 
01142         mutable bool weight_calculated;
01143 
01144         void calc_termweight() const;
01145 
01146     public:
              // Stores k, replacing a negative value with 0.  Marked explicit,
              // so a double never converts to a TradWeight implicitly.
              // NOTE(review): termweight and lenpart are not initialised by
              // either constructor.
01154         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01155             if (param_k < 0) param_k = 0;
01156         }
01157 
              // Default parameter: k = 1.0.
01158         TradWeight() : param_k(1.0), weight_calculated(false) { }
01159         
              // Builds a new object from param_k only; the mutable values are
              // not copied and the clone starts with
              // weight_calculated == false.
01160         Weight * clone() const {
01161             return new TradWeight(param_k);
01162         }
01163         ~TradWeight() { }
01164         std::string name() const { return "Trad"; }
01165         std::string serialise() const;
01166         Weight * unserialise(const std::string & s) const;
01167         
01168         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01169         Xapian::weight get_maxpart() const;
01170 
01171         Xapian::weight get_sumextra(Xapian::doclength len) const;
01172         Xapian::weight get_maxextra() const;
01173 
              // NOTE(review): reads lenpart directly, without consulting
              // weight_calculated.  Since no constructor initialises lenpart,
              // the value read is indeterminate unless something has already
              // written it -- confirm that callers guarantee this.
01174         bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01175 };
01176 
01177 }
01178 
01179 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.8.3).
Generated on 20 Sep 2004 by Doxygen 1.2.15.