Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

/include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* ----START-LICENCE----
00005  * Copyright 1999,2000,2001 BrightStation PLC
00006  * Copyright 2001,2002 Ananova Ltd
00007  * Copyright 2002,2003,2004 Olly Betts
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00022  * USA
00023  * -----END-LICENCE-----
00024  */
00025 
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028 
00029 #include <string>
      #include <iterator> // for std::input_iterator_tag
00030 #include <time.h> // for time_t
00031 
00032 #include <xapian/base.h>
00033 #include <xapian/types.h>
00034 
00035 namespace Xapian {
00036 
/* Forward declarations of classes named by the declarations below.
 * MSetIterator and Weight are defined later in this header; the others
 * are not defined in this header. */
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class TermIterator;
00043 class Weight;
00044 
/** A match set: the type returned by Enquire::get_mset().
 *
 *  The object is a thin handle around an MSet::Internal held through a
 *  Xapian::Internal::RefCntPtr.  Every member function is only declared
 *  in this header; the definitions live elsewhere.
 */
00048 class MSet {
00049     public:
00050         class Internal;
              /** Smart pointer to the implementation object. */
00052         Xapian::Internal::RefCntPtr<Internal> internal;
00053 
00054     public:
00055         // FIXME: public for now, private would be better
              /** Construct a handle from an existing implementation object.
               *  Marked explicit, so an MSet::Internal * never converts
               *  implicitly to an MSet. */
00057         explicit MSet(MSet::Internal * internal_);
00058 
              /** Default constructor. */
00060         MSet();
00061 
              /** Destructor. */
00063         ~MSet();
00064 
              /** Copy constructor. */
00066         MSet(const MSet & other);
00067 
              /** Assignment.  Returns void, so assignments cannot be chained. */
00069         void operator=(const MSet &other);
00070 
              /** fetch() overloads: an iterator pair, a single item, or no
               *  argument at all.
               *  NOTE(review): presumably these pre-load the document data for
               *  the items in [begin, end), for one item, or for the whole
               *  MSet respectively - confirm against the implementation. */
00086         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087 
00090         void fetch(const MSetIterator &item) const;
00091 
00094         void fetch() const;
00095 
              /** Map a weight onto a Xapian::percent value. */
00100         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101 
              /** As above, but taking an iterator (presumably using the weight
               *  of the item it refers to - confirm). */
00103         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104 
              /** Look up a document count for the term tname (presumably its
               *  term frequency - confirm). */
00112         Xapian::doccount get_termfreq(const std::string &tname) const;
00113 
              /** Look up a weight for the term tname. */
00121         Xapian::weight get_termweight(const std::string &tname) const;
00122 
              /** Offset of the first item; MSetIterator::get_rank() adds the
               *  iterator's position to this value. */
00130         Xapian::doccount get_firstitem() const;
00131 
              /** Lower bound / estimate / upper bound accessors.
               *  NOTE(review): presumably these bracket the total number of
               *  matching documents - confirm. */
00138         Xapian::doccount get_matches_lower_bound() const;
00139 
00149         Xapian::doccount get_matches_estimated() const;
00150 
00157         Xapian::doccount get_matches_upper_bound() const;
00158 
              /** Weight accessors (presumably the greatest weight a document
               *  could have obtained, and the greatest actually obtained -
               *  confirm). */
00164         Xapian::weight get_max_possible() const;
00165 
00179         Xapian::weight get_max_attained() const;
00180 
              /* Container-style interface. */
00181         Xapian::doccount size() const;
00182 
00183         Xapian::doccount max_size() const;
00184 
00185         bool empty() const;
00186 
00187         void swap(MSet & other);
00188 
00189         MSetIterator begin() const;
00190 
00191         MSetIterator end() const;
00192 
00193         MSetIterator back() const;
00194         
              /** Indexed access; returns an iterator rather than a reference. */
00204         MSetIterator operator[](Xapian::doccount i) const;
00205 
00207 
              /* Typedefs mirroring the names used by standard containers.
               * Note that const_reference is declared as a non-const
               * reference, exactly like reference. */
00208         typedef MSetIterator value_type; // FIXME: not assignable...
00209         typedef MSetIterator iterator;
00210         typedef MSetIterator const_iterator;
00211         typedef MSetIterator & reference; // Hmm
00212         typedef MSetIterator & const_reference;
00213         typedef MSetIterator * pointer; // Hmm
00214         typedef Xapian::doccount_diff difference_type;
00215         typedef Xapian::doccount size_type;
00217         
              /** Return a string describing this object. */
00221         std::string get_description() const;
00222 };
00223 
/** An iterator over the items of an MSet.
 *
 *  Stores a copy of the MSet handle it iterates over together with a
 *  position (index) inside it.  Only MSet, a friend, can construct an
 *  iterator at a chosen position; other code obtains iterators from MSet
 *  (e.g. MSet::begin()).
 */
00227 class MSetIterator {
00228     private:
00229         friend class MSet;
00230         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00231         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00232 
              /** Construct an iterator at position index_ of mset_. */
00233         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00234             : index(index_), mset(mset_) { }
00235 
              /** Position of the current item. */
00236         Xapian::doccount index;
              /** The MSet being iterated, held by value. */
00237         MSet mset;
00238 
00239     public:
              /** Create an iterator at position 0 over a default-constructed
               *  MSet. */
00243         MSetIterator() : index(0), mset() { }
00244 
00245         ~MSetIterator() { }
00246 
              /** Copy constructor.
               *
               *  Both members are copy-constructed in the initialiser list, so
               *  mset is not default-constructed first and then overwritten
               *  by assignment. */
00248         MSetIterator(const MSetIterator &other)
00249             : index(other.index), mset(other.mset) { }
00252 
              /** Assignment.  Returns void, so assignments cannot be chained. */
00254         void operator=(const MSetIterator &other) {
00255             index = other.index;
00256             mset = other.mset;
00257         }
00258 
              /** Pre-increment: advance one position and return *this. */
00260         MSetIterator & operator++() {
00261             ++index;
00262             return *this;
00263         }
00264 
              /** Post-increment: advances one position but returns nothing,
               *  so the previous value is not available from `it++`. */
00265         void operator++(int) {
00266             ++index;
00267         }
00268 
              /** Dereference: yields a document id (returned by value). */
00270         Xapian::docid operator*() const;
00271 
              /** Return a Xapian::Document for the current item. */
00290         Xapian::Document get_document() const;
00291 
              /** Rank of the current item: the MSet's first-item offset plus
               *  this iterator's position. */
00298         Xapian::doccount get_rank() const {
00299             return mset.get_firstitem() + index;
00300         }
00301 
              /** Weight of the current item. */
00303         Xapian::weight get_weight() const;
00304 
              /** NOTE(review): presumably the number of documents collapsed
               *  into this item (see Enquire::set_collapse_key()) - confirm. */
00321         Xapian::doccount get_collapse_count() const;
00322 
              /** The current item's score as a Xapian::percent. */
00328         Xapian::percent get_percent() const;
00329 
              /** Return a string describing this object. */
00333         std::string get_description() const;
00334 
00336 
              /* Iterator traits.  The category is input_iterator; reference
               * and pointer are declared although operator*() returns by
               * value. */
00337         typedef std::input_iterator_tag iterator_category; // FIXME: better than input_iterator!
00338         typedef Xapian::docid value_type;
00339         typedef Xapian::doccount_diff difference_type;
00340         typedef Xapian::docid * pointer;
00341         typedef Xapian::docid & reference;
00343 };
00344 
/** Equality for MSetIterator.
 *
 *  Only the positions are compared; the MSet each iterator belongs to is
 *  not taken into account, so iterators into different MSets compare
 *  equal when their positions match.
 */
00345 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00346 {
00347     return (a.index == b.index);
00348 }
00349 
/** Inequality for MSetIterator.
 *
 *  Only the positions are compared; the MSet each iterator belongs to is
 *  not taken into account.
 */
00350 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00351 {
00352     return (a.index != b.index);
00353 }
00354 
/* Forward declaration: ESet::begin() and ESet::end() return ESetIterator,
 * which is defined after ESet. */
00355 class ESetIterator;
00356 
/** An expand set: the type returned by Enquire::get_eset().
 *
 *  A thin handle around an ESet::Internal held through a
 *  Xapian::Internal::RefCntPtr.  Every member function is only declared
 *  in this header.
 */
00361 class ESet {
00362     public:
00363         class Internal;
              /** Smart pointer to the implementation object. */
00365         Xapian::Internal::RefCntPtr<Internal> internal;
00366 
              /** Default constructor. */
00368         ESet();
00369 
              /** Destructor. */
00371         ~ESet();
00372 
              /** Copy constructor. */
00374         ESet(const ESet & other);
00375 
              /** Assignment.  Returns void, so assignments cannot be chained. */
00377         void operator=(const ESet &other);
00378 
              /** NOTE(review): presumably a bound on the number of terms that
               *  could have been returned - confirm. */
00383         Xapian::termcount get_ebound() const;
00384 
              /** Number of items, as a Xapian::termcount. */
00386         Xapian::termcount size() const;
00387 
              /** Emptiness test. */
00389         bool empty() const;
00390 
              /** Iterator for the start of the set. */
00392         ESetIterator begin() const;
00393 
              /** Iterator for the end of the set. */
00395         ESetIterator end() const;
00396 
              /** Return a string describing this object. */
00401         std::string get_description() const;
00402 };
00403 
/** An iterator over the items of an ESet.
 *
 *  Stores a copy of the ESet handle it iterates over together with a
 *  position (index) inside it.  Only ESet, a friend, can construct an
 *  iterator at a chosen position; other code obtains iterators from ESet
 *  (e.g. ESet::begin()).
 */
00405 class ESetIterator {
00406     private:
00407         friend class ESet;
00408         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00409         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00410 
              /** Construct an iterator at position index_ of eset_. */
00411         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00412             : index(index_), eset(eset_) { }
00413 
              /** Position of the current item. */
00414         Xapian::termcount index;
              /** The ESet being iterated, held by value. */
00415         ESet eset;
00416 
00417     public:
              /** Create an iterator at position 0 over a default-constructed
               *  ESet. */
00421         ESetIterator() : index(0), eset() { }
00422 
00423         ~ESetIterator() { }
00424 
              /** Copy constructor.
               *
               *  Both members are copy-constructed in the initialiser list, so
               *  eset is not default-constructed first and then overwritten
               *  by assignment. */
00426         ESetIterator(const ESetIterator &other)
00427             : index(other.index), eset(other.eset) { }
00430 
              /** Assignment.  Returns void, so assignments cannot be chained. */
00432         void operator=(const ESetIterator &other) {
00433             index = other.index;
00434             eset = other.eset;
00435         }
00436 
              /** Pre-increment: advance one position and return *this. */
00438         ESetIterator & operator++() {
00439             ++index;
00440             return *this;
00441         }
00442 
              /** Post-increment: advances one position but returns nothing,
               *  so the previous value is not available from `it++`. */
00443         void operator++(int) {
00444             ++index;
00445         }
00446 
              /** Dereference: yields a const reference to a string (presumably
               *  the term name of the current item - confirm). */
00448         const std::string & operator *() const;
00449 
              /** Weight of the current item. */
00451         Xapian::weight get_weight() const;
00452 
              /** Return a string describing this object. */
00456         std::string get_description() const;
00457 
00459 
              /* Iterator traits.  The category is input_iterator. */
00460         typedef std::input_iterator_tag iterator_category; // FIXME: better than input_iterator!
00461         typedef std::string value_type;
00462         typedef Xapian::termcount_diff difference_type;
00463         typedef std::string * pointer;
00464         typedef std::string & reference;
00466 };
00467 
/** Equality for ESetIterator.
 *
 *  Only the positions are compared; the ESet each iterator belongs to is
 *  not taken into account, so iterators into different ESets compare
 *  equal when their positions match.
 */
00468 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00469 {
00470     return (a.index == b.index);
00471 }
00472 
/** Inequality for ESetIterator.
 *
 *  Only the positions are compared; the ESet each iterator belongs to is
 *  not taken into account.
 */
00473 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00474 {
00475     return (a.index != b.index);
00476 }
00477 
/** A set of document ids, passed to Enquire::get_mset() and
 *  Enquire::get_eset() as their `omrset` argument (presumably the
 *  documents marked as relevant - confirm).
 *
 *  Documents can be named either by document id or by an MSetIterator; the
 *  iterator overloads simply dereference the iterator and forward to the
 *  document-id overloads.
 */
00482 class RSet {
00483     public:
00485         class Internal;
00486 
              /** Raw pointer to the implementation object.  The copy
               *  constructor, assignment operator and destructor are declared
               *  below and defined outside this header. */
00488         Internal *internal;
00489 
              /** Copy constructor. */
00491         RSet(const RSet &rset);
00492 
              /** Assignment.  Returns void, so assignments cannot be chained. */
00494         void operator=(const RSet &rset);
00495 
              /** Default constructor. */
00497         RSet();
00498 
              /** Destructor. */
00500         ~RSet();
00501 
              /** Number of documents, as a Xapian::doccount. */
00503         Xapian::doccount size() const;
00504 
              /** Emptiness test. */
00506         bool empty() const;
00507 
              /** Add the document with id did. */
00509         void add_document(Xapian::docid did);
00510         
              /** Add the document that the iterator i refers to. */
00512         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00513 
              /** Remove the document with id did. */
00515         void remove_document(Xapian::docid did);
00516 
              /** Remove the document that the iterator i refers to. */
00518         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00519 
              /** Test whether the document with id did is in the set. */
00521         bool contains(Xapian::docid did) const;
00522 
              /** Test whether the document that the iterator i refers to is in
               *  the set.  Declared const, like the document-id overload it
               *  forwards to, so it can be called on a const RSet. */
00524         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00525 
              /** Return a string describing this object. */
00530         std::string get_description() const;
00531 };
00532 
/** Abstract functor interface taking a Xapian::Document.
 *
 *  Pointers to MatchDecider are accepted by Enquire::get_mset() and
 *  Enquire::register_match_decider().
 *  NOTE(review): how the int result is interpreted is not visible in this
 *  header - presumably non-zero accepts the document; confirm.
 */
00535 class MatchDecider {
00536     public:
              /** Decide about doc; must be implemented by subclasses. */
00539         virtual int operator()(const Xapian::Document &doc) const = 0;
00540 
              /** Virtual destructor, so subclasses can be destroyed through a
               *  MatchDecider pointer. */
00542         virtual ~MatchDecider() {}
00543 };
00544 
/** Abstract functor interface taking a term name.
 *
 *  A pointer to an ExpandDecider is accepted by Enquire::get_eset().
 *  NOTE(review): how the int result is interpreted is not visible in this
 *  header - presumably non-zero accepts the term; confirm.
 */
00547 class ExpandDecider {
00548     public:
              /** Decide about the term tname; must be implemented by
               *  subclasses. */
00551         virtual int operator()(const std::string & tname) const = 0;
00552 
              /** Virtual destructor, so subclasses can be destroyed through an
               *  ExpandDecider pointer. */
00554         virtual ~ExpandDecider() {}
00555 };
00556 
/** Entry point for running queries: configured with a query and various
 *  options through the set_*() methods, then asked for an MSet
 *  (get_mset()) or an ESet (get_eset()).
 *
 *  Copying and assignment are declared private, so Enquire objects cannot
 *  be copied by client code.  Apart from one forwarding get_eset()
 *  overload, every member function is only declared in this header.
 */
00570 class Enquire {
00571     private:
              /** Private copy constructor: copying is not allowed. */
00573         Enquire(const Enquire &);
00574 
              /** Private assignment operator: assignment is not allowed. */
00576         void operator=(const Enquire &);
00577 
00578     public:
00579         class Internal;
              /** Smart pointer to the implementation object. */
00581         Xapian::Internal::RefCntPtr<Internal> internal;
00582 
              /** Construct an Enquire for the given database(s).
               *
               *  @param databases     Database to run queries against.
               *  @param errorhandler_ Optional error handler; defaults to a
               *                       null pointer.
               *
               *  The constructor is not explicit, so a Database converts
               *  implicitly to an Enquire. */
00598         Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00599 
              /** Destructor. */
00602         ~Enquire();
00603 
              /** Set the query. */
00611         void set_query(const Xapian::Query & query_);
00612 
              /** Return a const reference to a Query (presumably the one given
               *  to set_query() - confirm).  Not a const member function. */
00619         const Xapian::Query & get_query();
00620 
              /** Choose the weighting scheme.  Weight lists Enquire as a
               *  friend "so Enquire can clone us", which suggests weight_ is
               *  cloned rather than referenced - confirm. */
00627         void set_weighting_scheme(const Weight &weight_);
00628 
              /** Set the value number used as the collapse key. */
00655         void set_collapse_key(Xapian::valueno collapse_key);
00656 
              /** Set the sort direction flag. */
00663         void set_sort_forward(bool sort_forward);
00664 
              /** Set a percentage cutoff and, optionally, a weight cutoff
               *  (the latter defaults to 0). */
00682         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00683 
              /** Set the value number used as sort key and the number of sort
               *  bands. */
00697         void set_sorting(Xapian::valueno sort_key, int sort_bands);
00698 
              /** Set a bias weight and a half-life expressed as a time_t. */
00710         void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00711 
              /** Run the match and return an MSet.
               *
               *  @param first    Offset of the first item wanted (presumably
               *                  zero-based - confirm).
               *  @param maxitems Maximum number of items wanted.
               *  @param omrset   Optional RSet; defaults to a null pointer.
               *  @param mdecider Optional MatchDecider; defaults to a null
               *                  pointer. */
00732         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00733                       const RSet * omrset = 0,
00734                       const MatchDecider * mdecider = 0) const;
00735 
              /* Flag values with distinct bits (1 and 2); presumably meant to
               * be OR-ed together for the `flags` argument of get_eset() -
               * confirm. */
00736         static const int include_query_terms = 1;
00737         static const int use_exact_termfreq = 2;
              /** Compute an ESet from an RSet.
               *
               *  @param maxitems Maximum number of items wanted.
               *  @param omrset   The RSet to use.
               *  @param flags    Defaults to 0.
               *  @param k        Defaults to 1.0.
               *  @param edecider Optional ExpandDecider; defaults to a null
               *                  pointer. */
00761         ESet get_eset(Xapian::termcount maxitems,
00762                         const RSet & omrset,
00763                         int flags = 0,
00764                         double k = 1.0,
00765                         const Xapian::ExpandDecider * edecider = 0) const;
00766 
              /** Convenience overload: forwards to the five-argument
               *  get_eset() with flags = 0 and k = 1.0. */
00781         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00782                                const Xapian::ExpandDecider * edecider) const {
00783             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00784         }
00785 
              /** Begin/end iterators over terms for the document with id did
               *  (presumably the query terms that document matches -
               *  confirm). */
00815         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00816 
00818         TermIterator get_matching_terms_end(Xapian::docid did) const;
00819 
              /** As above, with the document named by an MSetIterator. */
00843         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00844 
00846         TermIterator get_matching_terms_end(const MSetIterator &it) const;
00847 
              /** Associate a MatchDecider with a name.  mdecider defaults to
               *  NULL (presumably that removes a registration - confirm). */
00850         void register_match_decider(const std::string &name,
00851                                     const MatchDecider *mdecider = NULL);
00852 
              /** Return a string describing this object. */
00856         std::string get_description() const;
00857 };
00858 
00859 }
00860 
/* Forward declaration at global scope (outside namespace Xapian), so that
 * Xapian::Weight can name ::SocketServer as a friend. */
00861 class SocketServer;
00862 
00863 namespace Xapian {
00864 
/** Abstract base class for weighting schemes.
 *
 *  A Weight object works as a prototype: create() clones it (via the
 *  private virtual clone()) and fills in the per-term data on the clone.
 *  Enquire and ::SocketServer are friends so that they can call clone().
 */
00866 class Weight {
00867     friend class Enquire; // So Enquire can clone us
00868     friend class ::SocketServer; // So SocketServer can clone us - FIXME
00869     public:
00870         class Internal;
00871     protected:
              /** Copy constructor; protected, and only declared in this
               *  header. */
00872         Weight(const Weight &);
00873     private:
              /** Private assignment operator: assignment is not allowed. */
00874         void operator=(Weight &);
00875 
              /** Return a new object of the same concrete type.  The
               *  subclasses in this header allocate the result with `new`. */
00883         virtual Weight * clone() const = 0;
00884 
00885     protected:
              /* Per-term data; assigned by create() on the cloned object. */
00886         const Internal * internal; // Weight::Internal == StatsSource
00887         Xapian::doclength querysize;
00888         Xapian::termcount wqf;
00889         std::string tname;
00890 
00891     public:
              /** Default constructor.
               *
               *  Gives the pointer and numeric members defined values (null
               *  and zero) instead of leaving them indeterminate until
               *  create() assigns them on a clone. */
00892         Weight() : internal(0), querysize(0), wqf(0) { }
00893         virtual ~Weight() { }
00894 
              /** Clone this object and set the clone's per-term data.
               *
               *  @param internal_  Stored in the clone's `internal`.
               *  @param querysize_ Stored in the clone's `querysize`.
               *  @param wqf_       Stored in the clone's `wqf`.
               *  @param tname_     Stored in the clone's `tname`.
               *  @return The object returned by clone().  NOTE(review): the
               *          clone() implementations in this header use `new`, so
               *          the caller presumably owns the result - confirm. */
00907         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00908                           Xapian::termcount wqf_, std::string tname_) const {
00909             Weight * wt = clone();
00910             wt->internal = internal_;
00911             wt->querysize = querysize_;
00912             wt->wqf = wqf_;
00913             wt->tname = tname_;
00914             return wt;
00915         }
00916 
              /** Name of the weighting scheme (e.g. "Bool", "BM25", "Trad" for
               *  the subclasses in this header). */
00921         virtual std::string name() const = 0;
00922 
              /** Serialise this object's parameters to a string. */
00924         virtual std::string serialise() const = 0;
00925 
              /** Build a Weight from a string (presumably one produced by
               *  serialise() - confirm). */
00927         virtual Weight * unserialise(const std::string &s) const = 0;
00928 
              /** Weight contribution computed from a wdf value and a document
               *  length. */
00936         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00937                                       Xapian::doclength len) const = 0;
00938 
              /** NOTE(review): presumably an upper bound on get_sumpart() -
               *  confirm. */
00944         virtual Xapian::weight get_maxpart() const = 0;
00945 
              /** Extra weight contribution computed from a document length
               *  only. */
00954         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
00955 
              /** NOTE(review): presumably an upper bound on get_sumextra() -
               *  confirm. */
00959         virtual Xapian::weight get_maxextra() const = 0;
00960 
              /** Whether get_sumpart() needs the document length; true unless
               *  a subclass overrides it. */
00962         virtual bool get_sumpart_needs_doclength() const { return true; }
00963 };
00964 
/** Weighting scheme in which every weight is zero.
 *
 *  All four weight functions return 0 and no document length is needed.
 */
00966 class BoolWeight : public Weight {
00967     public:
              /** Return a newly allocated BoolWeight. */
00968         Weight * clone() const {
00969             return new BoolWeight;
00970         }
00971         BoolWeight() { }
00972         ~BoolWeight() { }
00973         std::string name() const { return "Bool"; }
              /** There are no parameters, so the serialised form is empty. */
00974         std::string serialise() const { return ""; }
              /** The input string is ignored; returns a newly allocated
               *  BoolWeight. */
00975         Weight * unserialise(const std::string & /*s*/) const {
00976             return new BoolWeight;
00977         }
00978         Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; }
00979         Xapian::weight get_maxpart() const { return 0; }
00980 
00981         Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; }
00982         Xapian::weight get_maxextra() const { return 0; }
00983 
              /** get_sumpart() ignores its len argument. */
00984         bool get_sumpart_needs_doclength() const { return false; }      
00985 };
00986 
/** Weighting scheme named "BM25", parameterised by A, B, C, D and
 *  min_normlen.
 *
 *  The mutable members are a cache filled in lazily from const member
 *  functions; weight_calculated records whether that has happened
 *  (presumably calc_termweight() fills the cache and sets the flag -
 *  it is defined outside this header; confirm).
 */
00999 class BM25Weight : public Weight {
01000     private:
              /* Cached values.  No constructor in this header initialises
               * them. */
01001         mutable Xapian::weight termweight;
01002         mutable Xapian::doclength lenpart;
01003         mutable double BD;
01004 
              /* Scheme parameters, fixed at construction. */
01005         double A, B, C, D;
01006         Xapian::doclength min_normlen;
01007 
              /* Starts out false in both constructors. */
01008         mutable bool weight_calculated;
01009 
01010         void calc_termweight() const;
01011 
01012     public:
              /** Construct with explicit parameters.
               *
               *  Out-of-range values are clamped rather than rejected:
               *  negative A_, B_ and C_ become 0, and D_ is limited to the
               *  range [0, 1].  min_normlen_ is stored unchanged. */
01031         BM25Weight(double A_, double B_, double C_, double D_,
01032                    double min_normlen_)
01033                 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
01034                   weight_calculated(false)
01035         {
01036             if (A < 0) A = 0;
01037             if (B < 0) B = 0;
01038             if (C < 0) C = 0;
01039             if (D < 0) D = 0; else if (D > 1) D = 1;
01040         }
              /** Construct with the defaults A = 1, B = 1, C = 0, D = 0.5 and
               *  min_normlen = 0.5. */
01041         BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
01042                        weight_calculated(false) { }
01043 
              /** Return a newly allocated BM25Weight with the same five
               *  parameters.  The cached values are not copied; the clone
               *  starts with weight_calculated == false. */
01044         Weight * clone() const {
01045             return new BM25Weight(A, B, C, D, min_normlen);
01046         }
01047         ~BM25Weight() { }
01048         std::string name() const { return "BM25"; }
01049         std::string serialise() const;
01050         Weight * unserialise(const std::string & s) const;
01051         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01052         Xapian::weight get_maxpart() const;
01053 
01054         Xapian::weight get_sumextra(Xapian::doclength len) const;
01055         Xapian::weight get_maxextra() const;
01056 
              /** True when the cached lenpart is non-zero.
               *  NOTE(review): lenpart is not initialised by any constructor
               *  in this header and this function does not check
               *  weight_calculated, so it appears to rely on the cache
               *  having been filled beforehand - confirm that callers
               *  guarantee this. */
01057         bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01058 };
01059 
/** Weighting scheme named "Trad", parameterised by a single value k.
 *
 *  The mutable members are a cache filled in lazily from const member
 *  functions; weight_calculated records whether that has happened
 *  (presumably calc_termweight() fills the cache and sets the flag -
 *  it is defined outside this header; confirm).
 */
01072 class TradWeight : public Weight {
01073     private:
              /* Cached values.  No constructor in this header initialises
               * them. */
01074         mutable Xapian::weight termweight;
01075         mutable Xapian::doclength lenpart;
01076 
              /* Scheme parameter, fixed at construction. */
01077         double param_k;
01078 
              /* Starts out false in both constructors. */
01079         mutable bool weight_calculated;
01080 
01081         void calc_termweight() const;
01082 
01083     public:
              /** Construct with parameter k.  A negative k is clamped to 0
               *  rather than rejected.  Marked explicit, so a double never
               *  converts implicitly to a TradWeight. */
01091         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01092             if (param_k < 0) param_k = 0;
01093         }
01094 
              /** Construct with the default k = 1.0. */
01095         TradWeight() : param_k(1.0), weight_calculated(false) { }
01096         
              /** Return a newly allocated TradWeight with the same k.  The
               *  cached values are not copied; the clone starts with
               *  weight_calculated == false. */
01097         Weight * clone() const {
01098             return new TradWeight(param_k);
01099         }
01100         ~TradWeight() { }
01101         std::string name() const { return "Trad"; }
01102         std::string serialise() const;
01103         Weight * unserialise(const std::string & s) const;
01104         
01105         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01106         Xapian::weight get_maxpart() const;
01107 
01108         Xapian::weight get_sumextra(Xapian::doclength len) const;
01109         Xapian::weight get_maxextra() const;
01110 
              /** True when the cached lenpart is non-zero.
               *  NOTE(review): lenpart is not initialised by any constructor
               *  in this header and this function does not check
               *  weight_calculated, so it appears to rely on the cache
               *  having been filled beforehand - confirm that callers
               *  guarantee this. */
01111         bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01112 };
01113 
01114 }
01115 
01116 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.8.0).
Generated on 20 Apr 2004 by Doxygen 1.2.15.