Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

/include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* ----START-LICENCE----
00005  * Copyright 1999,2000,2001 BrightStation PLC
00006  * Copyright 2001,2002 Ananova Ltd
00007  * Copyright 2002,2003 Olly Betts
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00022  * USA
00023  * -----END-LICENCE-----
00024  */
00025 
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028 
00029 #include <string>
00030 #include <time.h> // for time_t
00031 
00032 #include <xapian/base.h>
00033 #include <xapian/types.h>
00034 
00035 namespace Xapian {
00036 
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class TermIterator;
00043 class Weight;
00044 
00048 class MSet {
00049     public:
00050         class Internal;
00052         Xapian::Internal::RefCntPtr<Internal> internal;
00053 
00054     public:
00055         // FIXME: public for now, private would be better
00057         MSet(MSet::Internal * internal_);
00058 
00060         MSet();
00061 
00063         ~MSet();
00064 
00066         MSet(const MSet & other);
00067 
00069         void operator=(const MSet &other);
00070 
00086         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087 
00090         void fetch(const MSetIterator &item) const;
00091 
00094         void fetch() const;
00095 
00100         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101 
00103         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104 
00112         Xapian::doccount get_termfreq(const std::string &tname) const;
00113 
00121         Xapian::weight get_termweight(const std::string &tname) const;
00122 
00130         Xapian::doccount get_firstitem() const;
00131 
00138         Xapian::doccount get_matches_lower_bound() const;
00139 
00149         Xapian::doccount get_matches_estimated() const;
00150 
00157         Xapian::doccount get_matches_upper_bound() const;
00158 
00164         Xapian::weight get_max_possible() const;
00165 
00179         Xapian::weight get_max_attained() const;
00180 
00181         Xapian::doccount size() const;
00182 
00183         Xapian::doccount max_size() const;
00184 
00185         bool empty() const;
00186 
00187         void swap(MSet & other);
00188 
00189         MSetIterator begin() const;
00190 
00191         MSetIterator end() const;
00192 
00193         MSetIterator back() const;
00194         
00204         MSetIterator operator[](Xapian::doccount i) const;
00205 
00207 
00208         typedef MSetIterator value_type; // FIXME: not assignable...
00209         typedef MSetIterator iterator;
00210         typedef MSetIterator const_iterator;
00211         typedef MSetIterator & reference; // Hmm
00212         typedef MSetIterator & const_reference;
00213         typedef MSetIterator * pointer; // Hmm
00214         typedef Xapian::doccount_diff difference_type;
00215         typedef Xapian::doccount size_type;
00217         
00221         std::string get_description() const;
00222 };
00223 
00227 class MSetIterator {
00228     private:
00229         friend class MSet;
00230         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00231         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00232 
00233         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00234             : index(index_), mset(mset_) { }
00235 
00236         Xapian::doccount index;
00237         MSet mset;
00238 
00239     public:
00243         MSetIterator() : index(0), mset() { }
00244 
00245         ~MSetIterator() { }
00246 
00248         MSetIterator(const MSetIterator &other) {
00249             index = other.index;
00250             mset = other.mset;
00251         }
00252 
00254         void operator=(const MSetIterator &other) {
00255             index = other.index;
00256             mset = other.mset;
00257         }
00258 
00260         MSetIterator & operator++() {
00261             ++index;
00262             return *this;
00263         }
00264 
00265         void operator++(int) {
00266             ++index;
00267         }
00268 
00270         Xapian::docid operator*() const;
00271 
00290         Xapian::Document get_document() const;
00291 
00298         Xapian::doccount get_rank() const {
00299             return mset.get_firstitem() + index;
00300         }
00301 
00303         Xapian::weight get_weight() const;
00304 
00321         Xapian::doccount get_collapse_count() const;
00322 
00328         Xapian::percent get_percent() const;
00329 
00333         std::string get_description() const;
00334 
00336 
00337         typedef std::input_iterator_tag iterator_category; // FIXME: better than input_iterator!
00338         typedef Xapian::docid value_type;
00339         typedef Xapian::doccount_diff difference_type;
00340         typedef Xapian::docid * pointer;
00341         typedef Xapian::docid & reference;
00343 };
00344 
00345 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00346 {
00347     return (a.index == b.index);
00348 }
00349 
00350 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00351 {
00352     return (a.index != b.index);
00353 }
00354 
00355 class ESetIterator;
00356 
00361 class ESet {
00362     public:
00363         class Internal;
00365         Xapian::Internal::RefCntPtr<Internal> internal;
00366 
00368         ESet();
00369 
00371         ~ESet();
00372 
00374         ESet(const ESet & other);
00375 
00377         void operator=(const ESet &other);
00378 
00383         Xapian::termcount get_ebound() const;
00384 
00386         Xapian::termcount size() const;
00387 
00389         bool empty() const;
00390 
00392         ESetIterator begin() const;
00393 
00395         ESetIterator end() const;
00396 
00401         std::string get_description() const;
00402 };
00403 
00405 class ESetIterator {
00406     private:
00407         friend class ESet;
00408         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00409         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00410 
00411         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00412             : index(index_), eset(eset_) { }
00413 
00414         Xapian::termcount index;
00415         ESet eset;
00416 
00417     public:
00421         ESetIterator() : index(0), eset() { }
00422 
00423         ~ESetIterator() { }
00424 
00426         ESetIterator(const ESetIterator &other) {
00427             index = other.index;
00428             eset = other.eset;
00429         }
00430 
00432         void operator=(const ESetIterator &other) {
00433             index = other.index;
00434             eset = other.eset;
00435         }
00436 
00438         ESetIterator & operator++() {
00439             ++index;
00440             return *this;
00441         }
00442 
00443         void operator++(int) {
00444             ++index;
00445         }
00446 
00448         const std::string & operator *() const;
00449 
00451         Xapian::weight get_weight() const;
00452 
00456         std::string get_description() const;
00457 
00459 
00460         typedef std::input_iterator_tag iterator_category; // FIXME: better than input_iterator!
00461         typedef std::string value_type;
00462         typedef Xapian::termcount_diff difference_type;
00463         typedef std::string * pointer;
00464         typedef std::string & reference;
00466 };
00467 
00468 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00469 {
00470     return (a.index == b.index);
00471 }
00472 
00473 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00474 {
00475     return (a.index != b.index);
00476 }
00477 
00482 class RSet {
00483     public:
00485         class Internal;
00486 
00488         Internal *internal;
00489 
00491         RSet(const RSet &rset);
00492 
00494         void operator=(const RSet &rset);
00495 
00497         RSet();
00498 
00500         ~RSet();
00501 
00503         Xapian::doccount size() const;
00504 
00506         bool empty() const;
00507 
00509         void add_document(Xapian::docid did);
00510         
00512         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00513 
00515         void remove_document(Xapian::docid did);
00516 
00518         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00519 
00521         bool contains(Xapian::docid did) const;
00522 
00524         bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00525 
00530         std::string get_description() const;
00531 };
00532 
00535 class MatchDecider {
00536     public:
00539         virtual int operator()(const Xapian::Document &doc) const = 0;
00540 
00542         virtual ~MatchDecider() {}
00543 };
00544 
/** Abstract functor interface taking a term name.
 *
 *  Subclasses implement operator().  Enquire::get_eset() accepts a pointer
 *  to an ExpandDecider.
 */
class ExpandDecider {
  public:
    /** Decision function, to be supplied by subclasses.
     *
     *  @param tname  The term name to decide on.
     *  @return       An int; presumably non-zero means "accept" - confirm
     *                against the expand implementation.
     */
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor, so subclasses can be destroyed through a pointer
    /// to the base class.
    virtual ~ExpandDecider() { }
};
00556 
00570 class Enquire {
00571     private:
00573         Enquire(const Enquire &);
00574 
00576         void operator=(const Enquire &);
00577 
00578     public:
00579         class Internal;
00581         Xapian::Internal::RefCntPtr<Internal> internal;
00582 
00598         Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00599 
00602         ~Enquire();
00603 
00611         void set_query(const Xapian::Query & query_);
00612 
00619         const Xapian::Query & get_query();
00620 
00627         void set_weighting_scheme(const Weight &weight_);
00628 
00655         void set_collapse_key(Xapian::valueno collapse_key);
00656 
00663         void set_sort_forward(bool sort_forward);
00664 
00682         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00683 
00697         void set_sorting(Xapian::valueno sort_key, int sort_bands);
00698 
00710         void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00711 
00732         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00733                       const RSet * omrset = 0,
00734                       const MatchDecider * mdecider = 0) const;
00735 
00736         static const int include_query_terms = 1;
00737         static const int use_exact_termfreq = 2;
00761         ESet get_eset(Xapian::termcount maxitems,
00762                         const RSet & omrset,
00763                         int flags = 0,
00764                         double k = 1.0,
00765                         const Xapian::ExpandDecider * edecider = 0) const;
00766 
00781         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00782                                const Xapian::ExpandDecider * edecider) const {
00783             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00784         }
00785 
00815         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00816 
00818         TermIterator get_matching_terms_end(Xapian::docid did) const;
00819 
00843         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00844 
00846         TermIterator get_matching_terms_end(const MSetIterator &it) const;
00847 
00850         void register_match_decider(const std::string &name,
00851                                     const MatchDecider *mdecider = NULL);
00852 
00856         std::string get_description() const;
00857 };
00858 
00859 }
00860 
00861 class SocketServer;
00862 
00863 namespace Xapian {
00864 
00866 class Weight {
00867     friend class Enquire; // So Enquire can clone us
00868     friend class SocketServer; // So SocketServer can clone us - FIXME
00869     public:
00870         class Internal;
00871     private:
00872         Weight(const Weight &);
00873         void operator=(Weight &);
00874 
00882         virtual Weight * clone() const = 0;
00883 
00884     protected:
00885         const Internal * internal; // Weight::Internal == StatsSource
00886         Xapian::doclength querysize;
00887         Xapian::termcount wqf;
00888         std::string tname;
00889 
00890     public:
00891         Weight() { }
00892         virtual ~Weight() { }
00893 
00906         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00907                           Xapian::termcount wqf_, std::string tname_) const {
00908             Weight * wt = clone();
00909             wt->internal = internal_;
00910             wt->querysize = querysize_;
00911             wt->wqf = wqf_;
00912             wt->tname = tname_;
00913             return wt;
00914         }
00915 
00920         virtual std::string name() const = 0;
00921 
00923         virtual std::string serialise() const = 0;
00924 
00926         virtual Weight * Weight::unserialise(const std::string &s) const = 0;
00927 
00935         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00936                                       Xapian::doclength len) const = 0;
00937 
00943         virtual Xapian::weight get_maxpart() const = 0;
00944 
00953         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
00954 
00958         virtual Xapian::weight get_maxextra() const = 0;
00959 
00961         virtual bool get_sumpart_needs_doclength() const { return true; }
00962 };
00963 
00965 class BoolWeight : public Weight {
00966     public:
00967         Weight * clone() const {
00968             return new BoolWeight;
00969         }
00970         BoolWeight() { }
00971         ~BoolWeight() { }
00972         std::string name() const { return "Bool"; }
00973         std::string serialise() const { return ""; }
00974         Weight * unserialise(const std::string & /*s*/) const {
00975             return new BoolWeight;
00976         }
00977         Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; }
00978         Xapian::weight get_maxpart() const { return 0; }
00979 
00980         Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; }
00981         Xapian::weight get_maxextra() const { return 0; }
00982 
00983         bool get_sumpart_needs_doclength() const { return false; }      
00984 };
00985 
00998 class BM25Weight : public Weight {
00999     private:
01000         mutable Xapian::weight termweight;
01001         mutable Xapian::doclength lenpart;
01002         mutable double BD;
01003 
01004         double A, B, C, D;
01005         Xapian::doclength min_normlen;
01006 
01007         mutable bool weight_calculated;
01008 
01009         void calc_termweight() const;
01010 
01011     public:
01030         BM25Weight(double A_, double B_, double C_, double D_,
01031                    double min_normlen_)
01032                 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
01033                   weight_calculated(false)
01034         {
01035             if (A < 0) A = 0;
01036             if (B < 0) B = 0;
01037             if (C < 0) C = 0;
01038             if (D < 0) D = 0; else if (D > 1) D = 1;
01039         }
01040         BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
01041                        weight_calculated(false) { }
01042 
01043         Weight * clone() const {
01044             return new BM25Weight(A, B, C, D, min_normlen);
01045         }
01046         ~BM25Weight() { }
01047         std::string name() const { return "BM25"; }
01048         std::string serialise() const;
01049         Weight * unserialise(const std::string & s) const;
01050         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01051         Xapian::weight get_maxpart() const;
01052 
01053         Xapian::weight get_sumextra(Xapian::doclength len) const;
01054         Xapian::weight get_maxextra() const;
01055 
01056         bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01057 };
01058 
01071 class TradWeight : public Weight {
01072     private:
01073         mutable Xapian::weight termweight;
01074         mutable Xapian::doclength lenpart;
01075 
01076         double param_k;
01077 
01078         mutable bool weight_calculated;
01079 
01080         void calc_termweight() const;
01081 
01082     public:
01090         TradWeight(double k = 1) : param_k(k), weight_calculated(false) {
01091             if (param_k < 0) param_k = 0;
01092         }
01093         Weight * clone() const {
01094             return new TradWeight(param_k);
01095         }
01096         ~TradWeight() { }
01097         std::string name() const { return "Trad"; }
01098         std::string serialise() const;
01099         Weight * unserialise(const std::string & s) const;
01100         
01101         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01102         Xapian::weight get_maxpart() const;
01103 
01104         Xapian::weight get_sumextra(Xapian::doclength len) const;
01105         Xapian::weight get_maxextra() const;
01106 
01107         bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01108 };
01109 
01110 }
01111 
01112 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.7.5).
Generated on 26 Nov 2003 by Doxygen 1.2.15.