Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

/include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* ----START-LICENCE----
00005  * Copyright 1999,2000,2001 BrightStation PLC
00006  * Copyright 2001,2002 Ananova Ltd
00007  * Copyright 2002,2003,2004 Olly Betts
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00022  * USA
00023  * -----END-LICENCE-----
00024  */
00025 
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028 
00029 #include <string>
      #include <iterator> // for std::bidirectional_iterator_tag
00030 #include <time.h> // for time_t
00031 
00032 #include <xapian/base.h>
00033 #include <xapian/types.h>
00034 
00035 namespace Xapian {
00036 
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class TermIterator;
00043 class Weight;
00044 
// MSet is the type returned by Enquire::get_mset().  Only the interface is
// declared here: every member function is defined out of line and the only
// data member is the `internal` handle.  The class exposes an STL-container
// style surface (size/empty/begin/end/operator[] plus the usual typedefs)
// whose iterator type is MSetIterator.
00048 class MSet {
00049     public:
00050         class Internal;
              // Implementation object, held through Xapian's RefCntPtr wrapper.
00052         Xapian::Internal::RefCntPtr<Internal> internal;
00053 
00054     public:
00055         // FIXME: public for now, private would be better
              // Build an MSet around an existing Internal.  `explicit` stops a
              // raw Internal pointer from converting to an MSet implicitly.
00057         explicit MSet(MSet::Internal * internal_);
00058 
              // Default constructor (defined out of line).
00060         MSet();
00061 
              // Destructor (defined out of line).
00063         ~MSet();
00064 
              // Copy constructor (defined out of line).
00066         MSet(const MSet & other);
00067 
              // Assignment.  Returns void, so assignments cannot be chained.
00069         void operator=(const MSet &other);
00070 
              // fetch() comes in three forms: an iterator pair, a single
              // iterator, or no argument at all.
              // NOTE(review): presumably these pre-load the documents for the
              // selected items - confirm against the implementation.
00086         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087 
00090         void fetch(const MSetIterator &item) const;
00091 
00094         void fetch() const;
00095 
              // Convert a weight to a Xapian::percent value.
00100         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101 
              // As above, but taking an iterator instead of a raw weight.
00103         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104 
              // Per-term figures, looked up by term name.
00112         Xapian::doccount get_termfreq(const std::string &tname) const;
00113 
00121         Xapian::weight get_termweight(const std::string &tname) const;
00122 
              // MSetIterator::get_rank() adds its index to this value.
00130         Xapian::doccount get_firstitem() const;
00131 
              // Lower bound, estimate and upper bound for the match count (as
              // named; the exact guarantees are set by the implementation).
00141         Xapian::doccount get_matches_lower_bound() const;
00142 
00155         Xapian::doccount get_matches_estimated() const;
00156 
00166         Xapian::doccount get_matches_upper_bound() const;
00167 
              // Maximum possible weight / maximum weight attained (as named).
00173         Xapian::weight get_max_possible() const;
00174 
00188         Xapian::weight get_max_attained() const;
00189 
              // Container-style interface.
00190         Xapian::doccount size() const;
00191 
00192         Xapian::doccount max_size() const;
00193 
00194         bool empty() const;
00195 
00196         void swap(MSet & other);
00197 
00198         MSetIterator begin() const;
00199 
00200         MSetIterator end() const;
00201 
00202         MSetIterator back() const;
00203         
              // Indexed access; returns an iterator rather than a reference.
00213         MSetIterator operator[](Xapian::doccount i) const;
00214 
00216 
              // Container typedefs.  Note that const_reference is declared
              // identically to reference (a non-const MSetIterator &).
00217         typedef MSetIterator value_type; // FIXME: not assignable...
00218         typedef MSetIterator iterator;
00219         typedef MSetIterator const_iterator;
00220         typedef MSetIterator & reference; // Hmm
00221         typedef MSetIterator & const_reference;
00222         typedef MSetIterator * pointer; // Hmm
00223         typedef Xapian::doccount_diff difference_type;
00224         typedef Xapian::doccount size_type;
00226         
              // Returns a std::string describing this object.
00230         std::string get_description() const;
00231 };
00232 
// MSetIterator: bidirectional iterator over the items of an MSet.  It stores
// a position (`index`) together with its own copy of the MSet handle.
// Dereferencing yields a Xapian::docid; further per-item accessors are
// declared below and defined out of line.
00236 class MSetIterator {
00237     private:
00238         friend class MSet;
00239         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00240         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00241 
              // Positioned constructor.  Private, so only the friends declared
              // above can create an iterator at a given index.
00242         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00243             : index(index_), mset(mset_) { }
00244 
              // Position within `mset`.
00245         Xapian::doccount index;
              // The MSet being iterated, stored by value.
00246         MSet mset;
00247 
00248     public:
              // Default constructor: index 0 and a default-constructed MSet.
00252         MSetIterator() : index(0), mset() { }
00253 
00254         ~MSetIterator() { }
00255 
              // Copy constructor.  Members are copy-constructed directly in the
              // initialiser list (matching the private constructor above)
              // instead of being default-constructed and then assigned.
00257         MSetIterator(const MSetIterator &other)
00258             : index(other.index),
00259               mset(other.mset)
00260         { }
00261 
              // Assignment.  Returns void, so assignments cannot be chained.
00263         void operator=(const MSetIterator &other) {
00264             index = other.index;
00265             mset = other.mset;
00266         }
00267 
              // Pre-increment: advance, then return this iterator.
00269         MSetIterator & operator++() {
00270             ++index;
00271             return *this;
00272         }
00273 
              // Post-increment: advance, but return a copy of the old position.
00275         MSetIterator operator++(int) {
00276             MSetIterator tmp = *this;
00277             ++index;
00278             return tmp;
00279         }
00280 
              // Pre-decrement: step back, then return this iterator.
00282         MSetIterator & operator--() {
00283             --index;
00284             return *this;
00285         }
00286 
              // Post-decrement: step back, but return a copy of the old position.
00288         MSetIterator operator--(int) {
00289             MSetIterator tmp = *this;
00290             --index;
00291             return tmp;
00292         }
00293 
              // Dereference: yields a document id by value (defined out of line).
00295         Xapian::docid operator*() const;
00296 
              // Returns a Xapian::Document by value (defined out of line).
00315         Xapian::Document get_document() const;
00316 
              // Rank = the MSet's first-item offset plus this iterator's index.
00323         Xapian::doccount get_rank() const {
00324             return mset.get_firstitem() + index;
00325         }
00326 
              // Per-item accessors, all defined out of line.
00328         Xapian::weight get_weight() const;
00329 
00346         Xapian::doccount get_collapse_count() const;
00347 
00353         Xapian::percent get_percent() const;
00354 
              // Returns a std::string describing this object.
00358         std::string get_description() const;
00359 
00361 
              // Iterator traits.  std::bidirectional_iterator_tag is declared
              // in the standard header <iterator>.
00362         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00363         typedef Xapian::docid value_type;
00364         typedef Xapian::doccount_diff difference_type;
00365         typedef Xapian::docid * pointer;
00366         typedef Xapian::docid & reference;
00368 };
00369 
// Two MSetIterators compare equal when they hold the same index.  The MSet
// each iterator refers to is not part of the comparison.
00370 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00371 {
00372     return a.index == b.index;
00373 }
00374 
// Inequality for MSetIterators: true when the two indices differ.  As with
// equality, only the index is examined, never the MSet.
00375 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00376 {
00377     return !(a.index == b.index);
00378 }
00379 
00380 class ESetIterator;
00381 
// ESet is the type returned by Enquire::get_eset().  Only the interface is
// declared here: every member function is defined out of line and the only
// data member is the `internal` handle.  Items are reached through
// ESetIterator, whose dereference yields a std::string.
00386 class ESet {
00387     public:
00388         class Internal;
              // Implementation object, held through Xapian's RefCntPtr wrapper.
00390         Xapian::Internal::RefCntPtr<Internal> internal;
00391 
              // Default constructor (defined out of line).
00393         ESet();
00394 
              // Destructor (defined out of line).
00396         ~ESet();
00397 
              // Copy constructor (defined out of line).
00399         ESet(const ESet & other);
00400 
              // Assignment.  Returns void, so assignments cannot be chained.
00402         void operator=(const ESet &other);
00403 
              // NOTE(review): presumably a bound on the number of candidate
              // terms - the meaning is not visible here; confirm.
00408         Xapian::termcount get_ebound() const;
00409 
              // Number of items, as a Xapian::termcount.
00411         Xapian::termcount size() const;
00412 
              // Emptiness test (defined out of line).
00414         bool empty() const;
00415 
              // Iterator to the start of the set.
00417         ESetIterator begin() const;
00418 
              // Iterator to the end of the set.
00420         ESetIterator end() const;
00421 
              // Returns a std::string describing this object.
00426         std::string get_description() const;
00427 };
00428 
// ESetIterator: bidirectional iterator over the items of an ESet.  It stores
// a position (`index`) together with its own copy of the ESet handle.
// Dereferencing yields a const std::string reference; get_weight() gives the
// weight associated with the current item.
00430 class ESetIterator {
00431     private:
00432         friend class ESet;
00433         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00434         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00435 
              // Positioned constructor.  Private, so only the friends declared
              // above can create an iterator at a given index.
00436         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00437             : index(index_), eset(eset_) { }
00438 
              // Position within `eset`.
00439         Xapian::termcount index;
              // The ESet being iterated, stored by value.
00440         ESet eset;
00441 
00442     public:
              // Default constructor: index 0 and a default-constructed ESet.
00446         ESetIterator() : index(0), eset() { }
00447 
00448         ~ESetIterator() { }
00449 
              // Copy constructor.  Members are copy-constructed directly in the
              // initialiser list (matching the private constructor above)
              // instead of being default-constructed and then assigned.
00451         ESetIterator(const ESetIterator &other)
00452             : index(other.index),
00453               eset(other.eset)
00454         { }
00455 
              // Assignment.  Returns void, so assignments cannot be chained.
00457         void operator=(const ESetIterator &other) {
00458             index = other.index;
00459             eset = other.eset;
00460         }
00461 
              // Pre-increment: advance, then return this iterator.
00463         ESetIterator & operator++() {
00464             ++index;
00465             return *this;
00466         }
00467 
              // Post-increment: advance, but return a copy of the old position.
00469         ESetIterator operator++(int) {
00470             ESetIterator tmp = *this;
00471             ++index;
00472             return tmp;
00473         }
00474 
              // Pre-decrement: step back, then return this iterator.
00476         ESetIterator & operator--() {
00477             --index;
00478             return *this;
00479         }
00480 
              // Post-decrement: step back, but return a copy of the old position.
00482         ESetIterator operator--(int) {
00483             ESetIterator tmp = *this;
00484             --index;
00485             return tmp;
00486         }
00487 
              // Dereference: yields a const reference to a std::string
              // (defined out of line).
00489         const std::string & operator *() const;
00490 
              // Weight of the current item (defined out of line).
00492         Xapian::weight get_weight() const;
00493 
              // Returns a std::string describing this object.
00497         std::string get_description() const;
00498 
00500 
              // Iterator traits.  std::bidirectional_iterator_tag is declared
              // in the standard header <iterator>.
00501         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00502         typedef std::string value_type;
00503         typedef Xapian::termcount_diff difference_type;
00504         typedef std::string * pointer;
00505         typedef std::string & reference;
00507 };
00508 
// Two ESetIterators compare equal when they hold the same index.  The ESet
// each iterator refers to is not part of the comparison.
00509 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00510 {
00511     return a.index == b.index;
00512 }
00513 
// Inequality for ESetIterators: true when the two indices differ.  As with
// equality, only the index is examined, never the ESet.
00514 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00515 {
00516     return !(a.index == b.index);
00517 }
00518 
// RSet: a set of document ids that callers build up with add_document() /
// remove_document() and then pass to Enquire::get_mset() or
// Enquire::get_eset().  Each operation is offered both for a raw docid and
// for an MSetIterator; the iterator forms simply dereference the iterator
// and forward to the docid form.
00523 class RSet {
00524     public:
00526         class Internal;
00527 
              // Raw pointer to the implementation object.  Copying, assignment
              // and destruction are declared below and defined out of line.
00529         Internal *internal;
00530 
              // Copy constructor (defined out of line).
00532         RSet(const RSet &rset);
00533 
              // Assignment.  Returns void, so assignments cannot be chained.
00535         void operator=(const RSet &rset);
00536 
              // Default constructor (defined out of line).
00538         RSet();
00539 
              // Destructor (defined out of line).
00541         ~RSet();
00542 
              // Number of documents in the set, as a Xapian::doccount.
00544         Xapian::doccount size() const;
00545 
              // Emptiness test (defined out of line).
00547         bool empty() const;
00548 
              // Add a document by id.
00550         void add_document(Xapian::docid did);
00551         
              // Add the document an MSetIterator refers to.
00553         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00554 
              // Remove a document by id.
00556         void remove_document(Xapian::docid did);
00557 
              // Remove the document an MSetIterator refers to.
00559         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00560 
              // Membership test by id.
00562         bool contains(Xapian::docid did) const;
00563 
              // Membership test for the document an MSetIterator refers to.
              // Const, like the docid overload it forwards to, so that it can
              // be called on a const RSet.
00565         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00566 
              // Returns a std::string describing this object.
00571         std::string get_description() const;
00572 };
00573 
// MatchDecider: abstract functor interface.  Subclasses implement
// operator() taking a Xapian::Document; a pointer to one can be passed to
// Enquire::get_mset() and Enquire::register_match_decider().
00576 class MatchDecider {
00577     public:
              // Decision function.  The result is an int, not a bool.
              // NOTE(review): presumably non-zero means "accept this document"
              // - confirm against the matcher.
00580         virtual int operator()(const Xapian::Document &doc) const = 0;
00581 
              // Virtual destructor, so subclasses can be destroyed through a
              // MatchDecider pointer.
00583         virtual ~MatchDecider() {}
00584 };
00585 
// ExpandDecider: abstract functor interface.  Subclasses implement
// operator() taking a term name; a pointer to one can be passed to
// Enquire::get_eset().
00588 class ExpandDecider {
00589     public:
              // Decision function.  The result is an int, not a bool.
              // NOTE(review): presumably non-zero means "accept this term"
              // - confirm against the expand code.
00592         virtual int operator()(const std::string & tname) const = 0;
00593 
              // Virtual destructor, so subclasses can be destroyed through an
              // ExpandDecider pointer.
00595         virtual ~ExpandDecider() {}
00596 };
00597 
// Enquire: the query-running interface.  An Enquire is constructed from a
// Database, configured through the set_*() calls, and then asked for results
// with get_mset() (an MSet) or get_eset() (an ESet).  Objects are
// non-copyable: the copy constructor and assignment operator are private.
// Apart from one forwarding get_eset() overload, every member is defined
// out of line.
00611 class Enquire {
00612     private:
              // Private copy constructor: Enquire objects cannot be copied.
00614         Enquire(const Enquire &);
00615 
              // Private assignment operator: Enquire objects cannot be assigned.
00617         void operator=(const Enquire &);
00618 
00619     public:
00620         class Internal;
              // Implementation object, held through Xapian's RefCntPtr wrapper.
00622         Xapian::Internal::RefCntPtr<Internal> internal;
00623 
              // Construct from a Database, with an optional ErrorHandler
              // (defaults to a null pointer).
00639         Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00640 
              // Destructor (defined out of line).
00643         ~Enquire();
00644 
              // Set the query to run.
00652         void set_query(const Xapian::Query & query_);
00653 
              // Get the current query by const reference.  Not a const member.
00660         const Xapian::Query & get_query();
00661 
              // Select the weighting scheme; takes any Weight subclass by
              // reference.  (Enquire is a friend of Weight so that it can call
              // Weight::clone().)
00668         void set_weighting_scheme(const Weight &weight_);
00669 
              // Match-tuning setters.  The names and argument types are all
              // this header shows; the semantics live in the implementation.
00696         void set_collapse_key(Xapian::valueno collapse_key);
00697 
00704         void set_sort_forward(bool sort_forward);
00705 
              // weight_cutoff defaults to 0.
00723         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00724 
00738         void set_sorting(Xapian::valueno sort_key, int sort_bands);
00739 
00751         void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00752 
              // Run the match and return an MSet.  `first` and `maxitems`
              // select the window of results (as named); the RSet and
              // MatchDecider are optional and default to null pointers.
00773         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00774                       const RSet * omrset = 0,
00775                       const MatchDecider * mdecider = 0) const;
00776 
              // NOTE(review): the values 1 and 2 look like bit flags for the
              // `flags` argument of get_eset() - confirm.
00777         static const int include_query_terms = 1;
00778         static const int use_exact_termfreq = 2;
              // Return an ESet of at most `maxitems` items (as named), given
              // an RSet.  flags defaults to 0, k to 1.0, and the
              // ExpandDecider to a null pointer.
00802         ESet get_eset(Xapian::termcount maxitems,
00803                         const RSet & omrset,
00804                         int flags = 0,
00805                         double k = 1.0,
00806                         const Xapian::ExpandDecider * edecider = 0) const;
00807 
              // Convenience overload: supply only an ExpandDecider.  Forwards
              // to the full form with flags = 0 and k = 1.0.
00822         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00823                                const Xapian::ExpandDecider * edecider) const {
00824             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00825         }
00826 
              // Begin/end TermIterator pair for a document given by id.
00856         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00857 
00859         TermIterator get_matching_terms_end(Xapian::docid did) const;
00860 
              // The same pair, for the document an MSetIterator refers to.
00884         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00885 
00887         TermIterator get_matching_terms_end(const MSetIterator &it) const;
00888 
              // Register a MatchDecider under a name.  The pointer defaults to
              // null, written as 0 like every other pointer default in this
              // class.
00891         void register_match_decider(const std::string &name,
00892                                     const MatchDecider *mdecider = 0);
00893 
              // Returns a std::string describing this object.
00897         std::string get_description() const;
00898 };
00899 
00900 }
00901 
00902 class SocketServer;
00903 
00904 namespace Xapian {
00905 
// Weight: abstract base class for weighting schemes.  A concrete scheme
// implements clone(), name(), serialise(), unserialise() and the four
// get_*() weight functions.  create() produces a clone and fills in the
// per-term context (statistics source, query size, wqf, term name) that the
// derived class can then read through the protected members.
00907 class Weight {
00908     friend class Enquire; // So Enquire can clone us
00909     friend class ::SocketServer; // So SocketServer can clone us - FIXME
00910     public:
00911         class Internal;
00912     protected:
              // Copy constructor: accessible to derived classes only, and only
              // declared (not defined) in this header.
00913         Weight(const Weight &);
00914     private:
              // Private assignment operator (note the non-const reference
              // parameter): client code cannot assign Weight objects.
00915         void operator=(Weight &);
00916 
              // Return a new object of the same concrete type.  Private here,
              // so it is reached through create() or by the friends above.
00924         virtual Weight * clone() const = 0;
00925 
00926     protected:
              // Context installed by create().
00927         const Internal * internal; // Weight::Internal == StatsSource
00928         Xapian::doclength querysize;
00929         Xapian::termcount wqf;
00930         std::string tname;
00931 
00932     public:
              // Default constructor.  Starts with a null statistics pointer
              // and zero counts, so an object that has not been through
              // create() holds well-defined values rather than indeterminate
              // ones.
00933         Weight() : internal(0), querysize(0), wqf(0) { }
00934         virtual ~Weight() { }
00935 
              // Clone this object and set the clone's internal, querysize, wqf
              // and tname from the arguments.  Returns the clone.  The clone()
              // implementations visible in this header allocate with `new`,
              // so the caller is presumably responsible for deleting the
              // result - confirm at call sites.
00948         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00949                           Xapian::termcount wqf_, std::string tname_) const {
00950             Weight * wt = clone();
00951             wt->internal = internal_;
00952             wt->querysize = querysize_;
00953             wt->wqf = wqf_;
00954             wt->tname = tname_;
00955             return wt;
00956         }
00957 
              // Name of the weighting scheme, as a string.
00962         virtual std::string name() const = 0;
00963 
              // Serialise this object's parameters into a string.
00965         virtual std::string serialise() const = 0;
00966 
              // Build a Weight object from a string such as serialise()
              // produces.
00968         virtual Weight * unserialise(const std::string &s) const = 0;
00969 
              // Weight contribution computed from a wdf and a document length.
00977         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00978                                       Xapian::doclength len) const = 0;
00979 
              // Companion to get_sumpart(); by its name, the maximum value
              // get_sumpart() can return.
00985         virtual Xapian::weight get_maxpart() const = 0;
00986 
              // Extra weight contribution computed from a document length only.
00995         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
00996 
              // Companion to get_sumextra(); by its name, the maximum value
              // get_sumextra() can return.
01000         virtual Xapian::weight get_maxextra() const = 0;
01001 
              // Whether get_sumpart() needs the document length.  Defaults to
              // true; subclasses may override.
01003         virtual bool get_sumpart_needs_doclength() const { return true; }
01004 };
01005 
// BoolWeight: a Weight whose every weight function returns 0.  It has no
// parameters, so serialise() yields an empty string and unserialise()
// ignores its argument.
01007 class BoolWeight : public Weight {
01008     public:
              // A fresh BoolWeight; there is no state to copy.
01009         Weight * clone() const {
01010             return new BoolWeight();
01011         }
01012         BoolWeight() { }
01013         ~BoolWeight() { }
              // Scheme name.
01014         std::string name() const { return "Bool"; }
              // Nothing to serialise.
01015         std::string serialise() const { return ""; }
              // The serialised form carries no information, so the argument
              // is ignored.
01016         Weight * unserialise(const std::string & /*s*/) const {
01017             return new BoolWeight();
01018         }
              // All weight contributions are zero.
01019         Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; }
01020         Xapian::weight get_maxpart() const { return 0; }
01021 
01022         Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; }
01023         Xapian::weight get_maxextra() const { return 0; }
01024 
              // get_sumpart() ignores the document length, so none is needed.
01025         bool get_sumpart_needs_doclength() const { return false; }
01026 };
01027 
// BM25Weight: a Weight parameterised by four doubles (A, B, C, D) and a
// minimum normalised length.  The weight functions are defined out of line;
// this header holds the parameters, the cached values and the constructors.
01040 class BM25Weight : public Weight {
01041     private:
              // Cached values.  They are mutable so the const member
              // calc_termweight() can fill them in; weight_calculated
              // (below) starts out false.
              // NOTE(review): no constructor initialises these three, yet
              // get_sumpart_needs_doclength() reads lenpart directly.  Confirm
              // that calc_termweight() always runs before that read.
01042         mutable Xapian::weight termweight;
01043         mutable Xapian::doclength lenpart;
01044         mutable double BD;
01045 
              // Scheme parameters, fixed at construction.
01046         double A, B, C, D;
01047         Xapian::doclength min_normlen;
01048 
01049         mutable bool weight_calculated;
01050 
01051         void calc_termweight() const;
01052 
01053     public:
              // Construct with explicit parameters.  Negative A, B and C are
              // raised to 0; D is clamped to the range [0, 1].  min_normlen_
              // is stored unchanged.
01072         BM25Weight(double A_, double B_, double C_, double D_,
01073                    double min_normlen_)
01074                 : A(A_ < 0 ? 0 : A_),
01075                   B(B_ < 0 ? 0 : B_),
01076                   C(C_ < 0 ? 0 : C_),
01077                   D(D_ < 0 ? 0 : (D_ > 1 ? 1 : D_)),
01078                   min_normlen(min_normlen_),
01079                   weight_calculated(false)
01080         {
01081         }
              // Default parameters: A = 1, B = 1, C = 0, D = 0.5,
              // min_normlen = 0.5.
01082         BM25Weight()
01083             : A(1), B(1), C(0), D(0.5), min_normlen(0.5), weight_calculated(false) { }
01084 
              // A new BM25Weight with the same five parameters.  Cached values
              // are not copied; the clone starts with weight_calculated false.
01085         Weight * clone() const {
01086             return new BM25Weight(A, B, C, D, min_normlen);
01087         }
01088         ~BM25Weight() { }
              // Scheme name.
01089         std::string name() const { return "BM25"; }
01090         std::string serialise() const;
01091         Weight * unserialise(const std::string & s) const;
01092         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01093         Xapian::weight get_maxpart() const;
01094 
01095         Xapian::weight get_sumextra(Xapian::doclength len) const;
01096         Xapian::weight get_maxextra() const;
01097 
              // True when the cached lenpart is non-zero.
01098         bool get_sumpart_needs_doclength() const { return lenpart != 0; }
01099 };
01100 
// TradWeight: a Weight parameterised by a single double, param_k.  The
// weight functions are defined out of line; this header holds the parameter,
// the cached values and the constructors.
01113 class TradWeight : public Weight {
01114     private:
              // Cached values.  They are mutable so the const member
              // calc_termweight() can fill them in; weight_calculated
              // (below) starts out false.
              // NOTE(review): no constructor initialises these two, yet
              // get_sumpart_needs_doclength() reads lenpart directly.  Confirm
              // that calc_termweight() always runs before that read.
01115         mutable Xapian::weight termweight;
01116         mutable Xapian::doclength lenpart;
01117 
              // Scheme parameter, fixed at construction.
01118         double param_k;
01119 
01120         mutable bool weight_calculated;
01121 
01122         void calc_termweight() const;
01123 
01124     public:
              // Construct with an explicit k; a negative k is raised to 0.
              // `explicit` stops a bare double from converting to a TradWeight.
01132         explicit TradWeight(double k)
01133             : param_k(k < 0 ? 0 : k),
01134               weight_calculated(false) { }
01135 
              // Default parameter: k = 1.0.
01136         TradWeight()
                  : param_k(1.0), weight_calculated(false) { }
01137 
              // A new TradWeight with the same k.  Cached values are not
              // copied; the clone starts with weight_calculated false.
01138         Weight * clone() const {
01139             return new TradWeight(param_k);
01140         }
01141         ~TradWeight() { }
              // Scheme name.
01142         std::string name() const { return "Trad"; }
01143         std::string serialise() const;
01144         Weight * unserialise(const std::string & s) const;
01145 
01146         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01147         Xapian::weight get_maxpart() const;
01148 
01149         Xapian::weight get_sumextra(Xapian::doclength len) const;
01150         Xapian::weight get_maxextra() const;
01151 
              // True when the cached lenpart is non-zero.
01152         bool get_sumpart_needs_doclength() const { return lenpart != 0; }
01153 };
01154 
01155 }
01156 
01157 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.8.1).
Generated on 30 Jun 2004 by Doxygen 1.2.15.