include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* ----START-LICENCE----
00005  * Copyright 1999,2000,2001 BrightStation PLC
00006  * Copyright 2001,2002 Ananova Ltd
00007  * Copyright 2002,2003,2004,2005 Olly Betts
00008  *
00009  * This program is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU General Public License as
00011  * published by the Free Software Foundation; either version 2 of the
00012  * License, or (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00022  * USA
00023  * -----END-LICENCE-----
00024  */
00025 
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028 
00029 #include <string>
00030 #include <time.h> // for time_t
00031 
00032 #include <xapian/base.h>
00033 #include <xapian/error.h>
00034 #include <xapian/types.h>
00035 
00036 namespace Xapian {
00037 
00038 class Database;
00039 class Document;
00040 class ErrorHandler;
00041 class MSetIterator;
00042 class Query;
00043 class TermIterator;
00044 class Weight;
00045 
00049 class MSet {
      // Handle for a set of match results (presumably "match set"; it is the
      // type returned by Enquire::get_mset()).  All state lives behind the
      // reference-counted `internal` pointer.  Every member except max_size()
      // is only declared here; the definitions are not part of this header.
00050     public:
00051         class Internal;
              // Reference-counted pointer to the implementation object.
00053         Xapian::Internal::RefCntPtr<Internal> internal;
00054 
              // Construct from an existing implementation object.  `explicit`
              // prevents implicit conversion from a raw Internal pointer.
00056         explicit MSet(MSet::Internal * internal_);
00057 
              // Default constructor.
00059         MSet();
00060 
              // Destructor.
00062         ~MSet();
00063 
              // Copy constructor.
00065         MSet(const MSet & other);
00066 
              // Assignment.  Returns void, so assignments cannot be chained.
00068         void operator=(const MSet &other);
00069 
              // fetch() overloads: for the range [begin, end), for a single
              // item, and (no arguments) presumably for every item in the set.
              // What exactly is fetched is not visible from this header.
00085         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00086 
00089         void fetch(const MSetIterator &item) const;
00090 
00093         void fetch() const;
00094 
              // Convert a weight to a Xapian::percent value.
00099         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00100 
              // As above, but for the item that `it` refers to.
00102         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00103 
              // Per-term statistics looked up by term name.
00111         Xapian::doccount get_termfreq(const std::string &tname) const;
00112 
00120         Xapian::weight get_termweight(const std::string &tname) const;
00121 
              // Offset of the first item of this MSet; MSetIterator::get_rank()
              // adds an iterator's position to this value.
00129         Xapian::doccount get_firstitem() const;
00130 
              // Lower bound, estimate and upper bound for the number of
              // matches (presumably of the whole query, not just the items
              // held here -- confirm against the implementation).
00140         Xapian::doccount get_matches_lower_bound() const;
00141 
00154         Xapian::doccount get_matches_estimated() const;
00155 
00165         Xapian::doccount get_matches_upper_bound() const;
00166 
              // Maximum possible / maximum attained weight.
00172         Xapian::weight get_max_possible() const;
00173 
00187         Xapian::weight get_max_attained() const;
00188 
              // Number of items in this MSet.
00190         Xapian::doccount size() const;
00191 
              // Always the same as size(); provided for container-style code.
00193         Xapian::doccount max_size() const { return size(); }
00194 
              // Emptiness test.
00196         bool empty() const;
00197 
              // Exchange contents with `other`.
00199         void swap(MSet & other);
00200 
              // Iterators to the first item and one-past-the-last item.
00202         MSetIterator begin() const;
00203 
00205         MSetIterator end() const;
00206 
              // Iterator to the last item (returns an iterator, not an element).
00208         MSetIterator back() const;
00209 
              // Iterator to the item at position `i` (returns an iterator).
00219         MSetIterator operator[](Xapian::doccount i) const;
00220 
00222 
              // Container-style member typedefs.
              // NOTE(review): `const_reference` is declared identically to
              // `reference` (it is not const-qualified); the original "Hmm" and
              // FIXME remarks suggest these typedefs were known to be imperfect.
00223         typedef MSetIterator value_type; // FIXME: not assignable...
00224         typedef MSetIterator iterator;
00225         typedef MSetIterator const_iterator;
00226         typedef MSetIterator & reference; // Hmm
00227         typedef MSetIterator & const_reference;
00228         typedef MSetIterator * pointer; // Hmm
00229         typedef Xapian::doccount_diff difference_type;
00230         typedef Xapian::doccount size_type;
00232 
              // Human-readable description of this object.
00236         std::string get_description() const;
00237 };
00238 
00242 class MSetIterator {
      // Bidirectional iterator over the items of an MSet.  It stores a position
      // (`index`) together with its own copy of the MSet handle (`mset`).
00243     private:
00244         friend class MSet;
00245         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00246         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00247 
              // Private: only friends (MSet) can create an iterator at a
              // chosen position.
00248         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00249             : index(index_), mset(mset_) { }
00250 
              // Position of the current item within `mset`.
00251         Xapian::doccount index;
              // The MSet being iterated (held by value).
00252         MSet mset;
00253 
00254     public:
              // Default constructor: position 0 in a default-constructed MSet.
00258         MSetIterator() : index(0), mset() { }
00259 
00260         ~MSetIterator() { }
00261 
              // Copy constructor.  Members are copy-initialised in the
              // initialiser list (declaration order: index, mset) rather than
              // being default-constructed first and then assigned.
00263         MSetIterator(const MSetIterator &other)
00264             : index(other.index), mset(other.mset)
00265         {
00266         }
00267 
              // Assignment.  Returns void, so assignments cannot be chained.
00269         void operator=(const MSetIterator &other) {
00270             index = other.index;
00271             mset = other.mset;
00272         }
00273 
              // Pre-increment: advance one item and return this iterator.
00275         MSetIterator & operator++() {
00276             ++index;
00277             return *this;
00278         }
00279 
              // Post-increment: advance one item, return the previous value.
00281         MSetIterator operator++(int) {
00282             MSetIterator tmp = *this;
00283             ++index;
00284             return tmp;
00285         }
00286 
              // Pre-decrement: step back one item and return this iterator.
00288         MSetIterator & operator--() {
00289             --index;
00290             return *this;
00291         }
00292 
              // Post-decrement: step back one item, return the previous value.
00294         MSetIterator operator--(int) {
00295             MSetIterator tmp = *this;
00296             --index;
00297             return tmp;
00298         }
00299 
              // Dereference: yields a document id (by value).
00301         Xapian::docid operator*() const;
00302 
              // Document object for the current item.
00321         Xapian::Document get_document() const;
00322 
              // Rank of the current item: the MSet's first-item offset plus
              // this iterator's position within the MSet.
00329         Xapian::doccount get_rank() const {
00330             return mset.get_firstitem() + index;
00331         }
00332 
              // Weight of the current item.
00334         Xapian::weight get_weight() const;
00335 
              // Collapse count for the current item (see
              // Enquire::set_collapse_key(); exact meaning not visible here).
00352         Xapian::doccount get_collapse_count() const;
00353 
              // Weight of the current item as a Xapian::percent.
00359         Xapian::percent get_percent() const;
00360 
              // Human-readable description of this object.
00364         std::string get_description() const;
00365 
00367 
              // Iterator traits.
              // NOTE(review): std::bidirectional_iterator_tag lives in
              // <iterator>, which this header does not include directly.
00368         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00369         typedef Xapian::docid value_type;
00370         typedef Xapian::doccount_diff difference_type;
00371         typedef Xapian::docid * pointer;
00372         typedef Xapian::docid & reference;
00374 };
00375 
00376 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00377 {
          // Equality is decided by position alone; the MSet each iterator
          // belongs to is not compared.
00378     return b.index == a.index;
00379 }
00380 
00381 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00382 {
          // Inequality is decided by position alone; the MSet each iterator
          // belongs to is not compared.
00383     return b.index != a.index;
00384 }
00385 
00386 class ESetIterator;
00387 
00392 class ESet {
      // Handle for the result of Enquire::get_eset() (presumably "expand set":
      // terms, each with a weight -- see ESetIterator).  All state lives behind
      // the reference-counted `internal` pointer; every member except
      // max_size() is only declared here.
00393     public:
00394         class Internal;
              // Reference-counted pointer to the implementation object.
00396         Xapian::Internal::RefCntPtr<Internal> internal;
00397 
              // Default constructor.
00399         ESet();
00400 
              // Destructor.
00402         ~ESet();
00403 
              // Copy constructor.
00405         ESet(const ESet & other);
00406 
              // Assignment.  Returns void, so assignments cannot be chained.
00408         void operator=(const ESet &other);
00409 
              // NOTE(review): returns a term count; presumably a bound on the
              // number of terms that could have been returned -- confirm.
00414         Xapian::termcount get_ebound() const;
00415 
              // Number of items in this ESet.
00417         Xapian::termcount size() const;
00418 
              // Always the same as size(); provided for container-style code.
00420         Xapian::termcount max_size() const { return size(); }
00421 
              // Emptiness test.
00423         bool empty() const;
00424 
              // Exchange contents with `other`.
00426         void swap(ESet & other);
00427 
              // Iterators to the first item and one-past-the-last item.
00429         ESetIterator begin() const;
00430 
00432         ESetIterator end() const;
00433 
              // Iterator to the last item (returns an iterator, not an element).
00435         ESetIterator back() const;
00436 
              // Iterator to the item at position `i` (returns an iterator).
00438         ESetIterator operator[](Xapian::termcount i) const;
00439 
              // Human-readable description of this object.
00444         std::string get_description() const;
00445 };
00446 
00448 class ESetIterator {
      // Bidirectional iterator over the items of an ESet.  It stores a position
      // (`index`) together with its own copy of the ESet handle (`eset`).
00449     private:
00450         friend class ESet;
00451         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00452         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00453 
              // Private: only friends (ESet) can create an iterator at a
              // chosen position.
00454         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00455             : index(index_), eset(eset_) { }
00456 
              // Position of the current item within `eset`.
00457         Xapian::termcount index;
              // The ESet being iterated (held by value).
00458         ESet eset;
00459 
00460     public:
              // Default constructor: position 0 in a default-constructed ESet.
00464         ESetIterator() : index(0), eset() { }
00465 
00466         ~ESetIterator() { }
00467 
              // Copy constructor.  Members are copy-initialised in the
              // initialiser list (declaration order: index, eset) rather than
              // being default-constructed first and then assigned.
00469         ESetIterator(const ESetIterator &other)
00470             : index(other.index), eset(other.eset)
00471         {
00472         }
00473 
              // Assignment.  Returns void, so assignments cannot be chained.
00475         void operator=(const ESetIterator &other) {
00476             index = other.index;
00477             eset = other.eset;
00478         }
00479 
              // Pre-increment: advance one item and return this iterator.
00481         ESetIterator & operator++() {
00482             ++index;
00483             return *this;
00484         }
00485 
              // Post-increment: advance one item, return the previous value.
00487         ESetIterator operator++(int) {
00488             ESetIterator tmp = *this;
00489             ++index;
00490             return tmp;
00491         }
00492 
              // Pre-decrement: step back one item and return this iterator.
00494         ESetIterator & operator--() {
00495             --index;
00496             return *this;
00497         }
00498 
              // Post-decrement: step back one item, return the previous value.
00500         ESetIterator operator--(int) {
00501             ESetIterator tmp = *this;
00502             --index;
00503             return tmp;
00504         }
00505 
              // Dereference: yields a reference to a string (the term).
00507         const std::string & operator *() const;
00508 
              // Weight of the current item.
00510         Xapian::weight get_weight() const;
00511 
              // Human-readable description of this object.
00515         std::string get_description() const;
00516 
00518 
              // Iterator traits.
              // NOTE(review): std::bidirectional_iterator_tag lives in
              // <iterator>, which this header does not include directly.
00519         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00520         typedef std::string value_type;
00521         typedef Xapian::termcount_diff difference_type;
00522         typedef std::string * pointer;
00523         typedef std::string & reference;
00525 };
00526 
00527 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00528 {
          // Equality is decided by position alone; the ESet each iterator
          // belongs to is not compared.
00529     return b.index == a.index;
00530 }
00531 
00532 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00533 {
          // Inequality is decided by position alone; the ESet each iterator
          // belongs to is not compared.
00534     return b.index != a.index;
00535 }
00536 
00541 class RSet {
      // A set of document ids (presumably "relevance set"; it is passed to
      // Enquire::get_mset() and Enquire::get_eset()).  All state lives behind
      // the reference-counted `internal` pointer.  The MSetIterator overloads
      // below simply dereference the iterator and forward to the docid
      // overloads.
00542     public:
00544         class Internal;
00545 
              // Reference-counted pointer to the implementation object.
00547         Xapian::Internal::RefCntPtr<Internal> internal;
00548 
              // Copy constructor.
00550         RSet(const RSet &rset);
00551 
              // Assignment.  Returns void, so assignments cannot be chained.
00553         void operator=(const RSet &rset);
00554 
              // Default constructor.
00556         RSet();
00557 
              // Destructor.
00559         ~RSet();
00560 
              // Number of documents in the set.
00562         Xapian::doccount size() const;
00563 
              // Emptiness test.
00565         bool empty() const;
00566 
              // Add the document with id `did`.
00568         void add_document(Xapian::docid did);
00569 
              // Add the document that `i` refers to.
00571         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00572 
              // Remove the document with id `did`.
00574         void remove_document(Xapian::docid did);
00575 
              // Remove the document that `i` refers to.
00577         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00578 
              // Membership test for the document with id `did`.
00580         bool contains(Xapian::docid did) const;
00581 
              // Membership test for the document that `i` refers to.
              // Const-qualified like the docid overload it forwards to, so
              // it can be called on a const RSet.
00583         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00584 
              // Human-readable description of this object.
00589         std::string get_description() const;
00590 };
00591 
00594 class MatchDecider {
      // Abstract functor interface taking a Document.  A pointer to one can be
      // passed to Enquire::get_mset() and Enquire::register_match_decider().
00595     public:
              // Decision callback for `doc`.  Returns int rather than bool;
              // presumably non-zero means "accept" -- confirm against the
              // matcher implementation.
00598         virtual int operator()(const Xapian::Document &doc) const = 0;
00599 
              // Virtual so that subclasses can be destroyed through a base
              // pointer.
00601         virtual ~MatchDecider() {}
00602 };
00603 
00606 class ExpandDecider {
      // Abstract functor interface taking a term name.  A pointer to one can
      // be passed to Enquire::get_eset().
00607     public:
              // Decision callback for the term `tname`.  Returns int rather
              // than bool; presumably non-zero means "accept" -- confirm
              // against the expand implementation.
00610         virtual int operator()(const std::string & tname) const = 0;
00611 
              // Virtual so that subclasses can be destroyed through a base
              // pointer.
00613         virtual ~ExpandDecider() {}
00614 };
00615 
00626 class Enquire {
      // Query front end: constructed from a Database, configured with a query
      // and weighting/sorting/collapsing options, and then asked for results
      // via get_mset() / get_eset().  State lives behind the reference-counted
      // `internal` pointer.
00627     private:
              // Copy constructor and assignment are private, so client code
              // cannot copy an Enquire object.
00629         Enquire(const Enquire &);
00630 
00632         void operator=(const Enquire &);
00633 
00634     public:
00635         class Internal;
              // Reference-counted pointer to the implementation object.
00637         Xapian::Internal::RefCntPtr<Internal> internal;
00638 
              // Construct for the given database(s), with an optional error
              // handler (null by default).
00654         Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00655 
              // Destructor.
00658         ~Enquire();
00659 
              // Set the query to run.  `qlen` defaults to 0; its meaning is
              // not visible in this header (presumably a query length).
00666         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00667 
              // Access the current query.  Note: not a const member function.
00674         const Xapian::Query & get_query();
00675 
              // Set the weighting scheme.  Weight declares Enquire a friend
              // "so Enquire can clone us", which suggests the argument is
              // cloned rather than referenced -- confirm in the implementation.
00682         void set_weighting_scheme(const Weight &weight_);
00683 
              // Set the value number used as the collapse key.
00710         void set_collapse_key(Xapian::valueno collapse_key);
00711 
              // Ordering by document id.  The numeric values are fixed:
              // DESCENDING = 0, ASCENDING = 1, DONT_CARE = 2.
00712         typedef enum {
00713             ASCENDING = 1,
00714             DESCENDING = 0,
00715             DONT_CARE = 2
00716         } docid_order;
00717 
              // Select the document id ordering.
00741         void set_docid_order(docid_order order);
00742 
              // Boolean convenience wrapper around set_docid_order():
              // true selects ASCENDING, false selects DESCENDING.
00747         void set_sort_forward(bool sort_forward) {
00748             set_docid_order(sort_forward ? ASCENDING : DESCENDING);
00749         }
00750 
              // Set a percentage cutoff and, optionally, a weight cutoff
              // (0 by default).
00769         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00770 
              // Compatibility wrapper that maps the older sort_bands interface
              // onto the set_sort_by_*() methods:
              //  - sort_bands > 1 throws Xapian::UnimplementedError;
              //  - sort_bands == 0, or a sort_key of valueno(-1), selects
              //    sorting by relevance;
              //  - otherwise sort by value, or by value then relevance when
              //    sort_by_relevance is true.
              // The set_sort_by_value*() calls use their default ascending=true.
00776         void set_sorting(Xapian::valueno sort_key, int sort_bands,
00777                          bool sort_by_relevance = false) {
00778             if (sort_bands > 1) {
00779                 throw Xapian::UnimplementedError("sort bands are no longer supported");
00780             }
00781             if (sort_bands == 0 || sort_key == Xapian::valueno(-1)) {
00782                 set_sort_by_relevance();
00783             } else if (!sort_by_relevance) {
00784                 set_sort_by_value(sort_key);
00785             } else {
00786                 set_sort_by_value_then_relevance(sort_key);
00787             }
00788         }
00789 
              // Sort results by relevance only.
00792         void set_sort_by_relevance();
00793 
              // Sort by the value in slot `sort_key` (ascending by default),
              // optionally with relevance as the secondary ordering.
00804         void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00805         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00806                                               bool ascending = true);
00807 // FIXME: consider implementing this:
00808 //      void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00809 //                                            bool ascending);
00810 
              // Set a bias weight and half-life (a time_t); the effect is not
              // visible in this header.
00822         void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00823 
              // Run the match and return up to `maxitems` results starting at
              // offset `first`.  Optional arguments: `checkatleast` (default
              // 0), an RSet, and a MatchDecider; both pointers may be null.
00849         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00850                       Xapian::doccount checkatleast = 0,
00851                       const RSet * omrset = 0,
00852                       const MatchDecider * mdecider = 0) const;
              // Convenience overload without `checkatleast`: forwards to the
              // overload above with checkatleast = 0.
00853         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00854                       const RSet * omrset,
00855                       const MatchDecider * mdecider = 0) const {
00856             return get_mset(first, maxitems, 0, omrset, mdecider);
00857         }
00858 
              // Distinct bit values (1 and 2); presumably meant to be OR-ed
              // together into the `flags` argument of get_eset().
00859         static const int include_query_terms = 1;
00860         static const int use_exact_termfreq = 2;
              // Return up to `maxitems` terms for the documents in `omrset`.
              // Defaults: flags = 0, k = 1.0, no ExpandDecider.
00883         ESet get_eset(Xapian::termcount maxitems,
00884                         const RSet & omrset,
00885                         int flags = 0,
00886                         double k = 1.0,
00887                         const Xapian::ExpandDecider * edecider = 0) const;
00888 
              // Convenience overload taking only a decider: forwards to the
              // overload above with flags = 0 and k = 1.0.
00902         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00903                                const Xapian::ExpandDecider * edecider) const {
00904             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00905         }
00906 
              // Begin/end iterators over the matching terms of a document,
              // identified either by document id or by an MSetIterator.
00935         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00936 
00938         TermIterator get_matching_terms_end(Xapian::docid did) const;
00939 
00962         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00963 
00965         TermIterator get_matching_terms_end(const MSetIterator &it) const;
00966 
              // Register a MatchDecider under `name`.  The pointer defaults to
              // NULL; what registering a null decider does is not visible here.
00973         void register_match_decider(const std::string &name,
00974                                     const MatchDecider *mdecider = NULL);
00975 
              // Human-readable description of this object.
00979         std::string get_description() const;
00980 };
00981 
00982 }
00983 
00984 class SocketServer;
00985 
00986 namespace Xapian {
00987 
00989 class Weight {
      // Abstract base class for weighting schemes.  A concrete scheme
      // implements clone(), name(), serialise(), unserialise() and the
      // get_*() weight functions.  create() produces a per-term copy of a
      // scheme with the statistics source, query size, wqf and term name
      // filled in.
00990     friend class Enquire; // So Enquire can clone us
00991     friend class ::SocketServer; // So SocketServer can clone us - FIXME
00992     public:
00993         class Internal;
00994     protected:
              // Copy constructor: available to subclasses only.
00995         Weight(const Weight &);
00996     private:
              // Private assignment: Weight objects cannot be assigned by
              // client code.
00997         void operator=(Weight &);
00998 
              // Return a new object of the same concrete type.  Private, so it
              // is reachable only from this class (create()) and the friends
              // above.  Implementations in this header allocate with `new`;
              // the caller presumably takes ownership -- confirm at call sites.
01008         virtual Weight * clone() const = 0;
01009 
01010     protected:
              // Per-term state, filled in by create().
01011         const Internal * internal; // Weight::Internal == StatsSource
01012         Xapian::doclength querysize;
01013         Xapian::termcount wqf;
01014         std::string tname;
01015 
01016     public:
              // Default constructor.  The pointer and numeric members are
              // zero-initialised so that an object which has not been through
              // create() never holds indeterminate values.
01017         Weight() : internal(0), querysize(0), wqf(0) { }
01018         virtual ~Weight() { }
01019 
              // Clone this scheme and fill the clone's per-term state from the
              // arguments.  Returns the clone (obtained from clone()).
01032         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01033                           Xapian::termcount wqf_, std::string tname_) const {
01034             Weight * wt = clone();
01035             wt->internal = internal_;
01036             wt->querysize = querysize_;
01037             wt->wqf = wqf_;
01038             wt->tname = tname_;
01039             return wt;
01040         }
01041 
              // Name of the weighting scheme (BoolWeight returns "Bool").
01046         virtual std::string name() const = 0;
01047 
              // Serialise the scheme's parameters to a string.
01049         virtual std::string serialise() const = 0;
01050 
              // Create a new scheme object from a serialised string.
01052         virtual Weight * unserialise(const std::string &s) const = 0;
01053 
              // Weight contribution given a wdf and a document length.
01061         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01062                                       Xapian::doclength len) const = 0;
01063 
              // Presumably an upper bound on get_sumpart() -- confirm.
01069         virtual Xapian::weight get_maxpart() const = 0;
01070 
              // Extra weight contribution that depends only on document length.
01079         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01080 
              // Presumably an upper bound on get_sumextra() -- confirm.
01084         virtual Xapian::weight get_maxextra() const = 0;
01085 
              // Whether get_sumpart() needs a real document length.  Defaults
              // to true; BoolWeight overrides this to return false.
01087         virtual bool get_sumpart_needs_doclength() const { return true; }
01088 };
01089 
01091 class BoolWeight : public Weight {
      // Weighting scheme in which every weight function returns 0.  It has no
      // parameters: serialise() returns an empty string and unserialise()
      // ignores its argument.
01092     public:
              // Returns a newly allocated BoolWeight.
01093         BoolWeight * clone() const {
01094             return new BoolWeight;
01095         }
01096         BoolWeight() { }
01097         ~BoolWeight() { }
01098         std::string name() const { return "Bool"; }
01099         std::string serialise() const { return ""; }
              // Returns a newly allocated BoolWeight; the input is unused.
01100         BoolWeight * unserialise(const std::string & /*s*/) const {
01101             return new BoolWeight;
01102         }
              // All weight contributions and their maxima are 0.
01103         Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; }
01104         Xapian::weight get_maxpart() const { return 0; }
01105 
01106         Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; }
01107         Xapian::weight get_maxextra() const { return 0; }
01108 
              // The document length is never used, so it is not needed.
01109         bool get_sumpart_needs_doclength() const { return false; }
01110 };
01111 
01124 class BM25Weight : public Weight {
      // BM25 weighting scheme with parameters k1, k2, k3, b and min_normlen.
      // Only the constructors are defined in this header; the weight
      // calculations are defined elsewhere.
01125     private:
              // Mutable so the const member functions can update them
              // (presumably cached by calc_termweight(), with
              // weight_calculated marking the cache as valid -- confirm).
01126         mutable Xapian::weight termweight;
01127         mutable Xapian::doclength lenpart;
01128 
              // Scheme parameters, fixed at construction.
01129         double k1, k2, k3, b;
01130         Xapian::doclength min_normlen;
01131 
01132         mutable bool weight_calculated;
01133 
01134         void calc_termweight() const;
01135 
01136     public:
              // Construct with explicit parameters.  Out-of-range values are
              // clamped rather than rejected: negative k1, k2 and k3 become
              // 0, and b is limited to the range [0, 1].  min_normlen_ is
              // stored without any range check.
01155         BM25Weight(double k1_, double k2_, double k3_, double b_,
01156                    double min_normlen_)
01157                 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01158                   weight_calculated(false)
01159         {
01160             if (k1 < 0) k1 = 0;
01161             if (k2 < 0) k2 = 0;
01162             if (k3 < 0) k3 = 0;
01163             if (b < 0) b = 0; else if (b > 1) b = 1;
01164         }
              // Default parameters: k1 = 1, k2 = 0, k3 = 1, b = 0.5,
              // min_normlen = 0.5.
01165         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01166                        weight_calculated(false) { }
01167 
              // Implementations of the Weight interface (defined elsewhere).
01168         BM25Weight * clone() const;
01169         ~BM25Weight() { }
01170         std::string name() const;
01171         std::string serialise() const;
01172         BM25Weight * unserialise(const std::string & s) const;
01173         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01174         Xapian::weight get_maxpart() const;
01175 
01176         Xapian::weight get_sumextra(Xapian::doclength len) const;
01177         Xapian::weight get_maxextra() const;
01178 
01179         bool get_sumpart_needs_doclength() const;
01180 };
01181 
01195 class TradWeight : public Weight {
      // Weighting scheme with a single parameter, param_k.  Only the
      // constructors are defined in this header; the weight calculations are
      // defined elsewhere.
01196     private:
              // Mutable so the const member functions can update them
              // (presumably cached by calc_termweight(), with
              // weight_calculated marking the cache as valid -- confirm).
01197         mutable Xapian::weight termweight;
01198         mutable Xapian::doclength lenpart;
01199 
              // Scheme parameter, fixed at construction.
01200         double param_k;
01201 
01202         mutable bool weight_calculated;
01203 
01204         void calc_termweight() const;
01205 
01206     public:
              // Construct with an explicit k.  A negative k is clamped to 0
              // rather than rejected.  `explicit` prevents implicit
              // conversion from double.
01214         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01215             if (param_k < 0) param_k = 0;
01216         }
01217 
              // Default parameter: k = 1.0.
01218         TradWeight() : param_k(1.0), weight_calculated(false) { }
01219 
              // Implementations of the Weight interface (defined elsewhere).
01220         TradWeight * clone() const;
01221         ~TradWeight() { }
01222         std::string name() const;
01223         std::string serialise() const;
01224         TradWeight * unserialise(const std::string & s) const;
01225 
01226         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01227         Xapian::weight get_maxpart() const;
01228 
01229         Xapian::weight get_sumextra(Xapian::doclength len) const;
01230         Xapian::weight get_maxextra() const;
01231 
01232         bool get_sumpart_needs_doclength() const;
01233 };
01234 
01235 }
01236 
01237 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.9.3).
Generated on 16 Feb 2006 by Doxygen 1.4.6.