00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027 #define XAPIAN_INCLUDED_ENQUIRE_H
00028
#include <iterator>
#include <string>
#include <time.h>

#include <xapian/base.h>
#include <xapian/types.h>
00034
00035 namespace Xapian {
00036
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class TermIterator;
00043 class Weight;
00044
00048 class MSet {
00049 public:
00050 class Internal;
00052 Xapian::Internal::RefCntPtr<Internal> internal;
00053
00054 public:
00055
00057
00058
00060 MSet();
00061
00063 ~MSet();
00064
00066 MSet(const MSet & other);
00067
00069 void operator=(const MSet &other);
00070
00086 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00087
00090 void fetch(const MSetIterator &item) const;
00091
00094 void fetch() const;
00095
00100 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00101
00103 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00104
00112 Xapian::doccount get_termfreq(const std::string &tname) const;
00113
00121 Xapian::weight get_termweight(const std::string &tname) const;
00122
00130 Xapian::doccount get_firstitem() const;
00131
00141 Xapian::doccount get_matches_lower_bound() const;
00142
00155 Xapian::doccount get_matches_estimated() const;
00156
00166 Xapian::doccount get_matches_upper_bound() const;
00167
00173 Xapian::weight get_max_possible() const;
00174
00188 Xapian::weight get_max_attained() const;
00189
00190 Xapian::doccount size() const;
00191
00192 Xapian::doccount max_size() const;
00193
00194 bool empty() const;
00195
00196 void swap(MSet & other);
00197
00198 MSetIterator begin() const;
00199
00200 MSetIterator end() const;
00201
00202 MSetIterator back() const;
00203
00213 MSetIterator operator[](Xapian::doccount i) const;
00214
00216
00217 typedef MSetIterator value_type;
00218 typedef MSetIterator iterator;
00219 typedef MSetIterator const_iterator;
00220 typedef MSetIterator & reference;
00221 typedef MSetIterator & const_reference;
00222 typedef MSetIterator * pointer;
00223 typedef Xapian::doccount_diff difference_type;
00224 typedef Xapian::doccount size_type;
00226
00230 std::string get_description() const;
00231 };
00232
00236 class MSetIterator {
00237 private:
00238 friend class MSet;
00239 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00240 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00241
00242 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00243 : index(index_), mset(mset_) { }
00244
00245 Xapian::doccount index;
00246 MSet mset;
00247
00248 public:
00252 MSetIterator() : index(0), mset() { }
00253
00254 ~MSetIterator() { }
00255
00257 MSetIterator(const MSetIterator &other) {
00258 index = other.index;
00259 mset = other.mset;
00260 }
00261
00263 void operator=(const MSetIterator &other) {
00264 index = other.index;
00265 mset = other.mset;
00266 }
00267
00269 MSetIterator & operator++() {
00270 ++index;
00271 return *this;
00272 }
00273
00275 MSetIterator operator++(int) {
00276 MSetIterator tmp = *this;
00277 ++index;
00278 return tmp;
00279 }
00280
00282 MSetIterator & operator--() {
00283 --index;
00284 return *this;
00285 }
00286
00288 MSetIterator operator--(int) {
00289 MSetIterator tmp = *this;
00290 --index;
00291 return tmp;
00292 }
00293
00295 Xapian::docid operator*() const;
00296
00315 Xapian::Document get_document() const;
00316
00323 Xapian::doccount get_rank() const {
00324 return mset.get_firstitem() + index;
00325 }
00326
00328 Xapian::weight get_weight() const;
00329
00346 Xapian::doccount get_collapse_count() const;
00347
00353 Xapian::percent get_percent() const;
00354
00358 std::string get_description() const;
00359
00361
00362 typedef std::bidirectional_iterator_tag iterator_category;
00363 typedef Xapian::docid value_type;
00364 typedef Xapian::doccount_diff difference_type;
00365 typedef Xapian::docid * pointer;
00366 typedef Xapian::docid & reference;
00368 };
00369
00370 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00371 {
00372 return (a.index == b.index);
00373 }
00374
00375 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00376 {
00377 return (a.index != b.index);
00378 }
00379
00380 class ESetIterator;
00381
00386 class ESet {
00387 public:
00388 class Internal;
00390 Xapian::Internal::RefCntPtr<Internal> internal;
00391
00393 ESet();
00394
00396 ~ESet();
00397
00399 ESet(const ESet & other);
00400
00402 void operator=(const ESet &other);
00403
00408 Xapian::termcount get_ebound() const;
00409
00411 Xapian::termcount size() const;
00412
00414 bool empty() const;
00415
00417 ESetIterator begin() const;
00418
00420 ESetIterator end() const;
00421
00426 std::string get_description() const;
00427 };
00428
00430 class ESetIterator {
00431 private:
00432 friend class ESet;
00433 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00434 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00435
00436 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00437 : index(index_), eset(eset_) { }
00438
00439 Xapian::termcount index;
00440 ESet eset;
00441
00442 public:
00446 ESetIterator() : index(0), eset() { }
00447
00448 ~ESetIterator() { }
00449
00451 ESetIterator(const ESetIterator &other) {
00452 index = other.index;
00453 eset = other.eset;
00454 }
00455
00457 void operator=(const ESetIterator &other) {
00458 index = other.index;
00459 eset = other.eset;
00460 }
00461
00463 ESetIterator & operator++() {
00464 ++index;
00465 return *this;
00466 }
00467
00469 ESetIterator operator++(int) {
00470 ESetIterator tmp = *this;
00471 ++index;
00472 return tmp;
00473 }
00474
00476 ESetIterator & operator--() {
00477 --index;
00478 return *this;
00479 }
00480
00482 ESetIterator operator--(int) {
00483 ESetIterator tmp = *this;
00484 --index;
00485 return tmp;
00486 }
00487
00489 const std::string & operator *() const;
00490
00492 Xapian::weight get_weight() const;
00493
00497 std::string get_description() const;
00498
00500
00501 typedef std::bidirectional_iterator_tag iterator_category;
00502 typedef std::string value_type;
00503 typedef Xapian::termcount_diff difference_type;
00504 typedef std::string * pointer;
00505 typedef std::string & reference;
00507 };
00508
00509 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00510 {
00511 return (a.index == b.index);
00512 }
00513
00514 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00515 {
00516 return (a.index != b.index);
00517 }
00518
00523 class RSet {
00524 public:
00526 class Internal;
00527
00529 Internal *internal;
00530
00532 RSet(const RSet &rset);
00533
00535 void operator=(const RSet &rset);
00536
00538 RSet();
00539
00541 ~RSet();
00542
00544 Xapian::doccount size() const;
00545
00547 bool empty() const;
00548
00550 void add_document(Xapian::docid did);
00551
00553 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00554
00556 void remove_document(Xapian::docid did);
00557
00559 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00560
00562 bool contains(Xapian::docid did) const;
00563
00565 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00566
00571 std::string get_description() const;
00572 };
00573
00576 class MatchDecider {
00577 public:
00580 virtual int operator()(const Xapian::Document &doc) const = 0;
00581
00583 virtual ~MatchDecider() {}
00584 };
00585
/** Abstract functor invoked with a term name.
 *
 *  A pointer to an ExpandDecider can be passed to Enquire::get_eset().
 *  NOTE(review): presumably a non-zero result accepts the term and zero
 *  rejects it - confirm against the expand code.
 */
class ExpandDecider {
  public:
    /// Decide on the term @a tname; implemented by subclasses.
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor, so subclasses can be destroyed through a pointer
    /// to this base class.
    virtual ~ExpandDecider() {}
};
00597
00611 class Enquire {
00612 private:
00614 Enquire(const Enquire &);
00615
00617 void operator=(const Enquire &);
00618
00619 public:
00620 class Internal;
00622 Xapian::Internal::RefCntPtr<Internal> internal;
00623
00639 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00640
00643 ~Enquire();
00644
00652 void set_query(const Xapian::Query & query_);
00653
00660 const Xapian::Query & get_query();
00661
00668 void set_weighting_scheme(const Weight &weight_);
00669
00696 void set_collapse_key(Xapian::valueno collapse_key);
00697
00704 void set_sort_forward(bool sort_forward);
00705
00723 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00724
00738 void set_sorting(Xapian::valueno sort_key, int sort_bands);
00739
00751 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00752
00773 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00774 const RSet * omrset = 0,
00775 const MatchDecider * mdecider = 0) const;
00776
00777 static const int include_query_terms = 1;
00778 static const int use_exact_termfreq = 2;
00802 ESet get_eset(Xapian::termcount maxitems,
00803 const RSet & omrset,
00804 int flags = 0,
00805 double k = 1.0,
00806 const Xapian::ExpandDecider * edecider = 0) const;
00807
00822 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00823 const Xapian::ExpandDecider * edecider) const {
00824 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00825 }
00826
00856 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00857
00859 TermIterator get_matching_terms_end(Xapian::docid did) const;
00860
00884 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00885
00887 TermIterator get_matching_terms_end(const MSetIterator &it) const;
00888
00891 void register_match_decider(const std::string &name,
00892 const MatchDecider *mdecider = NULL);
00893
00897 std::string get_description() const;
00898 };
00899
00900 }
00901
00902 class SocketServer;
00903
00904 namespace Xapian {
00905
00907 class Weight {
00908 friend class Enquire;
00909 friend class ::SocketServer;
00910 public:
00911 class Internal;
00912 protected:
00913 Weight(const Weight &);
00914 private:
00915 void operator=(Weight &);
00916
00924 virtual Weight * clone() const = 0;
00925
00926 protected:
00927 const Internal * internal;
00928 Xapian::doclength querysize;
00929 Xapian::termcount wqf;
00930 std::string tname;
00931
00932 public:
00933 Weight() { }
00934 virtual ~Weight() { }
00935
00948 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
00949 Xapian::termcount wqf_, std::string tname_) const {
00950 Weight * wt = clone();
00951 wt->internal = internal_;
00952 wt->querysize = querysize_;
00953 wt->wqf = wqf_;
00954 wt->tname = tname_;
00955 return wt;
00956 }
00957
00962 virtual std::string name() const = 0;
00963
00965 virtual std::string serialise() const = 0;
00966
00968 virtual Weight * unserialise(const std::string &s) const = 0;
00969
00977 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00978 Xapian::doclength len) const = 0;
00979
00985 virtual Xapian::weight get_maxpart() const = 0;
00986
00995 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
00996
01000 virtual Xapian::weight get_maxextra() const = 0;
01001
01003 virtual bool get_sumpart_needs_doclength() const { return true; }
01004 };
01005
01007 class BoolWeight : public Weight {
01008 public:
01009 Weight * clone() const {
01010 return new BoolWeight;
01011 }
01012 BoolWeight() { }
01013 ~BoolWeight() { }
01014 std::string name() const { return "Bool"; }
01015 std::string serialise() const { return ""; }
01016 Weight * unserialise(const std::string & ) const {
01017 return new BoolWeight;
01018 }
01019 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
01020 Xapian::weight get_maxpart() const { return 0; }
01021
01022 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
01023 Xapian::weight get_maxextra() const { return 0; }
01024
01025 bool get_sumpart_needs_doclength() const { return false; }
01026 };
01027
01040 class BM25Weight : public Weight {
01041 private:
01042 mutable Xapian::weight termweight;
01043 mutable Xapian::doclength lenpart;
01044 mutable double BD;
01045
01046 double A, B, C, D;
01047 Xapian::doclength min_normlen;
01048
01049 mutable bool weight_calculated;
01050
01051 void calc_termweight() const;
01052
01053 public:
01072 BM25Weight(double A_, double B_, double C_, double D_,
01073 double min_normlen_)
01074 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
01075 weight_calculated(false)
01076 {
01077 if (A < 0) A = 0;
01078 if (B < 0) B = 0;
01079 if (C < 0) C = 0;
01080 if (D < 0) D = 0; else if (D > 1) D = 1;
01081 }
01082 BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
01083 weight_calculated(false) { }
01084
01085 Weight * clone() const {
01086 return new BM25Weight(A, B, C, D, min_normlen);
01087 }
01088 ~BM25Weight() { }
01089 std::string name() const { return "BM25"; }
01090 std::string serialise() const;
01091 Weight * unserialise(const std::string & s) const;
01092 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01093 Xapian::weight get_maxpart() const;
01094
01095 Xapian::weight get_sumextra(Xapian::doclength len) const;
01096 Xapian::weight get_maxextra() const;
01097
01098 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01099 };
01100
01113 class TradWeight : public Weight {
01114 private:
01115 mutable Xapian::weight termweight;
01116 mutable Xapian::doclength lenpart;
01117
01118 double param_k;
01119
01120 mutable bool weight_calculated;
01121
01122 void calc_termweight() const;
01123
01124 public:
01132 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01133 if (param_k < 0) param_k = 0;
01134 }
01135
01136 TradWeight() : param_k(1.0), weight_calculated(false) { }
01137
01138 Weight * clone() const {
01139 return new TradWeight(param_k);
01140 }
01141 ~TradWeight() { }
01142 std::string name() const { return "Trad"; }
01143 std::string serialise() const;
01144 Weight * unserialise(const std::string & s) const;
01145
01146 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01147 Xapian::weight get_maxpart() const;
01148
01149 Xapian::weight get_sumextra(Xapian::doclength len) const;
01150 Xapian::weight get_maxextra() const;
01151
01152 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01153 };
01154
01155 }
01156
01157 #endif