00001
00025 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00026 #define XAPIAN_INCLUDED_ENQUIRE_H
00027
00028 #include <string>
00029 #include <xapian/types.h>
00030
00031 namespace Xapian {
00032
00033 class Document;
00034 class Database;
00035 class ErrorHandler;
00036
00037 class Query;
00038 class Weight;
00039
00044 class MSetIterator {
00045 private:
00046 MSetIterator(Internal *internal_);
00047
00048 public:
00049 friend class MSet;
00050
00051 class Internal;
00053 Internal *internal;
00054
00055 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00056
00060 MSetIterator();
00061
00062 ~MSetIterator();
00063
00065 MSetIterator(const MSetIterator &other);
00066
00068 void operator=(const MSetIterator &other);
00069
00071 MSetIterator & operator++();
00072
00073 void operator++(int);
00074
00076 docid operator *() const;
00077
00094 Document get_document() const;
00095
00102 doccount get_rank() const;
00103
00105 weight get_weight() const;
00106
00111 percent get_percent() const;
00112
00116 std::string get_description() const;
00117
00119
00120 typedef std::input_iterator_tag iterator_category;
00121 typedef docid value_type;
00122 typedef doccount_diff difference_type;
00123 typedef docid * pointer;
00124 typedef docid & reference;
00126 };
00127
00128 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00129 {
00130 return !(a == b);
00131 }
00132
00136 class MSet {
00137 public:
00138 class Internal;
00140 Internal *internal;
00141
00142
00144
00145
00147 MSet();
00148
00150 ~MSet();
00151
00153 MSet(const MSet & other);
00154
00156 void operator=(const MSet &other);
00157
00173 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00174
00177 void fetch(const MSetIterator &item) const;
00178
00181 void fetch() const;
00182
00187 percent convert_to_percent(weight wt) const;
00188
00190 percent convert_to_percent(const MSetIterator &it) const;
00191
00199 doccount get_termfreq(const termname &tname) const;
00200
00208 weight get_termweight(const termname &tname) const;
00209
00216 doccount get_firstitem() const;
00217
00224 doccount get_matches_lower_bound() const;
00225
00235 doccount get_matches_estimated() const;
00236
00243 doccount get_matches_upper_bound() const;
00244
00250 weight get_max_possible() const;
00251
00265 weight get_max_attained() const;
00266
00267 doccount size() const;
00268
00269 doccount max_size() const;
00270
00271 bool empty() const;
00272
00273 void swap(MSet & other);
00274
00275 MSetIterator begin() const;
00276
00277 MSetIterator end() const;
00278
00279 MSetIterator back() const;
00280
00290 MSetIterator operator[](doccount i) const;
00291
00293
00294 typedef std::input_iterator_tag iterator_category;
00295 typedef MSetIterator value_type;
00296 typedef MSetIterator iterator;
00297 typedef MSetIterator const_iterator;
00298 typedef MSetIterator & reference;
00299 typedef MSetIterator & const_reference;
00300 typedef MSetIterator * pointer;
00301 typedef doccount_diff difference_type;
00302 typedef doccount size_type;
00304
00308 std::string get_description() const;
00309 };
00310
00312 class ESetIterator {
00313 private:
00314
00315 ESetIterator(Internal *internal_);
00316
00317 public:
00318 friend class ESet;
00319 class Internal;
00321 Internal *internal;
00322
00323 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00324
00328 ESetIterator();
00329
00331 ~ESetIterator();
00332
00334 ESetIterator(const ESetIterator &other);
00335
00337 void operator=(const ESetIterator &other);
00338
00339 ESetIterator & operator++();
00340
00341 void operator++(int);
00342
00344 const termname & operator *() const;
00345
00347 weight get_weight() const;
00348
00352 std::string get_description() const;
00353
00355
00356 typedef std::input_iterator_tag iterator_category;
00357 typedef termname value_type;
00358 typedef termcount_diff difference_type;
00359 typedef termname * pointer;
00360 typedef termname & reference;
00362 };
00363
00364 inline bool
00365 operator!=(const ESetIterator &a, const ESetIterator &b)
00366 {
00367 return !(a == b);
00368 }
00369
00375 class ESet {
00376 public:
00377 class Internal;
00379 Internal *internal;
00380
00382 ESet();
00383
00385 ~ESet();
00386
00388 ESet(const ESet & other);
00389
00391 void operator=(const ESet &other);
00392
00397 termcount get_ebound() const;
00398
00400 termcount size() const;
00401
00403 bool empty() const;
00404
00406 ESetIterator begin() const;
00407
00409 ESetIterator end() const;
00410
00415 std::string get_description() const;
00416 };
00417
00422 class RSet {
00423 public:
00425 class Internal;
00427 Internal *internal;
00428
00430 RSet(const RSet &rset);
00431
00433 void operator=(const RSet &rset);
00434
00436 RSet();
00437
00439 ~RSet();
00440
00442 doccount size() const;
00443
00445 bool empty() const;
00446
00448 void add_document(docid did);
00449
00451 void add_document(const MSetIterator & i) { add_document(*i); }
00452
00454 void remove_document(docid did);
00455
00457 void remove_document(const MSetIterator & i) { remove_document(*i); }
00458
00460 bool contains(docid did) const;
00461
00463 bool contains(const MSetIterator & i) { return contains(*i); }
00464
00469 std::string get_description() const;
00470 };
00471
00474 class MatchDecider {
00475 public:
00478 virtual int operator()(const Document &doc) const = 0;
00479
00481 virtual ~MatchDecider() {}
00482 };
00483
00486 class ExpandDecider {
00487 public:
00490 virtual int operator()(const termname & tname) const = 0;
00491
00493 virtual ~ExpandDecider() {}
00494 };
00495
00509 class Enquire {
00510 private:
00512 Enquire(const Enquire &);
00513
00515 void operator=(const Enquire &);
00516
00517 public:
00518 class Internal;
00520 Internal *internal;
00521
00537 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00538
00547 ~Enquire();
00548
00556 void set_query(const Query & query_);
00557
00564 const Query & get_query();
00565
00572 void set_weighting_scheme(const Weight &weight_);
00573
00580 void set_collapse_key(valueno collapse_key);
00581
00588 void set_sort_forward(bool sort_forward);
00589
00607 void set_cutoff(int percent_cutoff, weight weight_cutoff = 0);
00608
00622 void set_sorting(valueno sort_key, int sort_bands);
00623
00635 void set_bias(weight bias_weight, time_t bias_halflife);
00636
00657 MSet get_mset(doccount first, doccount maxitems, const RSet * rset = 0,
00658 const MatchDecider * mdecider = 0) const;
00659
00660 static const int include_query_terms = 1;
00661 static const int use_exact_termfreq = 2;
00685 ESet get_eset(termcount maxitems, const RSet & rset, int flags = 0,
00686 double k = 1.0, const ExpandDecider * edecider = 0) const;
00687
00702 ESet get_eset(termcount maxitems, const RSet & rset,
00703 const ExpandDecider * edecider) const {
00704 return get_eset(maxitems, rset, 0, 1.0, edecider);
00705 }
00706
00736 TermIterator get_matching_terms_begin(docid did) const;
00737
00739 TermIterator get_matching_terms_end(docid did) const;
00740
00764 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00765
00767 TermIterator get_matching_terms_end(const MSetIterator &it) const;
00768
00771 void register_match_decider(const std::string &name,
00772 const MatchDecider *mdecider = NULL);
00773
00777 std::string get_description() const;
00778 };
00779
00780 class SocketServer;
00781
00783 class Weight {
00784 public:
00785 class Internal;
00786 friend class Enquire;
00787 friend class SocketServer;
00788 private:
00789 Weight(const Weight &);
00790 void operator=(Weight &);
00791
00793
00794
00795
00796
00797
00798 virtual Weight * clone() const = 0;
00799
00800 protected:
00801 const Internal * internal;
00802 doclength querysize;
00803 termcount wqf;
00804 termname tname;
00805
00806 public:
00807 Weight() { }
00808 virtual ~Weight() { }
00809
00821 Weight * create(const Internal * internal_, doclength querysize_,
00822 termcount wqf_, termname tname_) const {
00823 Weight * wt = clone();
00824 wt->internal = internal_;
00825 wt->querysize = querysize_;
00826 wt->wqf = wqf_;
00827 wt->tname = tname_;
00828 return wt;
00829 }
00830
00832
00833
00834 virtual std::string name() const = 0;
00835
00837 virtual std::string serialise() const = 0;
00838
00840 virtual Weight * Weight::unserialise(const std::string &s) const = 0;
00841
00849 virtual weight get_sumpart(termcount wdf, doclength len) const = 0;
00850
00856 virtual weight get_maxpart() const = 0;
00857
00866 virtual weight get_sumextra(doclength len) const = 0;
00867
00871 virtual weight get_maxextra() const = 0;
00872
00874 virtual bool get_sumpart_needs_doclength() const { return true; }
00875 };
00876
00878 class BoolWeight : public Weight {
00879 public:
00880 Weight * clone() const {
00881 return new BoolWeight;
00882 }
00883 BoolWeight() { }
00884 ~BoolWeight() { }
00885 std::string name() const { return "Bool"; }
00886 std::string serialise() const { return ""; }
00887 Weight * unserialise(const std::string & ) const {
00888 return new BoolWeight;
00889 }
00890 weight get_sumpart(termcount , doclength ) const {
00891 return 0;
00892 }
00893 weight get_maxpart() const { return 0; }
00894
00895 weight get_sumextra(doclength ) const { return 0; }
00896 weight get_maxextra() const { return 0; }
00897
00898 bool get_sumpart_needs_doclength() const { return false; }
00899 };
00900
00902
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912 class BM25Weight : public Weight {
00913 private:
00914 mutable weight termweight;
00915 mutable doclength lenpart;
00916 mutable double BD;
00917
00918 double A, B, C, D;
00919 doclength min_normlen;
00920
00921 mutable bool weight_calculated;
00922
00923 void calc_termweight() const;
00924
00925 public:
00944 BM25Weight(double A_, double B_, double C_, double D_,
00945 double min_normlen_)
00946 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
00947 weight_calculated(false)
00948 {
00949 if (A < 0) A = 0;
00950 if (B < 0) B = 0;
00951 if (C < 0) C = 0;
00952 if (D < 0) D = 0; else if (D > 1) D = 1;
00953 }
00954 BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
00955 weight_calculated(false) { }
00956
00957 Weight * clone() const {
00958 return new BM25Weight(A, B, C, D, min_normlen);
00959 }
00960 ~BM25Weight() { }
00961 std::string name() const { return "BM25"; }
00962 std::string serialise() const;
00963 Weight * unserialise(const std::string & s) const;
00964 weight get_sumpart(termcount wdf, doclength len) const;
00965 weight get_maxpart() const;
00966
00967 weight get_sumextra(doclength len) const;
00968 weight get_maxextra() const;
00969
00970 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
00971 };
00972
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984 class TradWeight : public Weight {
00985 private:
00986 mutable weight termweight;
00987 mutable doclength lenpart;
00988
00989 double param_k;
00990
00991 mutable bool weight_calculated;
00992
00993 void calc_termweight() const;
00994
00995 public:
00997
00998
00999
01000
01001
01002 TradWeight(double k = 1) : param_k(k), weight_calculated(false) {
01003 if (param_k < 0) param_k = 0;
01004 }
01005 Weight * clone() const {
01006 return new TradWeight(param_k);
01007 }
01008 ~TradWeight() { }
01009 std::string name() const { return "Trad"; }
01010 std::string serialise() const;
01011 Weight * unserialise(const std::string & s) const;
01012
01013 weight get_sumpart(termcount wdf, doclength len) const;
01014 weight get_maxpart() const;
01015
01016 weight get_sumextra(doclength len) const;
01017 weight get_maxextra() const;
01018
01019 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01020 };
01021
01022 };
01023
01024 #endif