00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef OM_HGUARD_OMENQUIRE_H
00027 #define OM_HGUARD_OMENQUIRE_H
00028
00029 #include "om/omtypes.h"
00030 #include "om/omdocument.h"
00031 #include "om/omdatabase.h"
00032 #include "om/omerror.h"
00033 #include <string>
00034 #include <time.h>
00035
00036 class OmQuery;
00037 class OmErrorHandler;
00038 class OmWeight;
00039
00043 class OmMSetIterator {
00044 public:
00045 friend class OmMSet;
00046
00047 class Internal;
00049 Internal *internal;
00050
00051 friend bool operator==(const OmMSetIterator &a,
00052 const OmMSetIterator &b);
00053
00054 private:
00055 OmMSetIterator(Internal *internal_);
00056
00057 public:
00061 OmMSetIterator();
00062
00063 ~OmMSetIterator();
00064
00066 OmMSetIterator(const OmMSetIterator &other);
00067
00069 void operator=(const OmMSetIterator &other);
00070
00072 OmMSetIterator & operator++();
00073
00074 void operator++(int);
00075
00077 om_docid operator *() const;
00078
00097 OmDocument get_document() const;
00098
00105 om_doccount get_rank() const;
00106
00108 om_weight get_weight() const;
00109
00139 om_doccount get_collapse_count() const;
00140
00145 om_percent get_percent() const;
00146
00150 std::string get_description() const;
00151
00153
00154 typedef std::input_iterator_tag iterator_category;
00155 typedef om_docid value_type;
00156 typedef om_doccount_diff difference_type;
00157 typedef om_docid * pointer;
00158 typedef om_docid & reference;
00160 };
00161
00162 inline bool operator!=(const OmMSetIterator &a,
00163 const OmMSetIterator &b)
00164 {
00165 return !(a == b);
00166 }
00167
00171 class OmMSet {
00172 public:
00173 class Internal;
00175 Internal *internal;
00176
00177 public:
00178
00180
00181
00183 OmMSet();
00184
00186 ~OmMSet();
00187
00189 OmMSet(const OmMSet & other);
00190
00192 void operator=(const OmMSet &other);
00193
00209 void fetch(const OmMSetIterator &begin,
00210 const OmMSetIterator &end) const;
00211
00214 void fetch(const OmMSetIterator &item) const;
00215
00218 void fetch() const;
00219
00224 om_percent convert_to_percent(om_weight wt) const;
00225
00227 om_percent convert_to_percent(const OmMSetIterator &it) const;
00228
00236 om_doccount get_termfreq(const om_termname &tname) const;
00237
00245 om_weight get_termweight(const om_termname &tname) const;
00246
00253 om_doccount get_firstitem() const;
00254
00261 om_doccount get_matches_lower_bound() const;
00262
00272 om_doccount get_matches_estimated() const;
00273
00280 om_doccount get_matches_upper_bound() const;
00281
00287 om_weight get_max_possible() const;
00288
00302 om_weight get_max_attained() const;
00303
00304 om_doccount size() const;
00305
00306 om_doccount max_size() const;
00307
00308 bool empty() const;
00309
00310 void swap(OmMSet & other);
00311
00312 OmMSetIterator begin() const;
00313
00314 OmMSetIterator end() const;
00315
00316 OmMSetIterator back() const;
00317
00327 OmMSetIterator operator[](om_doccount i) const;
00328
00330
00331 typedef std::input_iterator_tag iterator_category;
00332 typedef OmMSetIterator value_type;
00333 typedef OmMSetIterator iterator;
00334 typedef OmMSetIterator const_iterator;
00335 typedef OmMSetIterator & reference;
00336 typedef OmMSetIterator & const_reference;
00337 typedef OmMSetIterator * pointer;
00338 typedef om_doccount_diff difference_type;
00339 typedef om_doccount size_type;
00341
00345 std::string get_description() const;
00346 };
00347
00349 class OmESetIterator {
00350 public:
00351 friend class OmESet;
00352 class Internal;
00354 Internal *internal;
00355
00356 friend bool operator==(const OmESetIterator &a,
00357 const OmESetIterator &b);
00358
00359 private:
00360
00361 OmESetIterator(Internal *internal_);
00362
00363 public:
00367 OmESetIterator();
00368
00370 ~OmESetIterator();
00371
00373 OmESetIterator(const OmESetIterator &other);
00374
00376 void operator=(const OmESetIterator &other);
00377
00378 OmESetIterator & operator++();
00379
00380 void operator++(int);
00381
00383 const om_termname & operator *() const;
00384
00386 om_weight get_weight() const;
00387
00391 std::string get_description() const;
00392
00394
00395 typedef std::input_iterator_tag iterator_category;
00396 typedef om_termname value_type;
00397 typedef om_termcount_diff difference_type;
00398 typedef om_termname * pointer;
00399 typedef om_termname & reference;
00401 };
00402
00403 inline bool
00404 operator!=(const OmESetIterator &a, const OmESetIterator &b)
00405 {
00406 return !(a == b);
00407 }
00408
00413 class OmESet {
00414 public:
00415 class Internal;
00417 Internal *internal;
00418
00420 OmESet();
00421
00423 ~OmESet();
00424
00426 OmESet(const OmESet & other);
00427
00429 void operator=(const OmESet &other);
00430
00435 om_termcount get_ebound() const;
00436
00438 om_termcount size() const;
00439
00441 bool empty() const;
00442
00444 OmESetIterator begin() const;
00445
00447 OmESetIterator end() const;
00448
00453 std::string get_description() const;
00454 };
00455
00460 class OmRSet {
00461 public:
00463 class Internal;
00465 Internal *internal;
00466
00468 OmRSet(const OmRSet &rset);
00469
00471 void operator=(const OmRSet &rset);
00472
00474 OmRSet();
00475
00477 ~OmRSet();
00478
00480 om_doccount size() const;
00481
00483 bool empty() const;
00484
00486 void add_document(om_docid did);
00487
00489 void add_document(const OmMSetIterator & i) { add_document(*i); }
00490
00492 void remove_document(om_docid did);
00493
00495 void remove_document(const OmMSetIterator & i) { remove_document(*i); }
00496
00498 bool contains(om_docid did) const;
00499
00501 bool contains(const OmMSetIterator & i) { return contains(*i); }
00502
00507 std::string get_description() const;
00508 };
00509
00512 class OmMatchDecider {
00513 public:
00516 virtual int operator()(const OmDocument &doc) const = 0;
00517
00519 virtual ~OmMatchDecider() {}
00520 };
00521
00524 class OmExpandDecider {
00525 public:
00528 virtual int operator()(const om_termname & tname) const = 0;
00529
00531 virtual ~OmExpandDecider() {}
00532 };
00533
00547 class OmEnquire {
00548 private:
00550 OmEnquire(const OmEnquire &);
00551
00553 void operator=(const OmEnquire &);
00554
00555 public:
00556 class Internal;
00558 Internal *internal;
00559
00575 OmEnquire(const OmDatabase &databases,
00576 OmErrorHandler * errorhandler_ = 0);
00577
00587 ~OmEnquire();
00588
00596 void set_query(const OmQuery & query_);
00597
00604 const OmQuery & get_query();
00605
00612 void set_weighting_scheme(const OmWeight &weight_);
00613
00620 void set_collapse_key(om_valueno collapse_key);
00621
00628 void set_sort_forward(bool sort_forward);
00629
00647 void set_cutoff(int percent_cutoff, om_weight weight_cutoff = 0);
00648
00662 void set_sorting(om_valueno sort_key, int sort_bands);
00663
00675 void set_bias(om_weight bias_weight, time_t bias_halflife);
00676
00697 OmMSet get_mset(om_doccount first,
00698 om_doccount maxitems,
00699 const OmRSet * omrset = 0,
00700 const OmMatchDecider * mdecider = 0) const;
00701
00702 static const int include_query_terms = 1;
00703 static const int use_exact_termfreq = 2;
00727 OmESet get_eset(om_termcount maxitems,
00728 const OmRSet & omrset,
00729 int flags = 0,
00730 double k = 1.0,
00731 const OmExpandDecider * edecider = 0) const;
00732
00747 inline OmESet get_eset(om_termcount maxitems, const OmRSet & omrset,
00748 const OmExpandDecider * edecider) const {
00749 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00750 }
00751
00781 OmTermIterator get_matching_terms_begin(om_docid did) const;
00782
00784 OmTermIterator get_matching_terms_end(om_docid did) const;
00785
00809 OmTermIterator get_matching_terms_begin(const OmMSetIterator &it) const;
00810
00812 OmTermIterator get_matching_terms_end(const OmMSetIterator &it) const;
00813
00816 void register_match_decider(const std::string &name,
00817 const OmMatchDecider *mdecider = NULL);
00818
00822 std::string get_description() const;
00823 };
00824
00825 class SocketServer;
00826
00828 class OmWeight {
00829 friend class OmEnquire;
00830 friend class SocketServer;
00831 public:
00832 class Internal;
00833 private:
00834 OmWeight(const OmWeight &);
00835 void operator=(OmWeight &);
00836
00838
00839
00840
00841
00842
00843 virtual OmWeight * clone() const = 0;
00844
00845 protected:
00846 const Internal * internal;
00847 om_doclength querysize;
00848 om_termcount wqf;
00849 om_termname tname;
00850
00851 public:
00852 OmWeight() { }
00853 virtual ~OmWeight() { }
00854
00866 OmWeight * create(const Internal * internal_, om_doclength querysize_,
00867 om_termcount wqf_, om_termname tname_) const {
00868 OmWeight * wt = clone();
00869 wt->internal = internal_;
00870 wt->querysize = querysize_;
00871 wt->wqf = wqf_;
00872 wt->tname = tname_;
00873 return wt;
00874 }
00875
00877
00878
00879 virtual std::string name() const = 0;
00880
00882 virtual std::string serialise() const = 0;
00883
00885 virtual OmWeight * OmWeight::unserialise(const std::string &s) const = 0;
00886
00894 virtual om_weight get_sumpart(om_termcount wdf,
00895 om_doclength len) const = 0;
00896
00902 virtual om_weight get_maxpart() const = 0;
00903
00912 virtual om_weight get_sumextra(om_doclength len) const = 0;
00913
00917 virtual om_weight get_maxextra() const = 0;
00918
00920 virtual bool get_sumpart_needs_doclength() const { return true; }
00921 };
00922
00924 class BoolWeight : public OmWeight {
00925 public:
00926 OmWeight * clone() const {
00927 return new BoolWeight;
00928 }
00929 BoolWeight() { }
00930 ~BoolWeight() { }
00931 std::string name() const { return "Bool"; }
00932 std::string serialise() const { return ""; }
00933 OmWeight * unserialise(const std::string & ) const {
00934 return new BoolWeight;
00935 }
00936 om_weight get_sumpart(om_termcount , om_doclength ) const { return 0; }
00937 om_weight get_maxpart() const { return 0; }
00938
00939 om_weight get_sumextra(om_doclength ) const { return 0; }
00940 om_weight get_maxextra() const { return 0; }
00941
00942 bool get_sumpart_needs_doclength() const { return false; }
00943 };
00944
00946
00947
00948
00949
00950
00951
00952
00953
00954
00955
00956 class BM25Weight : public OmWeight {
00957 private:
00958 mutable om_weight termweight;
00959 mutable om_doclength lenpart;
00960 mutable double BD;
00961
00962 double A, B, C, D;
00963 om_doclength min_normlen;
00964
00965 mutable bool weight_calculated;
00966
00967 void calc_termweight() const;
00968
00969 public:
00988 BM25Weight(double A_, double B_, double C_, double D_,
00989 double min_normlen_)
00990 : A(A_), B(B_), C(C_), D(D_), min_normlen(min_normlen_),
00991 weight_calculated(false)
00992 {
00993 if (A < 0) A = 0;
00994 if (B < 0) B = 0;
00995 if (C < 0) C = 0;
00996 if (D < 0) D = 0; else if (D > 1) D = 1;
00997 }
00998 BM25Weight() : A(1), B(1), C(0), D(0.5), min_normlen(0.5),
00999 weight_calculated(false) { }
01000
01001 OmWeight * clone() const {
01002 return new BM25Weight(A, B, C, D, min_normlen);
01003 }
01004 ~BM25Weight() { }
01005 std::string name() const { return "BM25"; }
01006 std::string serialise() const;
01007 OmWeight * unserialise(const std::string & s) const;
01008 om_weight get_sumpart(om_termcount wdf, om_doclength len) const;
01009 om_weight get_maxpart() const;
01010
01011 om_weight get_sumextra(om_doclength len) const;
01012 om_weight get_maxextra() const;
01013
01014 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01015 };
01016
01018
01019
01020
01021
01022
01023
01024
01025
01026
01027
01028 class TradWeight : public OmWeight {
01029 private:
01030 mutable om_weight termweight;
01031 mutable om_doclength lenpart;
01032
01033 double param_k;
01034
01035 mutable bool weight_calculated;
01036
01037 void calc_termweight() const;
01038
01039 public:
01041
01042
01043
01044
01045
01046 TradWeight(double k = 1) : param_k(k), weight_calculated(false) {
01047 if (param_k < 0) param_k = 0;
01048 }
01049 OmWeight * clone() const {
01050 return new TradWeight(param_k);
01051 }
01052 ~TradWeight() { }
01053 std::string name() const { return "Trad"; }
01054 std::string serialise() const;
01055 OmWeight * unserialise(const std::string & s) const;
01056
01057 om_weight get_sumpart(om_termcount wdf, om_doclength len) const;
01058 om_weight get_maxpart() const;
01059
01060 om_weight get_sumextra(om_doclength len) const;
01061 om_weight get_maxextra() const;
01062
01063 bool get_sumpart_needs_doclength() const { return (lenpart != 0); }
01064 };
01065
01066 #endif