00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef XAPIAN_INCLUDED_WEIGHT_H
00022 #define XAPIAN_INCLUDED_WEIGHT_H
00023
00024 #include <string>
00025
00026 #include <xapian/types.h>
00027 #include <xapian/visibility.h>
00028
00029 namespace Xapian {
00030
00032 class XAPIAN_VISIBILITY_DEFAULT Weight {
00033 protected:
00035 typedef enum {
00036 COLLECTION_SIZE = 1,
00037 RSET_SIZE = 2,
00038 AVERAGE_LENGTH = 4,
00039 TERMFREQ = 8,
00040 RELTERMFREQ = 16,
00041 QUERY_LENGTH = 32,
00042 WQF = 64,
00043 WDF = 128,
00044 DOC_LENGTH = 256,
00045 DOC_LENGTH_MIN = 512,
00046 DOC_LENGTH_MAX = 1024,
00047 WDF_MAX = 2048
00048 } stat_flags;
00049
00059 void need_stat(stat_flags flag) {
00060 stats_needed = stat_flags(stats_needed | flag);
00061 }
00062
00067 virtual void init(double factor) = 0;
00068
00069 private:
00071 void operator=(const Weight &);
00072
00082 virtual Weight * clone() const = 0;
00083
00085 stat_flags stats_needed;
00086
00088 Xapian::doccount collection_size_;
00089
00091 Xapian::doccount rset_size_;
00092
00094 Xapian::doclength average_length_;
00095
00097 Xapian::doccount termfreq_;
00098
00100 Xapian::doccount reltermfreq_;
00101
00103 Xapian::termcount query_length_;
00104
00106 Xapian::termcount wqf_;
00107
00109 Xapian::termcount doclength_lower_bound_;
00110
00112 Xapian::termcount doclength_upper_bound_;
00113
00115 Xapian::termcount wdf_upper_bound_;
00116
00117 public:
00118 class Internal;
00119
00121 virtual ~Weight();
00122
00137 virtual std::string name() const = 0;
00138
00145 virtual std::string serialise() const = 0;
00146
00156 virtual Weight * unserialise(const std::string & s) const = 0;
00157
00166 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00167 Xapian::termcount doclen) const = 0;
00168
00174 virtual Xapian::weight get_maxpart() const = 0;
00175
00183 virtual Xapian::weight get_sumextra(Xapian::termcount doclen) const = 0;
00184
00191 virtual Xapian::weight get_maxextra() const = 0;
00192
00200 Weight * clone_() const { return clone(); }
00201
00211 void init_(const Internal & stats, Xapian::termcount query_len_,
00212 const std::string & term, Xapian::termcount wqf_,
00213 double factor);
00214
00221 void init_(const Internal & stats, Xapian::termcount query_len_);
00222
00229 bool get_sumpart_needs_doclength_() const {
00230 return stats_needed & DOC_LENGTH;
00231 }
00232
00233 protected:
00235 Weight(const Weight &);
00236
00238 Weight() : stats_needed() { }
00239
00241 Xapian::doccount get_collection_size() const { return collection_size_; }
00242
00244 Xapian::doccount get_rset_size() const { return rset_size_; }
00245
00247 Xapian::doclength get_average_length() const { return average_length_; }
00248
00250 Xapian::doccount get_termfreq() const { return termfreq_; }
00251
00253 Xapian::doccount get_reltermfreq() const { return reltermfreq_; }
00254
00256 Xapian::termcount get_query_length() const { return query_length_; }
00257
00259 Xapian::termcount get_wqf() const { return wqf_; }
00260
00265 Xapian::termcount get_doclength_upper_bound() const {
00266 return doclength_upper_bound_;
00267 }
00268
00273 Xapian::termcount get_doclength_lower_bound() const {
00274 return doclength_lower_bound_;
00275 }
00276
00281 Xapian::termcount get_wdf_upper_bound() const {
00282 return wdf_upper_bound_;
00283 }
00284 };
00285
00290 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
00291 BoolWeight * clone() const;
00292
00293 void init(double factor);
00294
00295 public:
00297 BoolWeight() { }
00298
00299 std::string name() const;
00300
00301 std::string serialise() const;
00302 BoolWeight * unserialise(const std::string & s) const;
00303
00304 Xapian::weight get_sumpart(Xapian::termcount wdf,
00305 Xapian::termcount doclen) const;
00306 Xapian::weight get_maxpart() const;
00307
00308 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00309 Xapian::weight get_maxextra() const;
00310 };
00311
00313 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
00315 mutable Xapian::doclength len_factor;
00316
00318 mutable Xapian::weight termweight;
00319
00321 double param_k1, param_k2, param_k3, param_b;
00322
00324 Xapian::doclength param_min_normlen;
00325
00326 BM25Weight * clone() const;
00327
00328 void init(double factor);
00329
00330 public:
00358 BM25Weight(double k1, double k2, double k3, double b, double min_normlen)
00359 : param_k1(k1), param_k2(k2), param_k3(k3), param_b(b),
00360 param_min_normlen(min_normlen)
00361 {
00362 if (param_k1 < 0) param_k1 = 0;
00363 if (param_k2 < 0) param_k2 = 0;
00364 if (param_k3 < 0) param_k3 = 0;
00365 if (param_b < 0) {
00366 param_b = 0;
00367 } else if (param_b > 1) {
00368 param_b = 1;
00369 }
00370 need_stat(COLLECTION_SIZE);
00371 need_stat(RSET_SIZE);
00372 need_stat(TERMFREQ);
00373 need_stat(RELTERMFREQ);
00374 need_stat(WDF_MAX);
00375 if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
00376 need_stat(DOC_LENGTH_MIN);
00377 need_stat(AVERAGE_LENGTH);
00378 }
00379 if (param_k1 != 0 && param_b != 0) need_stat(DOC_LENGTH);
00380 if (param_k2 != 0) need_stat(QUERY_LENGTH);
00381 if (param_k3 != 0) need_stat(WQF);
00382 }
00383
00384 BM25Weight()
00385 : param_k1(1), param_k2(0), param_k3(1), param_b(0.5),
00386 param_min_normlen(0.5)
00387 {
00388 need_stat(COLLECTION_SIZE);
00389 need_stat(RSET_SIZE);
00390 need_stat(TERMFREQ);
00391 need_stat(RELTERMFREQ);
00392 need_stat(WDF_MAX);
00393 need_stat(DOC_LENGTH_MIN);
00394 need_stat(AVERAGE_LENGTH);
00395 need_stat(DOC_LENGTH);
00396 need_stat(WQF);
00397 }
00398
00399 std::string name() const;
00400
00401 std::string serialise() const;
00402 BM25Weight * unserialise(const std::string & s) const;
00403
00404 Xapian::weight get_sumpart(Xapian::termcount wdf,
00405 Xapian::termcount doclen) const;
00406 Xapian::weight get_maxpart() const;
00407
00408 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00409 Xapian::weight get_maxextra() const;
00410 };
00411
00421 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
00423 mutable Xapian::doclength len_factor;
00424
00426 mutable Xapian::weight termweight;
00427
00429 double param_k;
00430
00431 TradWeight * clone() const;
00432
00433 void init(double factor);
00434
00435 public:
00443 explicit TradWeight(double k = 1.0) : param_k(k) {
00444 if (param_k < 0) param_k = 0;
00445 if (param_k != 0.0) {
00446 need_stat(AVERAGE_LENGTH);
00447 need_stat(DOC_LENGTH);
00448 }
00449 need_stat(COLLECTION_SIZE);
00450 need_stat(RSET_SIZE);
00451 need_stat(TERMFREQ);
00452 need_stat(RELTERMFREQ);
00453 need_stat(DOC_LENGTH_MIN);
00454 need_stat(WDF_MAX);
00455 }
00456
00457 std::string name() const;
00458
00459 std::string serialise() const;
00460 TradWeight * unserialise(const std::string & s) const;
00461
00462 Xapian::weight get_sumpart(Xapian::termcount wdf,
00463 Xapian::termcount doclen) const;
00464 Xapian::weight get_maxpart() const;
00465
00466 Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00467 Xapian::weight get_maxextra() const;
00468 };
00469
00470 }
00471
00472 #endif // XAPIAN_INCLUDED_WEIGHT_H