include/xapian/weight.h

Go to the documentation of this file.
00001 
00004 /* Copyright (C) 2007,2008,2009 Olly Betts
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU General Public License as
00008  * published by the Free Software Foundation; either version 2 of the
00009  * License, or (at your option) any later version.
00010  *
00011  * This program is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License
00017  * along with this program; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
00019  */
00020 
00021 #ifndef XAPIAN_INCLUDED_WEIGHT_H
00022 #define XAPIAN_INCLUDED_WEIGHT_H
00023 
00024 #include <string>
00025 
00026 #include <xapian/types.h>
00027 #include <xapian/visibility.h>
00028 
00029 namespace Xapian {
00030 
00032 class XAPIAN_VISIBILITY_DEFAULT Weight {
      // Abstract base class for the weighting schemes declared later in this
      // header (BoolWeight, BM25Weight, TradWeight).  A subclass asks for the
      // statistics it wants with need_stat(), reads them back through the
      // protected get_*() accessors, and implements the pure virtual members.
00033   protected:
          /// Bit flags naming the statistics a subclass can request.  Every
          /// value is a distinct power of two, so need_stat() can OR them
          /// together into stats_needed.
          /// NOTE(review): the meaning of each flag is inferred from its name
          /// and the similarly named accessor below - confirm against the code
          /// which actually gathers the statistics.
00035     typedef enum {
00036         COLLECTION_SIZE = 1,
00037         RSET_SIZE = 2,
00038         AVERAGE_LENGTH = 4,
00039         TERMFREQ = 8,
00040         RELTERMFREQ = 16,
00041         QUERY_LENGTH = 32,
00042         WQF = 64,
00043         WDF = 128,
00044         DOC_LENGTH = 256,
00045         DOC_LENGTH_MIN = 512,
00046         DOC_LENGTH_MAX = 1024,
00047         WDF_MAX = 2048
00048     } stat_flags;
00049 
          /// Add @a flag to the set of requested statistics (stats_needed).
          /// The constructors of the subclasses in this header call this.
00059     void need_stat(stat_flags flag) {
              // The OR yields an int, so cast back to the enum type.
00060         stats_needed = stat_flags(stats_needed | flag);
00061     }
00062 
          /// Subclass hook receiving a scaling @a factor.  Nothing in this
          /// header calls it.
          /// NOTE(review): presumably invoked from init_() after the requested
          /// statistics have been stored - confirm in the implementation.
00067     virtual void init(double factor) = 0;
00068 
00069   private:
          /// Declared private with no body in this header, so code outside the
          /// class cannot assign one Weight object to another.
00071     void operator=(const Weight &);
00072 
          /// Subclass hook returning a pointer to a Weight; callers reach it
          /// through the public clone_() wrapper.
          /// NOTE(review): presumably a freshly allocated copy owned by the
          /// caller - confirm in the implementation.
00082     virtual Weight * clone() const = 0;
00083 
          /// Bitwise OR of every stat_flags value passed to need_stat() so far.
00085     stat_flags stats_needed;
00086 
          // The members below are the values handed back by the protected
          // get_*() accessors at the end of the class.  Nothing in this header
          // assigns them.
          // NOTE(review): presumably the init_() overloads fill them in from
          // the Internal stats object - confirm in the implementation.
00088     Xapian::doccount collection_size_;
00089 
00091     Xapian::doccount rset_size_;
00092 
00094     Xapian::doclength average_length_;
00095 
00097     Xapian::doccount termfreq_;
00098 
00100     Xapian::doccount reltermfreq_;
00101 
00103     Xapian::termcount query_length_;
00104 
00106     Xapian::termcount wqf_;
00107 
00109     Xapian::termcount doclength_lower_bound_;
00110 
00112     Xapian::termcount doclength_upper_bound_;
00113 
00115     Xapian::termcount wdf_upper_bound_;
00116 
00117   public:
          /// Forward declaration only; within this header it is used solely as
          /// a reference parameter of the init_() overloads.
00118     class Internal;
00119 
          /// Virtual, so a subclass object can be destroyed through a Weight
          /// pointer.  Only declared here.
00121     virtual ~Weight();
00122 
          /// Pure virtual; returns a std::string.
          /// NOTE(review): presumably a name identifying the scheme - confirm.
00137     virtual std::string name() const = 0;
00138 
          /// Pure virtual; returns this object's state as a std::string.
          /// NOTE(review): presumably the subclass parameters - confirm.
00145     virtual std::string serialise() const = 0;
00146 
          /// Pure virtual; returns a pointer to a Weight built from @a s.
          /// NOTE(review): presumably @a s is the output of serialise() and the
          /// caller owns the returned object - confirm.
00156     virtual Weight * unserialise(const std::string & s) const = 0;
00157 
          /// Pure virtual scoring hook taking a @a wdf value and a document
          /// length @a doclen.
          /// NOTE(review): presumably the per-term contribution to a document's
          /// weight - confirm.
00166     virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
00167                                        Xapian::termcount doclen) const = 0;
00168 
          /// Pure virtual, no arguments.
          /// NOTE(review): presumably an upper bound on get_sumpart() - confirm.
00174     virtual Xapian::weight get_maxpart() const = 0;
00175 
          /// Pure virtual scoring hook taking only a document length.
          /// NOTE(review): presumably a term-independent component - confirm.
00183     virtual Xapian::weight get_sumextra(Xapian::termcount doclen) const = 0;
00184 
          /// Pure virtual, no arguments.
          /// NOTE(review): presumably an upper bound on get_sumextra() - confirm.
00191     virtual Xapian::weight get_maxextra() const = 0;
00192 
          /// Public forwarding wrapper around the private virtual clone().
00200     Weight * clone_() const { return clone(); }
00201 
          /// Declared only.  Takes the stats object, a query length, a term, a
          /// wqf value and the factor type accepted by init().  The parameter
          /// name wqf_ matches a data member, which is harmless in a
          /// declaration.
          /// NOTE(review): presumably stores the requested statistics for
          /// @a term and then calls init(factor) - confirm.
00211     void init_(const Internal & stats, Xapian::termcount query_len_,
00212                const std::string & term, Xapian::termcount wqf_,
00213                double factor);
00214 
          /// Declared only.  Overload without a term, wqf or factor.
          /// NOTE(review): presumably used for the term-independent part of
          /// the weight - confirm.
00221     void init_(const Internal & stats, Xapian::termcount query_len_);
00222 
          /// True when DOC_LENGTH has been requested via need_stat(); the
          /// masked flag value converts implicitly to bool.
00229     bool get_sumpart_needs_doclength_() const {
00230         return stats_needed & DOC_LENGTH;
00231     }
00232 
00233   protected:
          /// Copy constructor: protected and only declared here, so copying is
          /// limited to Weight itself and its subclasses.
00235     Weight(const Weight &);
00236 
          /// Value-initialises stats_needed, i.e. no statistics requested.  The
          /// other data members are not named in the initialiser list.
00238     Weight() : stats_needed() { }
00239 
          // Read-only accessors for the stored statistics.  Each one simply
          // returns the data member of the matching name.
00241     Xapian::doccount get_collection_size() const { return collection_size_; }
00242 
00244     Xapian::doccount get_rset_size() const { return rset_size_; }
00245 
00247     Xapian::doclength get_average_length() const { return average_length_; }
00248 
00250     Xapian::doccount get_termfreq() const { return termfreq_; }
00251 
00253     Xapian::doccount get_reltermfreq() const { return reltermfreq_; }
00254 
00256     Xapian::termcount get_query_length() const { return query_length_; }
00257 
00259     Xapian::termcount get_wqf() const { return wqf_; }
00260 
          /// Returns doclength_upper_bound_.
          /// NOTE(review): presumably tied to the DOC_LENGTH_MAX flag - confirm.
00265     Xapian::termcount get_doclength_upper_bound() const {
00266         return doclength_upper_bound_;
00267     }
00268 
          /// Returns doclength_lower_bound_.
          /// NOTE(review): presumably tied to the DOC_LENGTH_MIN flag - confirm.
00273     Xapian::termcount get_doclength_lower_bound() const {
00274         return doclength_lower_bound_;
00275     }
00276 
          /// Returns wdf_upper_bound_.
          /// NOTE(review): presumably tied to the WDF_MAX flag - confirm.
00281     Xapian::termcount get_wdf_upper_bound() const {
00282         return wdf_upper_bound_;
00283     }
00284 };
00285 
00290 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
      // Concrete Weight subclass whose constructor requests no statistics.
      // Apart from that constructor, every member is only declared here.

          /// Overrides Weight::clone(), narrowing the return type to
          /// BoolWeight* (a covariant return).  Private by class default.
00291     BoolWeight * clone() const;
00292 
          /// Overrides the Weight::init() hook.  Private by class default.
00293     void init(double factor);
00294 
00295   public:
          /// Empty body: need_stat() is never called, so stats_needed keeps the
          /// zero value set by the Weight default constructor.
00297     BoolWeight() { }
00298 
          /// Override of Weight::name().
00299     std::string name() const;
00300 
          /// Overrides of Weight::serialise() and Weight::unserialise(); the
          /// latter narrows the return type to BoolWeight*.
00301     std::string serialise() const;
00302     BoolWeight * unserialise(const std::string & s) const;
00303 
          /// Overrides of the four scoring hooks declared in Weight.
          /// NOTE(review): presumably all return a constant since no statistics
          /// are requested - confirm in the implementation.
00304     Xapian::weight get_sumpart(Xapian::termcount wdf,
00305                                Xapian::termcount doclen) const;
00306     Xapian::weight get_maxpart() const;
00307 
00308     Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00309     Xapian::weight get_maxextra() const;
00310 };
00311 
00313 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
      // Concrete Weight subclass configured by the parameters k1, k2, k3, b
      // and min_normlen.  The constructors are defined inline and decide which
      // statistics to request; all other members are only declared here.

          /// Declared mutable, so const member functions may assign it.
          /// Neither constructor initialises it.
          /// NOTE(review): presumably computed in init() - confirm.
00315     mutable Xapian::doclength len_factor;
00316 
          /// Declared mutable and left uninitialised by both constructors, like
          /// len_factor above.
          /// NOTE(review): presumably computed in init() - confirm.
00318     mutable Xapian::weight termweight;
00319 
          /// Tuning parameters.  The five-argument constructor forces k1, k2
          /// and k3 to be non-negative and b into the range [0, 1].
00321     double param_k1, param_k2, param_k3, param_b;
00322 
          /// Stored exactly as passed; no range check is applied in this header.
00324     Xapian::doclength param_min_normlen;
00325 
          /// Overrides Weight::clone(), narrowing the return type to
          /// BM25Weight* (a covariant return).  Private by class default.
00326     BM25Weight * clone() const;
00327 
          /// Overrides the Weight::init() hook.  Private by class default.
00328     void init(double factor);
00329 
00330   public:
          /** Construct with explicit parameters.
           *
           *  @param k1           Replaced by 0 if negative.
           *  @param k2           Replaced by 0 if negative.
           *  @param k3           Replaced by 0 if negative.
           *  @param b            Clamped into [0, 1].
           *  @param min_normlen  Stored unchanged.
           *
           *  COLLECTION_SIZE, RSET_SIZE, TERMFREQ, RELTERMFREQ and WDF_MAX are
           *  always requested.  The remaining statistics are requested only
           *  when the parameters that gate them are non-zero after clamping.
           *
           *  NOTE(review): a NaN argument fails every "< 0" / "> 1" comparison
           *  and is therefore stored unclamped - confirm whether callers can
           *  pass NaN.
           */
00358     BM25Weight(double k1, double k2, double k3, double b, double min_normlen)
00359         : param_k1(k1), param_k2(k2), param_k3(k3), param_b(b),
00360           param_min_normlen(min_normlen)
00361     {
              // Sanitise the parameters before they drive the requests below.
00362         if (param_k1 < 0) param_k1 = 0;
00363         if (param_k2 < 0) param_k2 = 0;
00364         if (param_k3 < 0) param_k3 = 0;
00365         if (param_b < 0) {
00366             param_b = 0;
00367         } else if (param_b > 1) {
00368             param_b = 1;
00369         }
              // Requested whatever the parameter values are.
00370         need_stat(COLLECTION_SIZE);
00371         need_stat(RSET_SIZE);
00372         need_stat(TERMFREQ);
00373         need_stat(RELTERMFREQ);
00374         need_stat(WDF_MAX);
              // Requested only when k2, or both k1 and b, are non-zero.
00375         if (param_k2 != 0 || (param_k1 != 0 && param_b != 0)) {
00376             need_stat(DOC_LENGTH_MIN);
00377             need_stat(AVERAGE_LENGTH);
00378         }
              // Per-document length: only when both k1 and b are non-zero.
00379         if (param_k1 != 0 && param_b != 0) need_stat(DOC_LENGTH);
              // Query length: only when k2 is non-zero.
00380         if (param_k2 != 0) need_stat(QUERY_LENGTH);
              // Within-query frequency: only when k3 is non-zero.
00381         if (param_k3 != 0) need_stat(WQF);
00382     }
00383 
          /** Construct with the defaults k1 = 1, k2 = 0, k3 = 1, b = 0.5 and
           *  min_normlen = 0.5.
           *
           *  The statistics requested here are exactly those the five-argument
           *  constructor would request for these values.  In particular
           *  QUERY_LENGTH is omitted because k2 is 0.
           */
00384     BM25Weight()
00385         : param_k1(1), param_k2(0), param_k3(1), param_b(0.5),
00386           param_min_normlen(0.5)
00387     {
00388         need_stat(COLLECTION_SIZE);
00389         need_stat(RSET_SIZE);
00390         need_stat(TERMFREQ);
00391         need_stat(RELTERMFREQ);
00392         need_stat(WDF_MAX);
00393         need_stat(DOC_LENGTH_MIN);
00394         need_stat(AVERAGE_LENGTH);
00395         need_stat(DOC_LENGTH);
00396         need_stat(WQF);
00397     }
00398 
          /// Override of Weight::name().
00399     std::string name() const;
00400 
          /// Overrides of Weight::serialise() and Weight::unserialise(); the
          /// latter narrows the return type to BM25Weight*.
00401     std::string serialise() const;
00402     BM25Weight * unserialise(const std::string & s) const;
00403 
          /// Overrides of the four scoring hooks declared in Weight.
00404     Xapian::weight get_sumpart(Xapian::termcount wdf,
00405                                Xapian::termcount doclen) const;
00406     Xapian::weight get_maxpart() const;
00407 
00408     Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00409     Xapian::weight get_maxextra() const;
00410 };
00411 
00421 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
      // Concrete Weight subclass configured by a single parameter k.  The
      // constructor is defined inline; all other members are only declared.

          /// Declared mutable, so const member functions may assign it.  The
          /// constructor does not initialise it.
          /// NOTE(review): presumably computed in init() - confirm.
00423     mutable Xapian::doclength len_factor;
00424 
          /// Declared mutable and left uninitialised by the constructor, like
          /// len_factor above.
          /// NOTE(review): presumably computed in init() - confirm.
00426     mutable Xapian::weight termweight;
00427 
          /// Tuning parameter; the constructor replaces negative values by 0.
00429     double param_k;
00430 
          /// Overrides Weight::clone(), narrowing the return type to
          /// TradWeight* (a covariant return).  Private by class default.
00431     TradWeight * clone() const;
00432 
          /// Overrides the Weight::init() hook.  Private by class default.
00433     void init(double factor);
00434 
00435   public:
          /** Construct with parameter @a k (default 1.0).
           *
           *  Marked explicit, so a double is never converted to a TradWeight
           *  implicitly.  A negative @a k is replaced by 0.
           *
           *  AVERAGE_LENGTH and DOC_LENGTH are requested only when k is
           *  non-zero after clamping.  COLLECTION_SIZE, RSET_SIZE, TERMFREQ,
           *  RELTERMFREQ, DOC_LENGTH_MIN and WDF_MAX are always requested.
           *
           *  NOTE(review): a NaN argument fails the "< 0" test and compares
           *  unequal to 0.0, so it is stored unclamped and the length
           *  statistics are requested - confirm whether callers can pass NaN.
           */
00443     explicit TradWeight(double k = 1.0) : param_k(k) {
00444         if (param_k < 0) param_k = 0;
              // Length statistics are requested only for non-zero k.
00445         if (param_k != 0.0) {
00446             need_stat(AVERAGE_LENGTH);
00447             need_stat(DOC_LENGTH);
00448         }
              // Requested whatever the value of k is.
00449         need_stat(COLLECTION_SIZE);
00450         need_stat(RSET_SIZE);
00451         need_stat(TERMFREQ);
00452         need_stat(RELTERMFREQ);
00453         need_stat(DOC_LENGTH_MIN);
00454         need_stat(WDF_MAX);
00455     }
00456 
          /// Override of Weight::name().
00457     std::string name() const;
00458 
          /// Overrides of Weight::serialise() and Weight::unserialise(); the
          /// latter narrows the return type to TradWeight*.
00459     std::string serialise() const;
00460     TradWeight * unserialise(const std::string & s) const;
00461 
          /// Overrides of the four scoring hooks declared in Weight.
00462     Xapian::weight get_sumpart(Xapian::termcount wdf,
00463                                Xapian::termcount doclen) const;
00464     Xapian::weight get_maxpart() const;
00465 
00466     Xapian::weight get_sumextra(Xapian::termcount doclen) const;
00467     Xapian::weight get_maxextra() const;
00468 };
00469 
00470 }
00471 
00472 #endif // XAPIAN_INCLUDED_WEIGHT_H

Documentation for Xapian (version 1.1.0).
Generated on 22 Apr 2009 by Doxygen 1.5.2.