Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

include/xapian/enquire.h

Go to the documentation of this file.
00001 00004 /* ----START-LICENCE---- 00005 * Copyright 1999,2000,2001 BrightStation PLC 00006 * Copyright 2001,2002 Ananova Ltd 00007 * Copyright 2002,2003,2004,2005 Olly Betts 00008 * 00009 * This program is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU General Public License as 00011 * published by the Free Software Foundation; either version 2 of the 00012 * License, or (at your option) any later version. 00013 * 00014 * This program is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 * GNU General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU General Public License 00020 * along with this program; if not, write to the Free Software 00021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00022 * USA 00023 * -----END-LICENCE----- 00024 */ 00025 00026 #ifndef XAPIAN_INCLUDED_ENQUIRE_H 00027 #define XAPIAN_INCLUDED_ENQUIRE_H 00028 00029 #include <string> 00030 #include <time.h> // for time_t 00031 00032 #include <xapian/base.h> 00033 #include <xapian/error.h> 00034 #include <xapian/types.h> 00035 00036 namespace Xapian { 00037 00038 class Database; 00039 class Document; 00040 class ErrorHandler; 00041 class MSetIterator; 00042 class Query; 00043 class TermIterator; 00044 class Weight; 00045 00049 class MSet { 00050 public: 00051 class Internal; 00053 Xapian::Internal::RefCntPtr<Internal> internal; 00054 00056 explicit MSet(MSet::Internal * internal_); 00057 00059 MSet(); 00060 00062 ~MSet(); 00063 00065 MSet(const MSet & other); 00066 00068 void operator=(const MSet &other); 00069 00085 void fetch(const MSetIterator &begin, const MSetIterator &end) const; 00086 00089 void fetch(const MSetIterator &item) const; 00090 00093 void fetch() const; 00094 00099 Xapian::percent convert_to_percent(Xapian::weight wt) const; 00100 00102 Xapian::percent convert_to_percent(const MSetIterator &it) const; 00103 00111 Xapian::doccount get_termfreq(const std::string &tname) const; 00112 00120 Xapian::weight get_termweight(const std::string &tname) const; 00121 00129 Xapian::doccount get_firstitem() const; 00130 00140 Xapian::doccount get_matches_lower_bound() const; 00141 00154 Xapian::doccount get_matches_estimated() const; 00155 00165 Xapian::doccount get_matches_upper_bound() const; 00166 00172 Xapian::weight get_max_possible() const; 00173 00187 Xapian::weight get_max_attained() const; 00188 00190 Xapian::doccount size() const; 00191 00193 Xapian::doccount max_size() const { return size(); } 00194 00196 bool empty() const; 00197 00199 void swap(MSet & other); 00200 00202 MSetIterator begin() const; 00203 00205 MSetIterator end() const; 00206 00208 MSetIterator back() const; 00209 00219 MSetIterator operator[](Xapian::doccount i) const; 00220 00222 00223 typedef MSetIterator value_type; // FIXME: not assignable... 00224 typedef MSetIterator iterator; 00225 typedef MSetIterator const_iterator; 00226 typedef MSetIterator & reference; // Hmm 00227 typedef MSetIterator & const_reference; 00228 typedef MSetIterator * pointer; // Hmm 00229 typedef Xapian::doccount_diff difference_type; 00230 typedef Xapian::doccount size_type; 00232 00236 std::string get_description() const; 00237 }; 00238 00242 class MSetIterator { 00243 private: 00244 friend class MSet; 00245 friend bool operator==(const MSetIterator &a, const MSetIterator &b); 00246 friend bool operator!=(const MSetIterator &a, const MSetIterator &b); 00247 00248 MSetIterator(Xapian::doccount index_, const MSet & mset_) 00249 : index(index_), mset(mset_) { } 00250 00251 Xapian::doccount index; 00252 MSet mset; 00253 00254 public: 00258 MSetIterator() : index(0), mset() { } 00259 00260 ~MSetIterator() { } 00261 00263 MSetIterator(const MSetIterator &other) { 00264 index = other.index; 00265 mset = other.mset; 00266 } 00267 00269 void operator=(const MSetIterator &other) { 00270 index = other.index; 00271 mset = other.mset; 00272 } 00273 00275 MSetIterator & operator++() { 00276 ++index; 00277 return *this; 00278 } 00279 00281 MSetIterator operator++(int) { 00282 MSetIterator tmp = *this; 00283 ++index; 00284 return tmp; 00285 } 00286 00288 MSetIterator & operator--() { 00289 --index; 00290 return *this; 00291 } 00292 00294 MSetIterator operator--(int) { 00295 MSetIterator tmp = *this; 00296 --index; 00297 return tmp; 00298 } 00299 00301 Xapian::docid operator*() const; 00302 00321 Xapian::Document get_document() const; 00322 00329 Xapian::doccount get_rank() const { 00330 return mset.get_firstitem() + index; 00331 } 00332 00334 Xapian::weight get_weight() const; 00335 00352 Xapian::doccount get_collapse_count() const; 00353 00359 Xapian::percent get_percent() const; 00360 00364 std::string get_description() const; 00365 00367 00368 typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator 00369 typedef Xapian::docid value_type; 00370 typedef Xapian::doccount_diff difference_type; 00371 typedef Xapian::docid * pointer; 00372 typedef Xapian::docid & reference; 00374 }; 00375 00376 inline bool operator==(const MSetIterator &a, const MSetIterator &b) 00377 { 00378 return (a.index == b.index); 00379 } 00380 00381 inline bool operator!=(const MSetIterator &a, const MSetIterator &b) 00382 { 00383 return (a.index != b.index); 00384 } 00385 00386 class ESetIterator; 00387 00392 class ESet { 00393 public: 00394 class Internal; 00396 Xapian::Internal::RefCntPtr<Internal> internal; 00397 00399 ESet(); 00400 00402 ~ESet(); 00403 00405 ESet(const ESet & other); 00406 00408 void operator=(const ESet &other); 00409 00414 Xapian::termcount get_ebound() const; 00415 00417 Xapian::termcount size() const; 00418 00420 Xapian::termcount max_size() const { return size(); } 00421 00423 bool empty() const; 00424 00426 void swap(ESet & other); 00427 00429 ESetIterator begin() const; 00430 00432 ESetIterator end() const; 00433 00435 ESetIterator back() const; 00436 00438 ESetIterator operator[](Xapian::doccount i) const; 00439 00444 std::string get_description() const; 00445 }; 00446 00448 class ESetIterator { 00449 private: 00450 friend class ESet; 00451 friend bool operator==(const ESetIterator &a, const ESetIterator &b); 00452 friend bool operator!=(const ESetIterator &a, const ESetIterator &b); 00453 00454 ESetIterator(Xapian::termcount index_, const ESet & eset_) 00455 : index(index_), eset(eset_) { } 00456 00457 Xapian::termcount index; 00458 ESet eset; 00459 00460 public: 00464 ESetIterator() : index(0), eset() { } 00465 00466 ~ESetIterator() { } 00467 00469 ESetIterator(const ESetIterator &other) { 00470 index = other.index; 00471 eset = other.eset; 00472 } 00473 00475 void operator=(const ESetIterator &other) { 00476 index = other.index; 00477 eset = other.eset; 00478 } 00479 00481 ESetIterator & operator++() { 00482 ++index; 00483 return *this; 00484 } 00485 00487 ESetIterator operator++(int) { 00488 ESetIterator tmp = *this; 00489 ++index; 00490 return tmp; 00491 } 00492 00494 ESetIterator & operator--() { 00495 --index; 00496 return *this; 00497 } 00498 00500 ESetIterator operator--(int) { 00501 ESetIterator tmp = *this; 00502 --index; 00503 return tmp; 00504 } 00505 00507 const std::string & operator *() const; 00508 00510 Xapian::weight get_weight() const; 00511 00515 std::string get_description() const; 00516 00518 00519 typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator! 00520 typedef std::string value_type; 00521 typedef Xapian::termcount_diff difference_type; 00522 typedef std::string * pointer; 00523 typedef std::string & reference; 00525 }; 00526 00527 inline bool operator==(const ESetIterator &a, const ESetIterator &b) 00528 { 00529 return (a.index == b.index); 00530 } 00531 00532 inline bool operator!=(const ESetIterator &a, const ESetIterator &b) 00533 { 00534 return (a.index != b.index); 00535 } 00536 00541 class RSet { 00542 public: 00544 class Internal; 00545 00547 Xapian::Internal::RefCntPtr<Internal> internal; 00548 00550 RSet(const RSet &rset); 00551 00553 void operator=(const RSet &rset); 00554 00556 RSet(); 00557 00559 ~RSet(); 00560 00562 Xapian::doccount size() const; 00563 00565 bool empty() const; 00566 00568 void add_document(Xapian::docid did); 00569 00571 void add_document(const Xapian::MSetIterator & i) { add_document(*i); } 00572 00574 void remove_document(Xapian::docid did); 00575 00577 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); } 00578 00580 bool contains(Xapian::docid did) const; 00581 00583 bool contains(const Xapian::MSetIterator & i) { return contains(*i); } 00584 00589 std::string get_description() const; 00590 }; 00591 00594 class MatchDecider { 00595 public: 00598 virtual int operator()(const Xapian::Document &doc) const = 0; 00599 00601 virtual ~MatchDecider() {} 00602 }; 00603 00606 class ExpandDecider { 00607 public: 00610 virtual int operator()(const std::string & tname) const = 0; 00611 00613 virtual ~ExpandDecider() {} 00614 }; 00615 00626 class Enquire { 00627 private: 00629 Enquire(const Enquire &); 00630 00632 void operator=(const Enquire &); 00633 00634 public: 00635 class Internal; 00637 Xapian::Internal::RefCntPtr<Internal> internal; 00638 00654 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0); 00655 00658 ~Enquire(); 00659 00666 void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0); 00667 00674 const Xapian::Query & get_query(); 00675 00682 void set_weighting_scheme(const Weight &weight_); 00683 00710 void set_collapse_key(Xapian::valueno collapse_key); 00711 00712 typedef enum { 00713 ASCENDING = 1, 00714 DESCENDING = 0, 00715 DONT_CARE = 2 00716 } docid_order; 00717 00740 void set_docid_order(docid_order order); 00741 00743 void set_sort_forward(bool sort_forward) { 00744 set_docid_order(sort_forward ? ASCENDING : DESCENDING); 00745 } 00746 00765 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0); 00766 00768 void set_sorting(Xapian::valueno sort_key, int sort_bands, 00769 bool sort_by_relevance = false) { 00770 if (sort_bands > 1) { 00771 throw Xapian::UnimplementedError("sort bands are no longer supported"); 00772 } 00773 if (sort_bands == 0 || sort_key == Xapian::valueno(-1)) { 00774 set_sort_by_relevance(); 00775 } else if (!sort_by_relevance) { 00776 set_sort_by_value(sort_key); 00777 } else { 00778 set_sort_by_value_then_relevance(sort_key); 00779 } 00780 } 00781 00784 void set_sort_by_relevance(); 00785 00796 void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true); 00797 void set_sort_by_value_then_relevance(Xapian::valueno sort_key, 00798 bool ascending = true); 00799 // FIXME: consider implementing this: 00800 // void set_sort_by_relevance_then_value(Xapian::valueno sort_key, 00801 // bool ascending); 00802 00814 void set_bias(Xapian::weight bias_weight, time_t bias_halflife); 00815 00841 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, 00842 Xapian::doccount checkatleast = 0, 00843 const RSet * omrset = 0, 00844 const MatchDecider * mdecider = 0) const; 00845 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems, 00846 const RSet * omrset, 00847 const MatchDecider * mdecider = 0) const { 00848 return get_mset(first, maxitems, 0, omrset, mdecider); 00849 } 00850 00851 static const int include_query_terms = 1; 00852 static const int use_exact_termfreq = 2; 00875 ESet get_eset(Xapian::termcount maxitems, 00876 const RSet & omrset, 00877 int flags = 0, 00878 double k = 1.0, 00879 const Xapian::ExpandDecider * edecider = 0) const; 00880 00894 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset, 00895 const Xapian::ExpandDecider * edecider) const { 00896 return get_eset(maxitems, omrset, 0, 1.0, edecider); 00897 } 00898 00927 TermIterator get_matching_terms_begin(Xapian::docid did) const; 00928 00930 TermIterator get_matching_terms_end(Xapian::docid did) const; 00931 00954 TermIterator get_matching_terms_begin(const MSetIterator &it) const; 00955 00957 TermIterator get_matching_terms_end(const MSetIterator &it) const; 00958 00965 void register_match_decider(const std::string &name, 00966 const MatchDecider *mdecider = NULL); 00967 00971 std::string get_description() const; 00972 }; 00973 00974 } 00975 00976 class SocketServer; 00977 00978 namespace Xapian { 00979 00981 class Weight { 00982 friend class Enquire; // So Enquire can clone us 00983 friend class ::SocketServer; // So SocketServer can clone us - FIXME 00984 public: 00985 class Internal; 00986 protected: 00987 Weight(const Weight &); 00988 private: 00989 void operator=(Weight &); 00990 01000 virtual Weight * clone() const = 0; 01001 01002 protected: 01003 const Internal * internal; // Weight::Internal == StatsSource 01004 Xapian::doclength querysize; 01005 Xapian::termcount wqf; 01006 std::string tname; 01007 01008 public: 01009 Weight() { } 01010 virtual ~Weight() { } 01011 01024 Weight * create(const Internal * internal_, Xapian::doclength querysize_, 01025 Xapian::termcount wqf_, std::string tname_) const { 01026 Weight * wt = clone(); 01027 wt->internal = internal_; 01028 wt->querysize = querysize_; 01029 wt->wqf = wqf_; 01030 wt->tname = tname_; 01031 return wt; 01032 } 01033 01038 virtual std::string name() const = 0; 01039 01041 virtual std::string serialise() const = 0; 01042 01044 virtual Weight * unserialise(const std::string &s) const = 0; 01045 01053 virtual Xapian::weight get_sumpart(Xapian::termcount wdf, 01054 Xapian::doclength len) const = 0; 01055 01061 virtual Xapian::weight get_maxpart() const = 0; 01062 01071 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0; 01072 01076 virtual Xapian::weight get_maxextra() const = 0; 01077 01079 virtual bool get_sumpart_needs_doclength() const { return true; } 01080 }; 01081 01083 class BoolWeight : public Weight { 01084 public: 01085 BoolWeight * clone() const { 01086 return new BoolWeight; 01087 } 01088 BoolWeight() { } 01089 ~BoolWeight() { } 01090 std::string name() const { return "Bool"; } 01091 std::string serialise() const { return ""; } 01092 BoolWeight * unserialise(const std::string & /*s*/) const { 01093 return new BoolWeight; 01094 } 01095 Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; } 01096 Xapian::weight get_maxpart() const { return 0; } 01097 01098 Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; } 01099 Xapian::weight get_maxextra() const { return 0; } 01100 01101 bool get_sumpart_needs_doclength() const { return false; } 01102 }; 01103 01116 class BM25Weight : public Weight { 01117 private: 01118 mutable Xapian::weight termweight; 01119 mutable Xapian::doclength lenpart; 01120 01121 double k1, k2, k3, b; 01122 Xapian::doclength min_normlen; 01123 01124 mutable bool weight_calculated; 01125 01126 void calc_termweight() const; 01127 01128 public: 01147 BM25Weight(double k1_, double k2_, double k3_, double b_, 01148 double min_normlen_) 01149 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_), 01150 weight_calculated(false) 01151 { 01152 if (k1 < 0) k1 = 0; 01153 if (k2 < 0) k2 = 0; 01154 if (k3 < 0) k3 = 0; 01155 if (b < 0) b = 0; else if (b > 1) b = 1; 01156 } 01157 BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5), 01158 weight_calculated(false) { } 01159 01160 BM25Weight * clone() const; 01161 ~BM25Weight() { } 01162 std::string name() const; 01163 std::string serialise() const; 01164 BM25Weight * unserialise(const std::string & s) const; 01165 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const; 01166 Xapian::weight get_maxpart() const; 01167 01168 Xapian::weight get_sumextra(Xapian::doclength len) const; 01169 Xapian::weight get_maxextra() const; 01170 01171 bool get_sumpart_needs_doclength() const; 01172 }; 01173 01187 class TradWeight : public Weight { 01188 private: 01189 mutable Xapian::weight termweight; 01190 mutable Xapian::doclength lenpart; 01191 01192 double param_k; 01193 01194 mutable bool weight_calculated; 01195 01196 void calc_termweight() const; 01197 01198 public: 01206 explicit TradWeight(double k) : param_k(k), weight_calculated(false) { 01207 if (param_k < 0) param_k = 0; 01208 } 01209 01210 TradWeight() : param_k(1.0), weight_calculated(false) { } 01211 01212 TradWeight * clone() const; 01213 ~TradWeight() { } 01214 std::string name() const; 01215 std::string serialise() const; 01216 TradWeight * unserialise(const std::string & s) const; 01217 01218 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const; 01219 Xapian::weight get_maxpart() const; 01220 01221 Xapian::weight get_sumextra(Xapian::doclength len) const; 01222 Xapian::weight get_maxextra() const; 01223 01224 bool get_sumpart_needs_doclength() const; 01225 }; 01226 01227 } 01228 01229 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.9.0).
Generated on 13 May 2005 by Doxygen 1.3.8.