00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
#ifndef XAPIAN_INCLUDED_ENQUIRE_H
00027
#define XAPIAN_INCLUDED_ENQUIRE_H
00028
00029
#include <iterator>
#include <string>
#include <time.h>

#include <xapian/base.h>
#include <xapian/types.h>
00034
00035
namespace Xapian {
00036
00037
// Forward declarations: these types are only named in the declarations
// below, so their full definitions are not needed at this point.
class Database;
class Document;
class ErrorHandler;
class MSetIterator;
class Query;
class TermIterator;
class Weight;
00044
00048 class MSet {
00049
public:
00050
class Internal;
00052 Xapian::Internal::RefCntPtr<Internal> internal;
00053
00054
public:
00055
00057
explicit MSet(MSet::Internal * internal_);
00058
00060
MSet();
00061
00063
~MSet();
00064
00066
MSet(
const MSet & other);
00067
00069
void operator=(
const MSet &other);
00070
00086
void fetch(
const MSetIterator &begin,
const MSetIterator &end)
const;
00087
00090
void fetch(
const MSetIterator &item)
const;
00091
00094
void fetch() const;
00095
00100 Xapian::
percent convert_to_percent(Xapian::
weight wt) const;
00101
00103 Xapian::
percent convert_to_percent(const
MSetIterator &it) const;
00104
00112 Xapian::
doccount get_termfreq(const std::string &tname) const;
00113
00121 Xapian::
weight get_termweight(const std::string &tname) const;
00122
00130 Xapian::
doccount get_firstitem() const;
00131
00141 Xapian::
doccount get_matches_lower_bound() const;
00142
00155 Xapian::
doccount get_matches_estimated() const;
00156
00166 Xapian::
doccount get_matches_upper_bound() const;
00167
00173 Xapian::
weight get_max_possible() const;
00174
00188 Xapian::
weight get_max_attained() const;
00189
00191 Xapian::
termcount size() const;
00192
00193 Xapian::
doccount max_size() const;
00194
00196
bool empty() const;
00197
00199
void swap(
MSet & other);
00200
00202
MSetIterator begin() const;
00203
00205
MSetIterator end() const;
00206
00208
MSetIterator back() const;
00209
00219
MSetIterator operator[](Xapian::
doccount i) const;
00220
00222
00223 typedef
MSetIterator value_type;
00224 typedef
MSetIterator iterator;
00225 typedef
MSetIterator const_iterator;
00226 typedef
MSetIterator &
reference;
00227 typedef
MSetIterator &
const_reference;
00228 typedef
MSetIterator *
pointer;
00229 typedef Xapian::
doccount_diff difference_type;
00230 typedef Xapian::
doccount size_type;
00232
00236 std::string get_description() const;
00237 };
00238
00242 class
MSetIterator {
00243
private:
00244
friend class MSet;
00245
friend bool operator==(
const MSetIterator &a,
const MSetIterator &b);
00246
friend bool operator!=(
const MSetIterator &a,
const MSetIterator &b);
00247
00248 MSetIterator(Xapian::doccount index_,
const MSet & mset_)
00249 : index(index_), mset(mset_) { }
00250
00251 Xapian::doccount index;
00252
MSet mset;
00253
00254
public:
00258 MSetIterator() : index(0), mset() { }
00259
00260 ~MSetIterator() { }
00261
00263 MSetIterator(
const MSetIterator &other) {
00264 index = other.
index;
00265 mset = other.
mset;
00266 }
00267
00269 void operator=(
const MSetIterator &other) {
00270 index = other.
index;
00271 mset = other.
mset;
00272 }
00273
00275 MSetIterator & operator++() {
00276 ++index;
00277
return *
this;
00278 }
00279
00281 MSetIterator operator++(
int) {
00282 MSetIterator tmp = *
this;
00283 ++index;
00284
return tmp;
00285 }
00286
00288 MSetIterator & operator--() {
00289 --index;
00290
return *
this;
00291 }
00292
00294 MSetIterator operator--(
int) {
00295 MSetIterator tmp = *
this;
00296 --index;
00297
return tmp;
00298 }
00299
00301 Xapian::docid operator*() const;
00302
00321 Xapian::
Document get_document() const;
00322
00329 Xapian::
doccount get_rank()
const {
00330
return mset.
get_firstitem() + index;
00331 }
00332
00334 Xapian::weight get_weight() const;
00335
00352 Xapian::
doccount get_collapse_count() const;
00353
00359 Xapian::
percent get_percent() const;
00360
00364 std::string get_description() const;
00365
00367
00368 typedef std::bidirectional_iterator_tag iterator_category;
00369 typedef Xapian::
docid value_type;
00370 typedef Xapian::
doccount_diff difference_type;
00371 typedef Xapian::
docid * pointer;
00372 typedef Xapian::
docid & reference;
00374 };
00375
00376 inline
bool operator==(const MSetIterator &a, const MSetIterator &b)
00377 {
00378
return (a.index == b.index);
00379 }
00380
00381
inline bool operator!=(
const MSetIterator &a,
const MSetIterator &b)
00382 {
00383
return (a.index != b.index);
00384 }
00385
00386
class ESetIterator;
00387
00392 class ESet {
00393
public:
00394
class Internal;
00396 Xapian::Internal::RefCntPtr<Internal> internal;
00397
00399
ESet();
00400
00402 ~
ESet();
00403
00405
ESet(
const ESet & other);
00406
00408
void operator=(
const ESet &other);
00409
00414 Xapian::termcount get_ebound() const;
00415
00417 Xapian::
termcount size() const;
00418
00420
bool empty() const;
00421
00423
void swap(
ESet & other);
00424
00426
ESetIterator begin() const;
00427
00429
ESetIterator end() const;
00430
00432
ESetIterator back() const;
00433
00435
ESetIterator operator[](Xapian::
doccount i) const;
00436
00441 std::string get_description() const;
00442 };
00443
00445 class
ESetIterator {
00446
private:
00447
friend class ESet;
00448
friend bool operator==(
const ESetIterator &a,
const ESetIterator &b);
00449
friend bool operator!=(
const ESetIterator &a,
const ESetIterator &b);
00450
00451 ESetIterator(Xapian::termcount index_,
const ESet & eset_)
00452 : index(index_), eset(eset_) { }
00453
00454 Xapian::termcount index;
00455
ESet eset;
00456
00457
public:
00461 ESetIterator() : index(0), eset() { }
00462
00463 ~ESetIterator() { }
00464
00466 ESetIterator(
const ESetIterator &other) {
00467 index = other.
index;
00468 eset = other.
eset;
00469 }
00470
00472 void operator=(
const ESetIterator &other) {
00473 index = other.
index;
00474 eset = other.
eset;
00475 }
00476
00478 ESetIterator & operator++() {
00479 ++index;
00480
return *
this;
00481 }
00482
00484 ESetIterator operator++(
int) {
00485 ESetIterator tmp = *
this;
00486 ++index;
00487
return tmp;
00488 }
00489
00491 ESetIterator & operator--() {
00492 --index;
00493
return *
this;
00494 }
00495
00497 ESetIterator operator--(
int) {
00498 ESetIterator tmp = *
this;
00499 --index;
00500
return tmp;
00501 }
00502
00504
const std::string & operator *() const;
00505
00507 Xapian::
weight get_weight() const;
00508
00512 std::string get_description() const;
00513
00515
00516 typedef std::bidirectional_iterator_tag iterator_category;
00517 typedef std::string value_type;
00518 typedef Xapian::
termcount_diff difference_type;
00519 typedef std::string * pointer;
00520 typedef std::string & reference;
00522 };
00523
00524 inline
bool operator==(const ESetIterator &a, const ESetIterator &b)
00525 {
00526
return (a.index == b.index);
00527 }
00528
00529
inline bool operator!=(
const ESetIterator &a,
const ESetIterator &b)
00530 {
00531
return (a.index != b.index);
00532 }
00533
00538 class RSet {
00539
public:
00541
class Internal;
00542
00544 Internal *internal;
00545
00547
RSet(
const RSet &rset);
00548
00550
void operator=(
const RSet &rset);
00551
00553
RSet();
00554
00556 ~
RSet();
00557
00559 Xapian::doccount
size() const;
00560
00562
bool empty() const;
00563
00565
void add_document(Xapian::
docid did);
00566
00568 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00569
00571
void remove_document(Xapian::docid did);
00572
00574 void remove_document(
const Xapian::MSetIterator & i) { remove_document(*i); }
00575
00577
bool contains(Xapian::docid did)
const;
00578
00580 bool contains(
const Xapian::MSetIterator & i) {
return contains(*i); }
00581
00586 std::string
get_description() const;
00587 };
00588
00591 class
MatchDecider {
00592
public:
00595
virtual int operator()(
const Xapian::Document &doc)
const = 0;
00596
00598 virtual ~MatchDecider() {}
00599 };
00600
/** Abstract functor taking a term name and returning an int.
 *
 *  Derived classes implement operator().
 *  NOTE(review): the result is presumably used as an accept/reject
 *  flag for the term - confirm against the callers.
 */
class ExpandDecider {
  public:
    /// The decision function, to be supplied by the derived class.
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual so that objects can be destroyed through a base pointer.
    virtual ~ExpandDecider() {}
};
00612
00623 class Enquire {
00624
private:
00626
Enquire(
const Enquire &);
00627
00629
void operator=(
const Enquire &);
00630
00631
public:
00632
class Internal;
00634 Xapian::Internal::RefCntPtr<Internal> internal;
00635
00651
Enquire(
const Database &databases,
ErrorHandler * errorhandler_ = 0);
00652
00655 ~
Enquire();
00656
00663
void set_query(
const Xapian::Query & query_);
00664
00671
const Xapian::Query & get_query();
00672
00679
void set_weighting_scheme(
const Weight &weight_);
00680
00707
void set_collapse_key(Xapian::valueno collapse_key);
00708
00715
void set_sort_forward(
bool sort_forward);
00716
00734
void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00735
00752
void set_sorting(Xapian::valueno sort_key,
int sort_bands,
00753
bool sort_by_relevance =
false);
00754
00766
void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00767
00793
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00794 Xapian::doccount checkatleast = 0,
00795
const RSet * omrset = 0,
00796
const MatchDecider * mdecider = 0)
const;
00797
MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00798
const RSet * omrset,
00799
const MatchDecider * mdecider = 0)
const {
00800
return get_mset(first, maxitems, 0, omrset, mdecider);
00801 }
00802
00803
static const int include_query_terms = 1;
00804
static const int use_exact_termfreq = 2;
00827
ESet get_eset(Xapian::termcount maxitems,
00828
const RSet & omrset,
00829
int flags = 0,
00830
double k = 1.0,
00831
const Xapian::ExpandDecider * edecider = 0)
const;
00832
00846 inline ESet get_eset(Xapian::termcount maxitems,
const RSet & omrset,
00847
const Xapian::ExpandDecider * edecider)
const {
00848
return get_eset(maxitems, omrset, 0, 1.0, edecider);
00849 }
00850
00879
TermIterator get_matching_terms_begin(Xapian::docid did)
const;
00880
00882
TermIterator get_matching_terms_end(Xapian::docid did)
const;
00883
00906
TermIterator get_matching_terms_begin(
const MSetIterator &it)
const;
00907
00909
TermIterator get_matching_terms_end(
const MSetIterator &it)
const;
00910
00917
void register_match_decider(
const std::string &name,
00918
const MatchDecider *mdecider = NULL);
00919
00923 std::string
get_description() const;
00924 };
00925
00926 }
00927
00928 class SocketServer;
00929
00930 namespace Xapian {
00931
00933 class Weight {
00934
friend class Enquire;
00935
friend class ::SocketServer;
00936
public:
00937
class Internal;
00938
protected:
00939
Weight(
const Weight &);
00940
private:
00941
void operator=(
Weight &);
00942
00952
virtual Weight * clone()
const = 0;
00953
00954
protected:
00955
const Internal * internal;
00956 Xapian::doclength querysize;
00957 Xapian::termcount wqf;
00958 std::string tname;
00959
00960
public:
00961
Weight() { }
00962
virtual ~
Weight() { }
00963
00976 Weight * create(
const Internal * internal_, Xapian::doclength querysize_,
00977 Xapian::termcount wqf_, std::string tname_)
const {
00978
Weight * wt = clone();
00979 wt->
internal = internal_;
00980 wt->
querysize = querysize_;
00981 wt->
wqf = wqf_;
00982 wt->
tname = tname_;
00983
return wt;
00984 }
00985
00990
virtual std::string name() const = 0;
00991
00993 virtual std::string serialise() const = 0;
00994
00996 virtual
Weight * unserialise(const std::string &s) const = 0;
00997
01005 virtual Xapian::
weight get_sumpart(Xapian::
termcount wdf,
01006 Xapian::
doclength len) const = 0;
01007
01013 virtual Xapian::
weight get_maxpart() const = 0;
01014
01023 virtual Xapian::
weight get_sumextra(Xapian::
doclength len) const = 0;
01024
01028 virtual Xapian::
weight get_maxextra() const = 0;
01029
01031 virtual
bool get_sumpart_needs_doclength()
const {
return true; }
01032 };
01033
01035 class BoolWeight :
public Weight {
01036
public:
01037 BoolWeight * clone()
const {
01038
return new BoolWeight;
01039 }
01040
BoolWeight() { }
01041 ~BoolWeight() { }
01042 std::string name()
const {
return "Bool"; }
01043 std::string serialise()
const {
return ""; }
01044 BoolWeight * unserialise(
const std::string & )
const {
01045
return new BoolWeight;
01046 }
01047 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength )
const {
return 0; }
01048 Xapian::weight get_maxpart()
const {
return 0; }
01049
01050 Xapian::weight get_sumextra(Xapian::doclength )
const {
return 0; }
01051 Xapian::weight get_maxextra()
const {
return 0; }
01052
01053 bool get_sumpart_needs_doclength()
const {
return false; }
01054 };
01055
01068 class BM25Weight :
public Weight {
01069
private:
01070
mutable Xapian::weight termweight;
01071
mutable Xapian::doclength lenpart;
01072
01073
double k1, k2, k3, b;
01074 Xapian::doclength min_normlen;
01075
01076
mutable bool weight_calculated;
01077
01078
void calc_termweight()
const;
01079
01080
public:
01099 BM25Weight(
double k1_,
double k2_,
double k3_,
double b_,
01100
double min_normlen_)
01101 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01102 weight_calculated(false)
01103 {
01104
if (k1 < 0) k1 = 0;
01105
if (k2 < 0) k2 = 0;
01106
if (k3 < 0) k3 = 0;
01107
if (b < 0) b = 0; else if (b > 1) b = 1;
01108 }
01109
BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01110 weight_calculated(false) { }
01111
01112 BM25Weight * clone() const;
01113 ~BM25Weight() { }
01114 std::string name() const;
01115 std::string serialise() const;
01116 BM25Weight * unserialise(const std::string & s) const;
01117 Xapian::
weight get_sumpart(Xapian::
termcount wdf, Xapian::
doclength len) const;
01118 Xapian::
weight get_maxpart() const;
01119
01120 Xapian::
weight get_sumextra(Xapian::
doclength len) const;
01121 Xapian::
weight get_maxextra() const;
01122
01123
bool get_sumpart_needs_doclength() const;
01124 };
01125
01139 class
TradWeight : public
Weight {
01140
private:
01141
mutable Xapian::weight termweight;
01142
mutable Xapian::doclength lenpart;
01143
01144
double param_k;
01145
01146
mutable bool weight_calculated;
01147
01148
void calc_termweight()
const;
01149
01150
public:
01158 explicit TradWeight(
double k) : param_k(k), weight_calculated(false) {
01159
if (param_k < 0) param_k = 0;
01160 }
01161
01162
TradWeight() : param_k(1.0), weight_calculated(false) { }
01163
01164 TradWeight * clone() const;
01165 ~TradWeight() { }
01166 std::string name() const;
01167 std::string serialise() const;
01168 TradWeight * unserialise(const std::string & s) const;
01169
01170 Xapian::
weight get_sumpart(Xapian::
termcount wdf, Xapian::
doclength len) const;
01171 Xapian::
weight get_maxpart() const;
01172
01173 Xapian::
weight get_sumextra(Xapian::
doclength len) const;
01174 Xapian::
weight get_maxextra() const;
01175
01176
bool get_sumpart_needs_doclength() const;
01177 };
01178
01179 }
01180
01181 #endif