Branch data Line data Source code
1 : : /* andmaybepostlist.h: Merged postlist: items from one list, weights from both
2 : : *
3 : : * AND MAYBE of two posting lists
4 : : * A AND MAYBE B is logically just A, but we keep B around for weight purposes
5 : : *
6 : : * Copyright 1999,2000,2001 BrightStation PLC
7 : : * Copyright 2002 Ananova Ltd
8 : : * Copyright 2003,2004,2009 Olly Betts
9 : : * Copyright 2009 Lemur Consulting Ltd
10 : : *
11 : : * This program is free software; you can redistribute it and/or
12 : : * modify it under the terms of the GNU General Public License as
13 : : * published by the Free Software Foundation; either version 2 of the
14 : : * License, or (at your option) any later version.
15 : : *
16 : : * This program is distributed in the hope that it will be useful,
17 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : : * GNU General Public License for more details.
20 : : *
21 : : * You should have received a copy of the GNU General Public License
22 : : * along with this program; if not, write to the Free Software
23 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
24 : : * USA
25 : : */
26 : :
27 : :
28 : : #ifndef OM_HGUARD_ANDMAYBEPOSTLIST_H
29 : : #define OM_HGUARD_ANDMAYBEPOSTLIST_H
30 : :
31 : : #include "branchpostlist.h"
32 : :
33 : : /** A postlist with weights modified by another postlist.
34 : : *
35 : : * This postlist returns a posting if and only if it is in the left
36 : : * sub-postlist.
37 : : *
38 : : * If the posting does not occur in the right postlist, the weight for the
39 : : * posting is simply that in the left postlist. If the posting occurs in
40 : : * both postlists, the weight for the posting is the sum of the weights in
41 : : * the sub-postlists.
42 : : *
43 : : * This type of postlist is useful for specifying a set of terms which
44 : : * must appear in the query result: these terms can be specified as the
45 : : * left hand argument, with the rest of the query being on the right hand
46 : : * side, and having the effect of modifying the weights.
47 : : *
48 : : * The postlist is also used as a "decay product" of other postlist types
49 : : * during the match process: when a postlist can no longer cause a
50 : : * document to enter the mset on its own, but can influence relative
51 : : * rankings, it may be combined using one of these.
52 : : */
53 [ + - ][ # # ]: 232 : class AndMaybePostList : public BranchPostList {
54 : : private:
55 : : Xapian::doccount dbsize; // only needed in case we decay to an AndPostList
56 : : Xapian::docid lhead, rhead; // set to 0 by the plain constructor, or to lhead_/rhead_ by the decomposing one
57 : : Xapian::weight lmax, rmax; // filled from l->get_maxweight() / r->get_maxweight() by the decomposing constructor; left unset by the plain one
58 : :
59 : : PostList * process_next_or_skip_to(Xapian::weight w_min, PostList *ret); // presumably the shared tail of next() and skip_to() - TODO confirm against the implementation file
60 : : public:
61 : : Xapian::doccount get_termfreq_max() const;
62 : : Xapian::doccount get_termfreq_min() const;
63 : : Xapian::doccount get_termfreq_est() const;
64 : :
65 : : TermFreqs get_termfreq_est_using_stats(
66 : : const Xapian::Weight::Internal & stats) const;
67 : :
68 : : Xapian::docid get_docid() const;
69 : : Xapian::weight get_weight() const;
70 : : Xapian::weight get_maxweight() const;
71 : :
72 : : Xapian::weight recalc_maxweight();
73 : :
74 : : PostList *next(Xapian::weight w_min);
75 : : PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
76 : : bool at_end() const;
77 : :
78 : : std::string get_description() const;
79 : :
80 : : /** Return the document length of the document the current term
81 : : * comes from.
82 : : */
83 : : virtual Xapian::termcount get_doclength() const;
84 : :
85 : 100 : AndMaybePostList(PostList *left_,
86 : : PostList *right_,
87 : : MultiMatch *matcher_,
88 : : Xapian::doccount dbsize_)
89 : : : BranchPostList(left_, right_, matcher_),
90 : 100 : dbsize(dbsize_), lhead(0), rhead(0)
91 : : {
92 : : // lmax and rmax are intentionally not initialised here; they will get initialised by a recalc_maxweight (NOTE(review): confirm nothing reads them before that call)
93 : 100 : }
94 : :
95 : : /// Constructor for use by decomposing OrPostList
96 : 132 : AndMaybePostList(PostList *left_,
97 : : PostList *right_,
98 : : MultiMatch *matcher_,
99 : : Xapian::doccount dbsize_,
100 : : Xapian::docid lhead_,
101 : : Xapian::docid rhead_)
102 : : : BranchPostList(left_, right_, matcher_),
103 : 132 : dbsize(dbsize_), lhead(lhead_), rhead(rhead_)
104 : : {
105 : : // Initialise the maxweights from the children (l and r) so we can avoid forcing
106 : : // a full maxweight recalc
107 : 132 : lmax = l->get_maxweight();
108 : 132 : rmax = r->get_maxweight();
109 : 132 : }
110 : :
111 : : /** Synchronise the RHS to the LHS after construction.
112 : : * Used after constructing from a decomposing OrPostList
113 : : */
114 : : PostList * sync_rhs(Xapian::weight w_min);
115 : :
116 : : /** get_wdf() for ANDMAYBE postlists returns the sum of the wdfs of the
117 : : * sub postlists which are at the current document - this is desirable
118 : : * when the ANDMAYBE is part of a synonym.
119 : : */
120 : : Xapian::termcount get_wdf() const;
121 : :
122 : : Xapian::termcount count_matching_subqs() const;
123 : : };
124 : :
125 : : #endif /* OM_HGUARD_ANDMAYBEPOSTLIST_H */
|