Branch data Line data Source code
1 : : /** @file synonympostlist.h
2 : : * @brief Combine subqueries, weighting as if they are synonyms
3 : : */
4 : : /* Copyright 2007,2009 Lemur Consulting Ltd
5 : : * Copyright 2009 Olly Betts
6 : : *
7 : : * This program is free software; you can redistribute it and/or modify
8 : : * it under the terms of the GNU General Public License as published by
9 : : * the Free Software Foundation; either version 2 of the License, or
10 : : * (at your option) any later version.
11 : : *
12 : : * This program is distributed in the hope that it will be useful,
13 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : * GNU General Public License for more details.
16 : : *
17 : : * You should have received a copy of the GNU General Public License
18 : : * along with this program; if not, write to the Free Software
19 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 : : */
21 : :
22 : : #ifndef XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
23 : : #define XAPIAN_INCLUDED_SYNONYMPOSTLIST_H
24 : :
25 : : #include "multimatch.h"
26 : : #include "postlist.h"
27 : :
28 : : /** A postlist comprising several postlists SYNONYMed together.
29 : : *
30 : : * This postlist returns all postings in the OR of the sub postlists, but
31 : : * returns weights as if they represented a single term. The term frequency
32 : : * portion of the weight is approximated.
33 : : */
34 : : class SynonymPostList : public PostList {
35 : : /** The subtree, which starts as an OR of all the sub-postlists being
36 : : * joined with Synonym, but may decay into something else.
37 : : */
38 : : PostList * subtree;
39 : :
40 : : /** The object which is using this postlist to perform a match.
41 : : *
42 : : * This object needs to be notified when the tree changes such that the
43 : : * maximum weights need to be recalculated.
44 : : */
45 : : MultiMatch * matcher;
46 : :
47 : : /// Weighting object used for calculating the synonym weights.
48 : : const Xapian::Weight * wt;
49 : :
50 : : /// Flag indicating whether the weighting object needs the doclength.
51 : : bool want_doclength;
52 : :
53 : : /// Flag indicating whether the weighting object needs the wdf.
54 : : bool want_wdf;
55 : :
56 : : /// Flag indicating if we've called recalc_maxweight on the subtree yet.
57 : : bool have_calculated_subtree_maxweights;
58 : :
59 : : public:
60 : 592 : SynonymPostList(PostList * subtree_, MultiMatch * matcher_)
61 : : : subtree(subtree_), matcher(matcher_), wt(NULL),
62 : : want_doclength(false), want_wdf(false),
63 : 592 : have_calculated_subtree_maxweights(false) { }
64 : :
65 : : ~SynonymPostList();
66 : :
67 : : /** Set the weight object to be used for the synonym postlist.
68 : : *
69 : : * Ownership of the weight object passes to the synonym postlist - the
70 : : * caller must not delete it after use.
71 : : */
72 : : void set_weight(const Xapian::Weight * wt_);
73 : :
74 : : PostList *next(Xapian::weight w_min);
75 : : PostList *skip_to(Xapian::docid did, Xapian::weight w_min);
76 : :
77 : : Xapian::weight get_weight() const;
78 : : Xapian::weight get_maxweight() const;
79 : : Xapian::weight recalc_maxweight();
80 : :
81 : : // The following methods just call through to the subtree.
82 : : Xapian::termcount get_wdf() const;
83 : : Xapian::doccount get_termfreq_min() const;
84 : : Xapian::doccount get_termfreq_est() const;
85 : : Xapian::doccount get_termfreq_max() const;
86 : : // Note - we don't need to implement get_termfreq_est_using_stats()
87 : : // because a synonym when used as a child of a synonym will be optimised
88 : : // to an OR.
89 : : Xapian::docid get_docid() const;
90 : : Xapian::termcount get_doclength() const;
91 : : bool at_end() const;
92 : :
93 : : Xapian::termcount count_matching_subqs() const;
94 : :
95 : : std::string get_description() const;
96 : : };
97 : :
98 : : #endif /* XAPIAN_INCLUDED_SYNONYMPOSTLIST_H */
|