Branch data Line data Source code
1 : : /** @file localsubmatch.cc
2 : : * @brief SubMatch class for a local database.
3 : : */
4 : : /* Copyright (C) 2006,2007,2009,2010 Olly Betts
5 : : * Copyright (C) 2007,2008,2009 Lemur Consulting Ltd
6 : : *
7 : : * This program is free software; you can redistribute it and/or modify
8 : : * it under the terms of the GNU General Public License as published by
9 : : * the Free Software Foundation; either version 2 of the License, or
10 : : * (at your option) any later version.
11 : : *
12 : : * This program is distributed in the hope that it will be useful,
13 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : * GNU General Public License for more details.
16 : : *
17 : : * You should have received a copy of the GNU General Public License
18 : : * along with this program; if not, write to the Free Software
19 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 : : */
21 : :
22 : : #include <config.h>
23 : :
24 : : #include "localsubmatch.h"
25 : :
26 : : #include "database.h"
27 : : #include "debuglog.h"
28 : : #include "extraweightpostlist.h"
29 : : #include "leafpostlist.h"
30 : : #include "omassert.h"
31 : : #include "omqueryinternal.h"
32 : : #include "queryoptimiser.h"
33 : : #include "synonympostlist.h"
34 : : #include "termlist.h"
35 : : #include "weightinternal.h"
36 : :
37 : : #include "autoptr.h"
38 : : #include <map>
39 : : #include <string>
40 : :
41 : : using namespace std;
42 : :
43 : : bool
44 : 246355 : LocalSubMatch::prepare_match(bool nowait,
45 : : Xapian::Weight::Internal & total_stats)
46 : : {
47 : : LOGCALL(MATCH, bool, "LocalSubMatch::prepare_match", nowait | total_stats);
48 : : (void)nowait;
49 : : Assert(db);
50 : : Assert(query);
51 : 246355 : total_stats.accumulate_stats(*db, rset);
52 : 246352 : RETURN(true);
53 : : }
54 : :
55 : : void
56 : 246352 : LocalSubMatch::start_match(Xapian::doccount first,
57 : : Xapian::doccount maxitems,
58 : : Xapian::doccount check_at_least,
59 : : const Xapian::Weight::Internal & total_stats)
60 : : {
61 : : LOGCALL_VOID(MATCH, "LocalSubMatch::start_match", first | maxitems | check_at_least | total_stats);
62 : : (void)first;
63 : : (void)maxitems;
64 : : (void)check_at_least;
65 : : // Store a pointer to the total stats to use when building the Query tree.
66 : 246352 : stats = &total_stats;
67 : 246352 : }
68 : :
69 : : PostList *
70 : 246352 : LocalSubMatch::get_postlist_and_term_info(MultiMatch * matcher,
71 : : map<string, Xapian::MSet::Internal::TermFreqAndWeight> * termfreqandwts,
72 : : Xapian::termcount * total_subqs_ptr)
73 : : {
74 : : LOGCALL(MATCH, PostList *, "LocalSubMatch::get_postlist_and_term_info", matcher | termfreqandwts | total_subqs_ptr);
75 : : (void)matcher;
76 : 246352 : term_info = termfreqandwts;
77 : :
78 : : // Build the postlist tree for the query. This calls
79 : : // LocalSubMatch::postlist_from_op_leaf_query() for each term in the query,
80 : : // which builds term_info as a side effect.
81 : 246352 : QueryOptimiser opt(*db, *this, matcher);
82 : 246352 : PostList * pl = opt.optimise_query(query);
83 : 246352 : *total_subqs_ptr = opt.get_total_subqueries();
84 : :
85 : 246352 : AutoPtr<Xapian::Weight> extra_wt(wt_factory->clone());
86 : 246352 : extra_wt->init_(*stats, qlen);
87 [ + + ]: 246352 : if (extra_wt->get_maxextra() != 0.0) {
88 : : // There's a term-independent weight contribution, so we combine the
89 : : // postlist tree with an ExtraWeightPostList which adds in this
90 : : // contribution.
91 : 16 : pl = new ExtraWeightPostList(pl, extra_wt.release(), matcher);
92 : : }
93 : :
94 : 246352 : RETURN(pl);
95 : : }
96 : :
97 : : PostList *
98 : 592 : LocalSubMatch::make_synonym_postlist(PostList * or_pl, MultiMatch * matcher,
99 : : double factor)
100 : : {
101 : : LOGCALL(MATCH, PostList *, "LocalSubMatch::make_synonym_postlist", or_pl | matcher | factor);
102 : : LOGVALUE(MATCH, or_pl->get_termfreq_est());
103 : 592 : AutoPtr<SynonymPostList> res(new SynonymPostList(or_pl, matcher));
104 : 592 : AutoPtr<Xapian::Weight> wt(wt_factory->clone());
105 : :
106 : 592 : TermFreqs freqs;
107 : : // Avoid calling get_termfreq_est_using_stats() if the database is empty
108 : : // so we don't need to special case that repeatedly when implementing it.
109 : : // FIXME: it would be nicer to handle an empty database higher up, though
110 : : // we need to catch the case where all the non-empty subdatabases have
111 : : // failed, so we can't just push this right up to the start of get_mset().
112 [ + - ]: 592 : if (usual(stats->collection_size != 0)) {
113 : 592 : freqs = or_pl->get_termfreq_est_using_stats(*stats);
114 : : }
115 : 592 : wt->init_(*stats, qlen, factor, freqs.termfreq, freqs.reltermfreq);
116 : :
117 : 592 : res->set_weight(wt.release());
118 : 592 : RETURN(res.release());
119 : : }
120 : :
121 : : PostList *
122 : 476180 : LocalSubMatch::postlist_from_op_leaf_query(const Xapian::Query::Internal *leaf,
123 : : double factor)
124 : : {
125 : : LOGCALL(MATCH, PostList *, "LocalSubMatch::postlist_from_op_leaf_query", leaf | factor);
126 : : Assert(leaf);
127 : : AssertEq(leaf->op, Xapian::Query::Internal::OP_LEAF);
128 : : Assert(leaf->subqs.empty());
129 : 476180 : const string & term = leaf->tname;
130 : 476180 : bool boolean = (factor == 0.0);
131 : 476180 : AutoPtr<Xapian::Weight> wt;
132 [ + + ]: 476180 : if (!boolean) {
133 : 473840 : wt.reset(wt_factory->clone());
134 : 473840 : wt->init_(*stats, qlen, term, leaf->get_wqf(), factor);
135 : : }
136 : :
137 [ + + ]: 476180 : if (term_info) {
138 : 335295 : Xapian::doccount tf = stats->get_termfreq(term);
139 : : using namespace Xapian;
140 : : // Find existing entry for term, or else make a new one.
141 : 335295 : map<string, MSet::Internal::TermFreqAndWeight>::iterator i;
142 : : i = term_info->insert(
143 : 335295 : make_pair(term, MSet::Internal::TermFreqAndWeight(tf))).first;
144 [ + + ]: 335295 : if (!boolean)
145 : 333396 : i->second.termweight += wt->get_maxpart();
146 : : }
147 : :
148 : 476180 : LeafPostList * pl = db->open_post_list(term);
149 : : // The default for LeafPostList is to return 0 weight and maxweight which
150 : : // is the same as boolean weighting.
151 [ + + ]: 476180 : if (!boolean)
152 : 473840 : pl->set_termweight(wt.release());
153 : 476180 : RETURN(pl);
154 : : }
|