Branch data Line data Source code
1 : : /** @file api_percentages.cc
2 : : * @brief Tests of percentage calculations.
3 : : */
4 : : /* Copyright (C) 2008,2009 Lemur Consulting Ltd
5 : : * Copyright (C) 2008,2009,2010 Olly Betts
6 : : *
7 : : * This program is free software; you can redistribute it and/or modify
8 : : * it under the terms of the GNU General Public License as published by
9 : : * the Free Software Foundation; either version 2 of the License, or
10 : : * (at your option) any later version.
11 : : *
12 : : * This program is distributed in the hope that it will be useful,
13 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : * GNU General Public License for more details.
16 : : *
17 : : * You should have received a copy of the GNU General Public License
18 : : * along with this program; if not, write to the Free Software
19 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 : : */
21 : :
22 : : #include <config.h>
23 : :
24 : : #include "api_percentages.h"
25 : :
26 : : #include <xapian.h>
27 : :
28 : : #include "apitest.h"
29 : : #include "backendmanager_local.h"
30 : : #include "testutils.h"
31 : :
32 : : #include <cfloat>
33 : :
34 : : using namespace std;
35 : :
36 : : // Test that percentages reported are the same regardless of which part of the
37 : : // mset is returned, for sort-by-value search. Regression test for bug#216 in
38 : : // 1.0.10 and earlier with returned percentages.
39 : 13 : DEFINE_TESTCASE(consistency3, backend) {
40 : 13 : Xapian::Database db(get_database("apitest_sortconsist"));
41 : 13 : Xapian::Enquire enquire(db);
42 : 13 : enquire.set_query(Xapian::Query("foo"));
43 : 13 : enquire.set_sort_by_value(1, 0);
44 : 13 : Xapian::doccount lots = 3;
45 : 13 : Xapian::MSet bigmset = enquire.get_mset(0, lots);
46 [ - + # # ]: 13 : TEST_EQUAL(bigmset.size(), lots);
47 [ + + ]: 52 : for (Xapian::doccount start = 0; start < lots; ++start) {
48 : : tout << *bigmset[start] << ":" << bigmset[start].get_weight() << ":"
49 : 39 : << bigmset[start].get_percent() << "%" << endl;
50 [ + + ]: 117 : for (Xapian::doccount size = 0; size < lots - start; ++size) {
51 : 78 : Xapian::MSet mset = enquire.get_mset(start, size);
52 [ + + ]: 78 : if (mset.size()) {
53 [ - + ][ # # ]: 39 : TEST_EQUAL(start + mset.size(),
54 : : min(start + size, bigmset.size()));
55 [ - + ]: 39 : } else if (size) {
56 [ # # ][ # # ]: 0 : TEST(start >= bigmset.size());
57 : : }
58 [ + + ]: 130 : for (Xapian::doccount i = 0; i < mset.size(); ++i) {
59 [ - + ][ # # ]: 52 : TEST_EQUAL(*mset[i], *bigmset[start + i]);
60 [ - + ][ # # ]: 52 : TEST_EQUAL_DOUBLE(mset[i].get_weight(),
61 : : bigmset[start + i].get_weight());
62 [ - + ][ # # ]: 52 : TEST_EQUAL_DOUBLE(mset[i].get_percent(),
63 : : bigmset[start + i].get_percent());
64 : : }
65 : : }
66 : : }
67 : 13 : return true;
68 : : }
69 : :
70 [ + - ][ - + ]: 1380 : class MyPostingSource : public Xapian::PostingSource {
71 : : vector<pair<Xapian::docid, Xapian::weight> > weights;
72 : : vector<pair<Xapian::docid, Xapian::weight> >::const_iterator i;
73 : : bool started;
74 : :
75 : 924 : MyPostingSource(const vector<pair<Xapian::docid, Xapian::weight> > &weights_,
76 : : Xapian::weight max_wt)
77 : 924 : : weights(weights_), started(false)
78 : : {
79 : 924 : set_maxweight(max_wt);
80 : 924 : }
81 : :
82 : : public:
83 : 456 : MyPostingSource() : started(false) { }
84 : :
85 : 924 : PostingSource * clone() const
86 : : {
87 : 924 : return new MyPostingSource(weights, get_maxweight());
88 : : }
89 : :
90 : 924 : void append_docweight(Xapian::docid did, Xapian::weight wt) {
91 : 924 : weights.push_back(make_pair(did, wt));
92 [ + + ]: 924 : if (wt > get_maxweight()) set_maxweight(wt);
93 : 924 : }
94 : :
95 : 468 : void init(const Xapian::Database &) { started = false; }
96 : :
97 : 984 : Xapian::weight get_weight() const { return i->second; }
98 : :
99 : 468 : Xapian::doccount get_termfreq_min() const { return weights.size(); }
100 : 468 : Xapian::doccount get_termfreq_est() const { return weights.size(); }
101 : 468 : Xapian::doccount get_termfreq_max() const { return weights.size(); }
102 : :
103 : 1452 : void next(Xapian::weight /*wt*/) {
104 [ + + ]: 1452 : if (!started) {
105 : 468 : i = weights.begin();
106 : 468 : started = true;
107 : : } else {
108 : 984 : ++i;
109 : : }
110 : 1452 : }
111 : :
112 : 1452 : bool at_end() const {
113 : 1452 : return (i == weights.end());
114 : : }
115 : :
116 : 984 : Xapian::docid get_docid() const { return i->first; }
117 : :
118 : 0 : string get_description() const {
119 : 0 : return "MyPostingSource";
120 : : }
121 : : };
122 : :
123 : :
124 : : /// Test for rounding errors in percentage weight calculations and cutoffs.
125 : 4 : DEFINE_TESTCASE(pctcutoff4, backend && !remote && !multi) {
126 : : // Find the number of DBL_EPSILONs to subtract which result in the
127 : : // percentage of the second hit being 49% instead of 50%.
128 : 4 : int epsilons = 0;
129 : 4 : Xapian::Database db(get_database("apitest_simpledata"));
130 : 4 : Xapian::Enquire enquire(db);
131 [ + + ][ + + ]: 452 : while (true) {
132 : 452 : MyPostingSource source;
133 : 452 : source.append_docweight(1, 100);
134 : 452 : source.append_docweight(2, 50 - epsilons * DBL_EPSILON);
135 : 452 : enquire.set_query(Xapian::Query(&source));
136 : 452 : Xapian::MSet mset = enquire.get_mset(0, 10);
137 [ - + # # ]: 452 : TEST_EQUAL(mset.size(), 2);
138 [ + + ]: 452 : if (mset[1].get_percent() != 50) break;
139 : 448 : ++epsilons;
140 : : }
141 : :
142 : : // Make a set of document weights including ones on either side of the
143 : : // 49% / 50% boundary.
144 : 4 : MyPostingSource source;
145 : 4 : source.append_docweight(1, 100);
146 : 4 : source.append_docweight(2, 50);
147 : 4 : source.append_docweight(3, 50 - (epsilons - 1) * DBL_EPSILON);
148 : 4 : source.append_docweight(4, 50 - epsilons * DBL_EPSILON);
149 : 4 : source.append_docweight(5, 25);
150 : :
151 : 4 : enquire.set_query(Xapian::Query(&source));
152 : 4 : Xapian::MSet mset1 = enquire.get_mset(0, 10);
153 [ - + # # ]: 4 : TEST_EQUAL(mset1.size(), 5);
154 [ - + ][ # # ]: 4 : TEST_EQUAL(mset1[2].get_percent(), 50);
155 [ - + ][ # # ]: 4 : TEST_EQUAL(mset1[3].get_percent(), 49);
156 : :
157 : : // Use various different percentage cutoffs, and check that the values
158 : : // returned are as expected.
159 : 4 : int percent = 100;
160 [ + + ]: 24 : for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
161 : 20 : int new_percent = mset1.convert_to_percent(i);
162 : 20 : tout << "mset1 item = " << i.get_percent() << "%\n";
163 [ + + ]: 20 : if (new_percent != percent) {
164 : 12 : enquire.set_cutoff(percent);
165 : 12 : Xapian::MSet mset2 = enquire.get_mset(0, 10);
166 : : tout << "cutoff = " << percent << "%, "
167 : 12 : "mset size = " << mset2.size() << "\n";
168 [ - + # # ]: 12 : TEST_EQUAL(mset2.size(), i.get_rank());
169 : 12 : percent = new_percent;
170 : : }
171 : 4 : }
172 : :
173 : 4 : return true;
174 : : }
175 : :
176 : : /// Check we throw for a percentage cutoff while sorting primarily by value.
177 : 13 : DEFINE_TESTCASE(pctcutoff5, backend) {
178 : 13 : Xapian::Database db(get_database("apitest_simpledata"));
179 : 13 : Xapian::Enquire enquire(db);
180 : 13 : enquire.set_query(Xapian::Query("test"));
181 : 13 : enquire.set_cutoff(42);
182 : 13 : Xapian::MSet mset;
183 : :
184 : 13 : enquire.set_sort_by_value(0, false);
185 [ + - ][ - + ]: 26 : TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
[ # # ][ - + ]
186 : :
187 : 13 : enquire.set_sort_by_value(0, true);
188 [ + - ][ - + ]: 26 : TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
[ # # ][ - + ]
189 : :
190 : 13 : enquire.set_sort_by_value_then_relevance(0, false);
191 [ + - ][ - + ]: 26 : TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
[ # # ][ - + ]
192 : :
193 : 13 : enquire.set_sort_by_value_then_relevance(0, true);
194 [ + - ][ - + ]: 26 : TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
[ # # ][ - + ]
195 : :
196 : 13 : return true;
197 : : }
198 : :
199 : : // Regression test for bug fixed in 1.0.14.
200 : 6 : DEFINE_TESTCASE(topercent3, remote) {
201 : 6 : BackendManagerLocal local_manager;
202 : 6 : local_manager.set_datadir(test_driver::get_srcdir() + "/testdata/");
203 : 6 : Xapian::Database db;
204 : 6 : db.add_database(get_database("apitest_simpledata"));
205 : 6 : db.add_database(local_manager.get_database("apitest_simpledata"));
206 : :
207 : 6 : Xapian::Enquire enquire(db);
208 : 6 : enquire.set_sort_by_value(1, false);
209 : :
210 : 6 : const char * terms[] = { "paragraph", "banana" };
211 : 6 : enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 2));
212 : :
213 : 6 : Xapian::MSet mset = enquire.get_mset(0, 20);
214 : :
215 : 6 : Xapian::MSetIterator i;
216 [ + + ]: 78 : for (i = mset.begin(); i != mset.end(); ++i) {
217 : : // We should never achieve 100%.
218 [ - + ][ # # ]: 72 : TEST_REL(i.get_percent(),<,100);
219 : : }
220 : :
221 : 6 : return true;
222 : : }
223 : :
224 : : // Regression test for bug introduced temporarily by the "percent without
225 : : // termlist" patch.
226 : 13 : DEFINE_TESTCASE(topercent4, backend) {
227 : 13 : Xapian::Enquire enquire(get_database("apitest_simpledata"));
228 : :
229 : : Xapian::Query query(Xapian::Query::OP_FILTER,
230 : : Xapian::Query("paragraph"),
231 : 13 : Xapian::Query("queri"));
232 : : query = Xapian::Query(Xapian::Query::OP_XOR,
233 : 13 : query, Xapian::Query("rubbish"));
234 : :
235 : 13 : enquire.set_query(query);
236 : 13 : Xapian::MSet mset = enquire.get_mset(0, 10);
237 : :
238 : : // We should get 50% not 33%.
239 [ - + # # ]: 13 : TEST(!mset.empty());
240 [ - + ][ # # ]: 13 : TEST_EQUAL(mset[0].get_percent(), 50);
241 : :
242 : 13 : return true;
243 : : }
244 : :
245 : : /// Test that a search with a non-existent term doesn't get 100%.
246 : 13 : DEFINE_TESTCASE(topercent5, backend) {
247 : 13 : Xapian::Enquire enquire(get_database("apitest_simpledata"));
248 : : Xapian::Query q(Xapian::Query::OP_OR,
249 : 13 : Xapian::Query("paragraph"), Xapian::Query("xyzzy"));
250 : 13 : enquire.set_query(q);
251 : 13 : Xapian::MSet mset = enquire.get_mset(0, 10);
252 [ - + # # ]: 13 : TEST(!mset.empty());
253 [ - + ][ # # ]: 13 : TEST(mset[0].get_percent() < 100);
254 : : // It would be odd if the non-existent term was worth more, but in 1.0.x
255 : : // the top hit got 4% in this testcase. In 1.2.x it gets 50%, which is
256 : : // better, but >50% would be more natural.
257 [ - + ][ # # ]: 13 : TEST(mset[0].get_percent() >= 50);
258 : 13 : return true;
259 : : }
|