Branch data Line data Source code
1 : : /** @file api_spelling.cc
2 : : * @brief Test the spelling correction suggestion API.
3 : : */
4 : : /* Copyright (C) 2007,2008,2009,2010 Olly Betts
5 : : * Copyright (C) 2007 Lemur Consulting Ltd
6 : : *
7 : : * This program is free software; you can redistribute it and/or modify
8 : : * it under the terms of the GNU General Public License as published by
9 : : * the Free Software Foundation; either version 2 of the License, or
10 : : * (at your option) any later version.
11 : : *
12 : : * This program is distributed in the hope that it will be useful,
13 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : : * GNU General Public License for more details.
16 : : *
17 : : * You should have received a copy of the GNU General Public License
18 : : * along with this program; if not, write to the Free Software
19 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 : : */
21 : :
22 : : #include <config.h>
23 : :
24 : : #include "api_spelling.h"
25 : :
26 : : #include <xapian.h>
27 : :
28 : : #include "apitest.h"
29 : : #include "testsuite.h"
30 : : #include "testutils.h"
31 : :
32 : : #include <string>
33 : :
34 : : using namespace std;
35 : :
36 : : // Test add_spelling() and remove_spelling(), which remote dbs support (hence the `spelling || remote` condition); no suggestions are queried here.
37 : 9 : DEFINE_TESTCASE(spell0, spelling || remote) {
38 : 9 : Xapian::WritableDatabase db = get_writable_database();
39 : : // There are no TEST_* checks below: the body only makes the calls, with commit() interleaved, and returns true.
40 : 9 : db.add_spelling("hello");
41 : 9 : db.add_spelling("cell", 2);
42 : 9 : db.commit();
43 : 9 : db.add_spelling("zig");
44 : 9 : db.add_spelling("ch");
45 : 9 : db.add_spelling("hello", 2);
46 : 9 : db.remove_spelling("hello", 2);
47 : 9 : db.remove_spelling("cell", 6); // "Over-removing": "cell" was only added with frequency 2.
48 : 9 : db.commit();
49 : 9 : db.remove_spelling("hello");
50 : 9 : db.remove_spelling("nonsuch"); // This and the next two words were never added in this test.
51 : 9 : db.remove_spelling("zzzzzzzzz", 1000000);
52 : 9 : db.remove_spelling("aarvark");
53 : 9 : db.remove_spelling("hello"); // Removing "hello" again, after the removals above.
54 : 9 : db.commit();
55 : 9 : db.remove_spelling("hello"); // And once more after the commit.
56 : :
57 : 9 : return true;
58 : : }
59 : :
60 : : // Test basic spelling correction: frequency preference, single edit errors, the maximum edit distance argument, and removal of spellings.
61 : 3 : DEFINE_TESTCASE(spell1, spelling) {
62 : 3 : Xapian::WritableDatabase db = get_writable_database();
63 : :
64 : : // Check that the more frequent term is chosen ("hello" is the only candidate at first; "cell" is then added with frequency 2).
65 : 3 : db.add_spelling("hello");
66 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cell"), "hello");
67 : 3 : db.add_spelling("cell", 2);
68 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "cell");
69 : 3 : db.commit();
70 : 3 : Xapian::Database dbr(get_writable_database_as_database()); // Also check via the handle returned by get_writable_database_as_database().
71 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "cell");
72 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("hell"), "cell");
73 : :
74 : : // Check suggestions for single edit errors to "zig".
75 : 3 : db.add_spelling("zig");
76 : : // Transpositions:
77 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("izg"), "zig");
78 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("zgi"), "zig");
79 : : // Substitutions:
80 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("sig"), "zig");
81 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("zog"), "zig");
82 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("zif"), "zig");
83 : : // Deletions:
84 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("ig"), "zig");
85 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("zg"), "zig");
86 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("zi"), "zig");
87 : : // Insertions:
88 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("azig"), "zig");
89 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("zaig"), "zig");
90 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("ziag"), "zig");
91 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("ziga"), "zig");
92 : :
93 : : // Check suggestions for single edit errors to the two character word "ch".
94 : 3 : db.add_spelling("ch");
95 : : // Transpositions:
96 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hc"), "ch");
97 : : // Substitutions - we don't handle these for two character words:
98 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("qh"), "");
99 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cq"), "");
100 : : // Deletions would leave a single character, and we don't handle those.
101 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("c"), "");
102 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("h"), "");
103 : : // Insertions:
104 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("qch"), "ch");
105 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cqh"), "ch");
106 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("chq"), "ch");
107 : :
108 : : // Check assorted misspellings of "hello" and "cell":
109 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("shello"), "hello");
110 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hellot"), "hello");
111 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("acell"), "cell");
112 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cella"), "cell");
113 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("acella"), "cell");
114 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("helo"), "hello");
115 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cll"), "cell");
116 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("helol"), "hello");
117 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("clel"), "cell");
118 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("ecll"), "cell");
119 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cll"), "cell"); // NOTE(review): repeats the "cll" check made a few lines earlier.
120 : :
121 : : // Check that edit distance 3 isn't found by default:
122 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("shelolx"), "");
123 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("celling"), "");
124 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("dellin"), "");
125 : :
126 : : // Check that edit distance 3 is found if specified via the second argument:
127 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("shelolx", 3), "hello");
128 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("celling", 3), "cell");
129 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("dellin", 3), "cell");
130 : :
131 : : // Make "hello" more frequent than "cell" (3 vs 2), and check both before and after commit().
132 : 3 : db.add_spelling("hello", 2);
133 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "hello");
134 : 3 : db.commit();
135 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cello"), "hello");
136 : 3 : db.remove_spelling("hello", 2); // Undo the increase, so "cell" is chosen again.
137 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "cell");
138 : : // Test "over-removing": "cell" was added with frequency 2 but 6 is removed.
139 : 3 : db.remove_spelling("cell", 6);
140 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cell"), "hello");
141 : 3 : db.commit();
142 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cell"), "hello");
143 : 3 : db.remove_spelling("hello");
144 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("cell"), "");
145 : :
146 : : // Test removing words not in the table.
147 : 3 : db.remove_spelling("nonsuch");
148 : 3 : db.remove_spelling("zzzzzzzzz", 1000000);
149 : 3 : db.remove_spelling("aarvark");
150 : :
151 : : // Try removing word which was present but no longer is (both before and after a commit()).
152 : 3 : db.remove_spelling("hello");
153 : 3 : db.commit();
154 : 3 : db.remove_spelling("hello");
155 : :
156 : 3 : return true;
157 : : }
158 : :
159 : : // Test spelling correction for Unicode (words and queries containing multi-byte UTF-8 sequences).
160 : 3 : DEFINE_TESTCASE(spell2, spelling) {
161 : 3 : Xapian::WritableDatabase db = get_writable_database();
162 : :
163 : : // Check that a UTF-8 sequence counts as a single character ("\xc3\xb6" is the two byte UTF-8 encoding of o-umlaut).
164 : 3 : db.add_spelling("h\xc3\xb6hle");
165 : 3 : db.add_spelling("ascii");
166 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hohle", 1), "h\xc3\xb6hle");
167 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hhle", 1), "h\xc3\xb6hle");
168 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("\xf0\xa8\xa8\x8f\xc3\xb6le", 2), "h\xc3\xb6hle"); // Query starts with a four byte UTF-8 sequence.
169 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hh\xc3\xb6l"), "h\xc3\xb6hle");
170 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("as\xc3\xb6\xc3\xb7i"), "ascii");
171 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("asc\xc3\xb6i\xc3\xb7i"), "ascii");
172 : 3 : db.commit();
173 : 3 : Xapian::Database dbr(get_writable_database_as_database()); // Repeat the same checks after commit() via this second handle.
174 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("hohle", 1), "h\xc3\xb6hle");
175 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("hhle", 1), "h\xc3\xb6hle");
176 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("\xf0\xa8\xa8\x8f\xc3\xb6le", 2), "h\xc3\xb6hle");
177 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("hh\xc3\xb6l"), "h\xc3\xb6hle");
178 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("as\xc3\xb6\xc3\xb7i"), "ascii");
179 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("asc\xc3\xb6i\xc3\xb7i"), "ascii");
180 : :
181 : 3 : return true;
182 : : }
183 : :
184 : : // Test spelling correction and the spelling iterator with multiple databases combined via add_database().
185 : 3 : DEFINE_TESTCASE(spell3, spelling) {
186 : 3 : Xapian::WritableDatabase db1 = get_writable_database();
187 : : // We can't just call get_writable_database() since it would delete db1
188 : : // which doesn't work at all under __WIN32__ and will go wrong elsewhere if
189 : : // changes to db1 are committed.
190 : 3 : Xapian::WritableDatabase db2 = get_named_writable_database("spell3", "");
191 : :
192 : 3 : db1.add_spelling("hello");
193 : 3 : db1.add_spelling("cell", 2);
194 : 3 : db2.add_spelling("hello", 2);
195 : 3 : db2.add_spelling("helo");
196 : : // Note that none of these additions is committed before being queried below.
197 : 3 : Xapian::Database db;
198 : 3 : db.add_database(db1);
199 : 3 : db.add_database(db2);
200 : : // "hello" is in both databases (1 + 2 = 3 combined, see the iterator check below), so the combined suggestion differs from db1's own.
201 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hello"), "");
202 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "hello");
203 [ - + ][ # # ]: 3 : TEST_EQUAL(db1.get_spelling_suggestion("hell"), "cell");
204 [ - + ][ # # ]: 3 : TEST_EQUAL(db2.get_spelling_suggestion("hell"), "hello");
205 : :
206 : :
207 : : // Test spelling iterator: first on db1 alone.
208 : 3 : Xapian::TermIterator i(db1.spellings_begin());
209 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "cell");
210 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 2);
211 : 3 : ++i;
212 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "hello");
213 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 1);
214 : 3 : ++i;
215 [ - + ][ # # ]: 3 : TEST(i == db1.spellings_end());
216 : : // Then on db2 alone.
217 : 3 : i = db2.spellings_begin();
218 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "hello");
219 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 2);
220 : 3 : ++i;
221 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "helo");
222 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 1);
223 : 3 : ++i;
224 [ - + ][ # # ]: 3 : TEST(i == db2.spellings_end());
225 : : // Then on the combined database: "hello" is expected once, with the summed frequency 3.
226 : 3 : i = db.spellings_begin();
227 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "cell");
228 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 2);
229 : 3 : ++i;
230 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "hello");
231 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 3);
232 : 3 : ++i;
233 [ - + ][ # # ]: 3 : TEST_EQUAL(*i, "helo");
234 [ - + ][ # # ]: 3 : TEST_EQUAL(i.get_termfreq(), 1);
235 : 3 : ++i;
236 [ - + ][ # # ]: 3 : TEST(i == db.spellings_end());
237 : :
238 : 3 : return true;
239 : : }
240 : :
241 : : // Regression test - check that appending works correctly.
242 : 3 : DEFINE_TESTCASE(spell4, spelling) {
243 : 3 : Xapian::WritableDatabase db = get_writable_database();
244 : : // "check" and "pecks" are committed first; "becky" is then added in a second commit.
245 : 3 : db.add_spelling("check");
246 : 3 : db.add_spelling("pecks", 2);
247 : 3 : db.commit();
248 : 3 : db.add_spelling("becky");
249 : 3 : db.commit();
250 : : // With a maximum edit distance of 2, "pecks" (added with frequency 2) is the expected suggestion for "jeck".
251 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("jeck", 2), "pecks");
252 : :
253 : 3 : return true;
254 : : }
256 : : // Regression test - used to segfault with some input values (here, words made up of three byte UTF-8 sequences).
257 : 3 : DEFINE_TESTCASE(spell5, spelling) {
258 : 3 : const char * target = "\xe4\xb8\x80\xe4\xba\x9b"; // Two three byte UTF-8 sequences.
259 : :
260 : 3 : Xapian::WritableDatabase db = get_writable_database();
261 : 3 : db.add_spelling(target);
262 : 3 : db.commit();
263 : : // Query with a single three byte UTF-8 sequence and a maximum edit distance of 3.
264 : 3 : string s = db.get_spelling_suggestion("\xe4\xb8\x8d", 3);
265 [ - + # # ]: 3 : TEST_EQUAL(s, target);
266 : :
267 : 3 : return true;
268 : : }
270 : : // Test that the more frequent of two candidate words is suggested, both before and after commit().
271 : 3 : DEFINE_TESTCASE(spell6, spelling) {
272 : 3 : Xapian::WritableDatabase db = get_writable_database();
273 : :
274 : : // Check that the more frequent term is chosen ("sell" with frequency 3 over "hello" with frequency 2).
275 : 3 : db.add_spelling("hello", 2);
276 : 3 : db.add_spelling("sell", 3);
277 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "sell");
278 : 3 : db.commit();
279 : 3 : Xapian::Database dbr(get_writable_database_as_database()); // Also check via the handle returned by get_writable_database_as_database().
280 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("hell"), "sell");
281 [ - + ][ # # ]: 3 : TEST_EQUAL(dbr.get_spelling_suggestion("hell"), "sell");
282 : :
283 : 3 : return true;
284 : : }
286 : : // Test suggestions when there's an exact match (every word queried below is itself in the spelling table).
287 : 3 : DEFINE_TESTCASE(spell7, spelling) {
288 : 3 : Xapian::WritableDatabase db = get_writable_database();
289 : :
290 : : // Set up similar words with different frequencies: "ward" has the highest (58), "word" and "words" both have 57.
291 : 3 : db.add_spelling("word", 57);
292 : 3 : db.add_spelling("wrod", 3);
293 : 3 : db.add_spelling("sword", 56);
294 : 3 : db.add_spelling("words", 57);
295 : 3 : db.add_spelling("ward", 58);
296 : 3 : db.commit();
297 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("ward"), ""); // No suggestion expected for the highest frequency word.
298 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("words"), "word"); // "word" is expected for each of the other three words.
299 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("sword"), "word");
300 [ - + ][ # # ]: 3 : TEST_EQUAL(db.get_spelling_suggestion("wrod"), "word");
301 : :
302 : 3 : return true;
303 : : }
|