LCOV - code coverage report
Current view: top level - home/olly/git/atia-xapian/xapian-core/tests - api_spelling.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 188 188 100.0 %
Date: 2011-08-21 Functions: 8 8 100.0 %
Branches: 89 356 25.0 %

           Branch data     Line data    Source code
       1                 :            : /** @file api_spelling.cc
       2                 :            :  * @brief Test the spelling correction suggestion API.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2008,2009,2010 Olly Betts
       5                 :            :  * Copyright (C) 2007 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or modify
       8                 :            :  * it under the terms of the GNU General Public License as published by
       9                 :            :  * the Free Software Foundation; either version 2 of the License, or
      10                 :            :  * (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #include <config.h>
      23                 :            : 
      24                 :            : #include "api_spelling.h"
      25                 :            : 
      26                 :            : #include <xapian.h>
      27                 :            : 
      28                 :            : #include "apitest.h"
      29                 :            : #include "testsuite.h"
      30                 :            : #include "testutils.h"
      31                 :            : 
      32                 :            : #include <string>
      33                 :            : 
      34                 :            : using namespace std;
      35                 :            : 
      36                 :            : // Test add_spelling() and remove_spelling(), which remote dbs support.  Nothing is asserted here: the calls just have to succeed.  The sequence is nearly the same as the one spell1 uses.
      37                 :          9 : DEFINE_TESTCASE(spell0, spelling || remote) {
      38                 :          9 :     Xapian::WritableDatabase db = get_writable_database();
      39                 :            : 
      40                 :          9 :     db.add_spelling("hello");
      41                 :          9 :     db.add_spelling("cell", 2);
      42                 :          9 :     db.commit();
      43                 :          9 :     db.add_spelling("zig");
      44                 :          9 :     db.add_spelling("ch");
      45                 :          9 :     db.add_spelling("hello", 2);
      46                 :          9 :     db.remove_spelling("hello", 2);
      47                 :          9 :     db.remove_spelling("cell", 6); // "Over-removing": "cell" was only added with frequency 2.
      48                 :          9 :     db.commit();
      49                 :          9 :     db.remove_spelling("hello");
      50                 :          9 :     db.remove_spelling("nonsuch"); // This and the next two words were never added.
      51                 :          9 :     db.remove_spelling("zzzzzzzzz", 1000000);
      52                 :          9 :     db.remove_spelling("aarvark");
      53                 :          9 :     db.remove_spelling("hello"); // Already removed above, so no longer present.
      54                 :          9 :     db.commit();
      55                 :          9 :     db.remove_spelling("hello");
      56                 :            : 
      57                 :          9 :     return true;
      58                 :            : }
      59                 :            : 
      60                 :            : // Test basic spelling correction features: frequency ranking, single-edit suggestions, the edit distance limit, and removal.
      61                 :          3 : DEFINE_TESTCASE(spell1, spelling) {
      62                 :          3 :     Xapian::WritableDatabase db = get_writable_database();
      63                 :            : 
      64                 :            :     // Check that the more frequent term is chosen.
      65                 :          3 :     db.add_spelling("hello");
      66 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cell"), "hello");
      67                 :          3 :     db.add_spelling("cell", 2);
      68 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "cell");
      69                 :          3 :     db.commit();
      70                 :          3 :     Xapian::Database dbr(get_writable_database_as_database()); // Second handle, used below to check the committed data (helper is defined elsewhere).
      71 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "cell");
      72 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("hell"), "cell");
      73                 :            : 
      74                 :            :     // Check suggestions for single edit errors to "zig".
      75                 :          3 :     db.add_spelling("zig");
      76                 :            :     // Transpositions:
      77 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("izg"), "zig");
      78 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("zgi"), "zig");
      79                 :            :     // Substitutions:
      80 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("sig"), "zig");
      81 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("zog"), "zig");
      82 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("zif"), "zig");
      83                 :            :     // Deletions:
      84 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("ig"), "zig");
      85 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("zg"), "zig");
      86 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("zi"), "zig");
      87                 :            :     // Insertions:
      88 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("azig"), "zig");
      89 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("zaig"), "zig");
      90 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("ziag"), "zig");
      91 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("ziga"), "zig");
      92                 :            : 
      93                 :            :     // Check suggestions for single edit errors to "ch".
      94                 :          3 :     db.add_spelling("ch");
      95                 :            :     // Transpositions:
      96 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hc"), "ch");
      97                 :            :     // Substitutions - we don't handle these for two character words:
      98 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("qh"), "");
      99 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cq"), "");
     100                 :            :     // Deletions would leave a single character, and we don't handle those.
     101 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("c"), "");
     102 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("h"), "");
     103                 :            :     // Insertions:
     104 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("qch"), "ch");
     105 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cqh"), "ch");
     106 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("chq"), "ch");
     107                 :            : 
     108                 :            :     // Check assorted cases:
     109 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("shello"), "hello");
     110 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hellot"), "hello");
     111 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("acell"), "cell");
     112 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cella"), "cell");
     113 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("acella"), "cell");
     114 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("helo"), "hello");
     115 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cll"), "cell");
     116 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("helol"), "hello");
     117 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("clel"), "cell");
     118 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("ecll"), "cell");
     119 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cll"), "cell"); // NOTE(review): repeats the "cll" check made a few lines earlier.
     120                 :            : 
     121                 :            :     // Check that edit distance 3 isn't found by default:
     122 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("shelolx"), "");
     123 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("celling"), "");
     124 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("dellin"), "");
     125                 :            : 
     126                 :            :     // Check that edit distance 3 is found if specified:
     127 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("shelolx", 3), "hello");
     128 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("celling", 3), "cell");
     129 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("dellin", 3), "cell");
     130                 :            : 
     131                 :            :     // Make "hello" more frequent than "cell" (3 vs 2).
     132                 :          3 :     db.add_spelling("hello", 2);
     133 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "hello");
     134                 :          3 :     db.commit();
     135 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cello"), "hello"); // "cello" is one edit from both "hello" and "cell".
     136                 :          3 :     db.remove_spelling("hello", 2); // "hello" is back to frequency 1, below "cell" (2).
     137 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "cell");
     138                 :            :     // Test "over-removing": "cell" only has frequency 2.
     139                 :          3 :     db.remove_spelling("cell", 6);
     140 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cell"), "hello");
     141                 :          3 :     db.commit();
     142 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cell"), "hello");
     143                 :          3 :     db.remove_spelling("hello");
     144 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("cell"), "");
     145                 :            : 
     146                 :            :     // Test removing words not in the table.
     147                 :          3 :     db.remove_spelling("nonsuch");
     148                 :          3 :     db.remove_spelling("zzzzzzzzz", 1000000);
     149                 :          3 :     db.remove_spelling("aarvark");
     150                 :            : 
     151                 :            :     // Try removing word which was present but no longer is.
     152                 :          3 :     db.remove_spelling("hello");
     153                 :          3 :     db.commit();
     154                 :          3 :     db.remove_spelling("hello");
     155                 :            : 
     156                 :          3 :     return true;
     157                 :            : }
     158                 :            : 
     159                 :            : // Test spelling correction for Unicode.  In the literals below \xc3\xb6 and \xc3\xb7 are 2-byte UTF-8 sequences and \xf0\xa8\xa8\x8f is a 4-byte one.
     160                 :          3 : DEFINE_TESTCASE(spell2, spelling) {
     161                 :          3 :     Xapian::WritableDatabase db = get_writable_database();
     162                 :            : 
     163                 :            :     // Check that a UTF-8 sequence counts as a single character.
     164                 :          3 :     db.add_spelling("h\xc3\xb6hle");
     165                 :          3 :     db.add_spelling("ascii");
     166 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hohle", 1), "h\xc3\xb6hle");
     167 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hhle", 1), "h\xc3\xb6hle");
     168 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("\xf0\xa8\xa8\x8f\xc3\xb6le", 2), "h\xc3\xb6hle");
     169 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hh\xc3\xb6l"), "h\xc3\xb6hle");
     170 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("as\xc3\xb6\xc3\xb7i"), "ascii");
     171 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("asc\xc3\xb6i\xc3\xb7i"), "ascii");
     172                 :          3 :     db.commit();
     173                 :          3 :     Xapian::Database dbr(get_writable_database_as_database()); // Repeat the same six checks through a second handle after the commit.
     174 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("hohle", 1), "h\xc3\xb6hle");
     175 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("hhle", 1), "h\xc3\xb6hle");
     176 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("\xf0\xa8\xa8\x8f\xc3\xb6le", 2), "h\xc3\xb6hle");
     177 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("hh\xc3\xb6l"), "h\xc3\xb6hle");
     178 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("as\xc3\xb6\xc3\xb7i"), "ascii");
     179 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("asc\xc3\xb6i\xc3\xb7i"), "ascii");
     180                 :            : 
     181                 :          3 :     return true;
     182                 :            : }
     183                 :            : 
     184                 :            : // Test spelling correction and the spelling iterator with multi databases (a Database combining two sub-databases).
     185                 :          3 : DEFINE_TESTCASE(spell3, spelling) {
     186                 :          3 :     Xapian::WritableDatabase db1 = get_writable_database();
     187                 :            :     // We can't just call get_writable_database() since it would delete db1
     188                 :            :     // which doesn't work at all under __WIN32__ and will go wrong elsewhere if
     189                 :            :     // changes to db1 are committed.
     190                 :          3 :     Xapian::WritableDatabase db2 = get_named_writable_database("spell3", "");
     191                 :            : 
     192                 :          3 :     db1.add_spelling("hello");
     193                 :          3 :     db1.add_spelling("cell", 2);
     194                 :          3 :     db2.add_spelling("hello", 2);
     195                 :          3 :     db2.add_spelling("helo");
     196                 :            : 
     197                 :          3 :     Xapian::Database db;
     198                 :          3 :     db.add_database(db1);
     199                 :          3 :     db.add_database(db2);
     200                 :            : 
     201 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hello"), ""); // "hello" is itself a known word.
     202 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "hello"); // Combined: "hello" (1 + 2 = 3) beats "cell" (2).
     203 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db1.get_spelling_suggestion("hell"), "cell"); // db1 alone: "cell" (2) beats "hello" (1).
     204 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db2.get_spelling_suggestion("hell"), "hello"); // db2 alone: "hello" (2) beats "helo" (1).
     205                 :            : 
     206                 :            : 
     207                 :            :     // Test spelling iterator on each sub-database, then on the combined database.
     208                 :          3 :     Xapian::TermIterator i(db1.spellings_begin());
     209 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "cell");
     210 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 2);
     211                 :          3 :     ++i;
     212 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "hello");
     213 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 1);
     214                 :          3 :     ++i;
     215 [ -  + ][ #  # ]:          3 :     TEST(i == db1.spellings_end());
     216                 :            : 
     217                 :          3 :     i = db2.spellings_begin();
     218 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "hello");
     219 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 2);
     220                 :          3 :     ++i;
     221 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "helo");
     222 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 1);
     223                 :          3 :     ++i;
     224 [ -  + ][ #  # ]:          3 :     TEST(i == db2.spellings_end());
     225                 :            : 
     226                 :          3 :     i = db.spellings_begin();
     227 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "cell");
     228 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 2);
     229                 :          3 :     ++i;
     230 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "hello");
     231 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 3); // Frequencies from db1 (1) and db2 (2) are summed.
     232                 :          3 :     ++i;
     233 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(*i, "helo");
     234 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(i.get_termfreq(), 1);
     235                 :          3 :     ++i;
     236 [ -  + ][ #  # ]:          3 :     TEST(i == db.spellings_end());
     237                 :            : 
     238                 :          3 :     return true;
     239                 :            : }
     240                 :            : 
     241                 :            : // Regression test - check that appending works correctly.  NOTE(review): "appending" presumably refers to adding "becky" after the earlier words were committed - confirm.
     242                 :          3 : DEFINE_TESTCASE(spell4, spelling) {
     243                 :          3 :     Xapian::WritableDatabase db = get_writable_database();
     244                 :            : 
     245                 :          3 :     db.add_spelling("check");
     246                 :          3 :     db.add_spelling("pecks", 2);
     247                 :          3 :     db.commit();
     248                 :          3 :     db.add_spelling("becky");
     249                 :          3 :     db.commit();
     250                 :            : 
     251 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("jeck", 2), "pecks"); // All three words are two edits from "jeck"; "pecks" has the highest frequency (2).
     252                 :            : 
     253                 :          3 :     return true;
     254                 :            : }
     255                 :            : 
     256                 :            : // Regression test - used to segfault with some input values.
     257                 :          3 : DEFINE_TESTCASE(spell5, spelling) {
     258                 :          3 :     const char * target = "\xe4\xb8\x80\xe4\xba\x9b"; // Two 3-byte UTF-8 characters (U+4E00 U+4E9B).
     259                 :            : 
     260                 :          3 :     Xapian::WritableDatabase db = get_writable_database();
     261                 :          3 :     db.add_spelling(target);
     262                 :          3 :     db.commit();
     263                 :            : 
     264                 :          3 :     string s = db.get_spelling_suggestion("\xe4\xb8\x8d", 3); // Query is the single character U+4E0D, with edit distance 3 allowed.
     265   [ -  +  #  # ]:          3 :     TEST_EQUAL(s, target);
     266                 :            : 
     267                 :          3 :     return true;
     268                 :            : }
     269                 :            : 
     270                 :            : // Test that the more frequent of two words one edit away is suggested, both before and after commit.
     271                 :          3 : DEFINE_TESTCASE(spell6, spelling) {
     272                 :          3 :     Xapian::WritableDatabase db = get_writable_database();
     273                 :            : 
     274                 :            :     // Check that the more frequent term is chosen ("sell" at 3 vs "hello" at 2).
     275                 :          3 :     db.add_spelling("hello", 2);
     276                 :          3 :     db.add_spelling("sell", 3);
     277 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "sell");
     278                 :          3 :     db.commit();
     279                 :          3 :     Xapian::Database dbr(get_writable_database_as_database()); // Second handle, used below to check the committed data.
     280 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("hell"), "sell");
     281 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(dbr.get_spelling_suggestion("hell"), "sell");
     282                 :            : 
     283                 :          3 :     return true;
     284                 :            : }
     285                 :            : 
     286                 :            : // Test suggestions when there's an exact match.
     287                 :          3 : DEFINE_TESTCASE(spell7, spelling) {
     288                 :          3 :     Xapian::WritableDatabase db = get_writable_database();
     289                 :            : 
     290                 :            :     // Add the words; every word queried below is itself in the dictionary.
     291                 :          3 :     db.add_spelling("word", 57);
     292                 :          3 :     db.add_spelling("wrod", 3);
     293                 :          3 :     db.add_spelling("sword", 56);
     294                 :          3 :     db.add_spelling("words", 57);
     295                 :          3 :     db.add_spelling("ward", 58);
     296                 :          3 :     db.commit();
     297 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("ward"), ""); // "ward" has the highest frequency of the words added (58).
     298 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("words"), "word");
     299 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("sword"), "word");
     300 [ -  + ][ #  # ]:          3 :     TEST_EQUAL(db.get_spelling_suggestion("wrod"), "word");
     301                 :            : 
     302                 :          3 :     return true;
     303                 :            : }

Generated by: LCOV version 1.8