LCOV - code coverage report
Current view: top level - backends/flint - flint_termlisttable.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 43 47 91.5 %
Date: 2011-08-21 Functions: 2 2 100.0 %
Branches: 18 22 81.8 %

           Branch data     Line data    Source code
       1                 :            : /** @file flint_termlisttable.cc
       2                 :            :  * @brief Subclass of FlintTable which holds termlists.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2008 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or modify
       7                 :            :  * it under the terms of the GNU General Public License as published by
       8                 :            :  * the Free Software Foundation; either version 2 of the License, or
       9                 :            :  * (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #include <config.h>
      22                 :            : 
      23                 :            : #include <xapian/document.h>
      24                 :            : #include <xapian/error.h>
      25                 :            : #include <xapian/termiterator.h>
      26                 :            : 
      27                 :            : #include "flint_termlisttable.h"
      28                 :            : #include "flint_utils.h"
      29                 :            : #include "debuglog.h"
      30                 :            : #include "omassert.h"
      31                 :            : #include "str.h"
      32                 :            : #include "stringutils.h"
      33                 :            : 
      34                 :            : #include <string>
      35                 :            : 
      36                 :            : using namespace std;
      37                 :            : 
      38                 :            : void
      39                 :      47367 : FlintTermListTable::set_termlist(Xapian::docid did,
      40                 :            :                                  const Xapian::Document & doc,
      41                 :            :                                  flint_doclen_t doclen)
      42                 :            : {
      43                 :            :     LOGCALL_VOID(DB, "FlintTermListTable::set_termlist", did | doc | doclen);
      44                 :            : 
      45                 :      47367 :     Xapian::doccount termlist_size = doc.termlist_count();
      46         [ +  + ]:      47367 :     if (termlist_size == 0) {
      47                 :            :         // doclen is sum(wdf) so should be zero if there are no terms.
      48                 :            :         Assert(doclen == 0);
      49                 :            :         Assert(doc.termlist_begin() == doc.termlist_end());
      50                 :      12275 :         add(flint_docid_to_key(did), string());
      51                 :      12275 :         return;
      52                 :            :     }
      53                 :            : 
      54                 :      35092 :     string tag = F_pack_uint(doclen);
      55                 :            : 
      56                 :      35092 :     Xapian::TermIterator t = doc.termlist_begin();
      57         [ +  - ]:      35092 :     if (t != doc.termlist_end()) {
      58                 :      35092 :         tag += F_pack_uint(termlist_size);
      59                 :      35092 :         string prev_term = *t;
      60                 :            : 
      61                 :            :         // Previous database versions encoded a boolean here, which was
      62                 :            :         // always false (and F_pack_bool() encodes false as a '0').  We can
      63                 :            :         // just omit this and successfully read old and new termlists
      64                 :            :         // except in the case where the next byte is a '0' - in this case
      65                 :            :         // we need keep the '0' so that the decoder can just skip any '0'
      66                 :            :         // it sees in this position (this shouldn't be a common case - 48
      67                 :            :         // character terms aren't very common, and the first term
      68                 :            :         // alphabetically is likely to be shorter than average).
      69         [ +  + ]:      35092 :         if (prev_term.size() == '0') tag += '0';
      70                 :            : 
      71                 :      35092 :         tag += char(prev_term.size());
      72                 :      35092 :         tag += prev_term;
      73                 :      35092 :         tag += F_pack_uint(t.get_wdf());
      74                 :      35092 :         --termlist_size;
      75                 :            : 
      76         [ +  + ]:     777086 :         while (++t != doc.termlist_end()) {
      77                 :     741994 :             const string & term = *t;
      78                 :            :             // If there's a shared prefix with the previous term, we don't
      79                 :            :             // store it explicitly, but just store the length of the shared
      80                 :            :             // prefix.  In general, this is a big win.
      81                 :     741994 :             size_t reuse = common_prefix_length(prev_term, term);
      82                 :            : 
      83                 :            :             // reuse must be <= prev_term.size(), and we know that value while
      84                 :            :             // decoding.  So if the wdf is small enough that we can multiply it
      85                 :            :             // by (prev_term.size() + 1), add reuse and fit the result in a
      86                 :            :             // byte, then we can pack reuse and the wdf into a single byte and
      87                 :            :             // save ourselves a byte.  We actually need to add one to the wdf
      88                 :            :             // before multiplying so that a wdf of 0 can be detected by the
      89                 :            :             // decoder.
      90                 :     741994 :             size_t packed = 0;
      91                 :     741994 :             Xapian::termcount wdf = t.get_wdf();
      92                 :            :             // If wdf >= 128, then we aren't going to be able to pack it in so
      93                 :            :             // don't even try to avoid the calculation overflowing and making
      94                 :            :             // us think we can.
      95         [ +  + ]:     741994 :             if (wdf < 127)
      96                 :     741988 :                 packed = (wdf + 1) * (prev_term.size() + 1) + reuse;
      97                 :            : 
      98 [ +  + ][ +  + ]:    1483979 :             if (packed && packed < 256) {
      99                 :            :                 // We can pack the wdf into the same byte.
     100                 :     741985 :                 tag += char(packed);
     101                 :     741985 :                 tag += char(term.size() - reuse);
     102                 :     741985 :                 tag.append(term.data() + reuse, term.size() - reuse);
     103                 :            :             } else {
     104                 :          9 :                 tag += char(reuse);
     105                 :          9 :                 tag += char(term.size() - reuse);
     106                 :          9 :                 tag.append(term.data() + reuse, term.size() - reuse);
     107                 :            :                 // FIXME: pack wdf after reuse next time we rejig the format
     108                 :            :                 // incompatibly.
     109                 :          9 :                 tag += F_pack_uint(wdf);
     110                 :            :             }
     111                 :            : 
     112                 :     741994 :             prev_term = *t;
     113                 :     741994 :             --termlist_size;
     114                 :      35092 :         }
     115                 :            :     }
     116                 :            :     Assert(termlist_size == 0);
     117                 :      47367 :     add(flint_docid_to_key(did), tag);
     118                 :            : }
     119                 :            : 
     120                 :            : flint_doclen_t
     121                 :      26004 : FlintTermListTable::get_doclength(Xapian::docid did) const
     122                 :            : {
     123                 :            :     LOGCALL(DB, flint_doclen_t, "FlintTermListTable::get_doclength", did);
     124                 :            : 
     125                 :      26004 :     string tag;
     126         [ +  + ]:      26004 :     if (!get_exact_entry(flint_docid_to_key(did), tag))
     127                 :            :         throw Xapian::DocNotFoundError("No termlist found for document " +
     128                 :      15039 :                                        str(did));
     129                 :            : 
     130         [ +  + ]:      10965 :     if (tag.empty()) RETURN(0);
     131                 :            : 
     132                 :      10946 :     const char * pos = tag.data();
     133                 :      10946 :     const char * end = pos + tag.size();
     134                 :            : 
     135                 :            :     flint_doclen_t doclen;
     136         [ -  + ]:      10946 :     if (!F_unpack_uint(&pos, end, &doclen)) {
     137                 :            :         const char *msg;
     138         [ #  # ]:          0 :         if (pos == 0) {
     139                 :          0 :             msg = "Too little data for doclen in termlist";
     140                 :            :         } else {
     141                 :          0 :             msg = "Overflowed value for doclen in termlist";
     142                 :            :         }
     143                 :          0 :         throw Xapian::DatabaseCorruptError(msg);
     144                 :            :     }
     145                 :            : 
     146                 :      26004 :     RETURN(doclen);
     147                 :            : }

Generated by: LCOV version 1.8