LCOV - code coverage report
Current view: top level - backends/chert - chert_termlisttable.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 33 33 100.0 %
Date: 2011-08-21 Functions: 1 1 100.0 %
Branches: 11 12 91.7 %

           Branch data     Line data    Source code
       1                 :            : /** @file chert_termlisttable.cc
       2                 :            :  * @brief Subclass of ChertTable which holds termlists.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2008 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or modify
       7                 :            :  * it under the terms of the GNU General Public License as published by
       8                 :            :  * the Free Software Foundation; either version 2 of the License, or
       9                 :            :  * (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #include <config.h>
      22                 :            : 
      23                 :            : #include "chert_termlisttable.h"
      24                 :            : 
      25                 :            : #include <xapian/document.h>
      26                 :            : #include <xapian/error.h>
      27                 :            : #include <xapian/termiterator.h>
      28                 :            : 
      29                 :            : #include "debuglog.h"
      30                 :            : #include "omassert.h"
      31                 :            : #include "pack.h"
      32                 :            : #include "stringutils.h"
      33                 :            : #include "utils.h"
      34                 :            : 
      35                 :            : #include <string>
      36                 :            : 
      37                 :            : using namespace std;
      38                 :            : 
      39                 :            : void  // Encode doc's termlist into a tag string and add() it under make_key(did).
      40                 :      77389 : ChertTermListTable::set_termlist(Xapian::docid did,  // document id the entry is keyed by
      41                 :            :                                  const Xapian::Document & doc,  // supplies the terms and their wdfs
      42                 :            :                                  chert_doclen_t doclen)  // packed at the start of the tag
      43                 :            : {
      44                 :            :     LOGCALL_VOID(DB, "ChertTermListTable::set_termlist", did | doc | doclen);
      45                 :            :     // The tag begins with the packed document length.
      46                 :      77389 :     string tag;
      47                 :      77389 :     pack_uint(tag, doclen);
      48                 :            : 
      49                 :      77389 :     Xapian::doccount termlist_size = doc.termlist_count();
      50         [ +  + ]:      77389 :     if (termlist_size == 0) {
      51                 :            :         // doclen is sum(wdf) so should be zero if there are no terms.
      52                 :            :         Assert(doclen == 0);
      53                 :            :         Assert(doc.termlist_begin() == doc.termlist_end());
      54                 :      42295 :         add(make_key(did), string());  // an empty tag is stored here, not the packed doclen
      55                 :            :         return;
      56                 :            :     }
      57                 :            :     // Non-empty termlist: packed term count, then the first term in full, then the rest prefix-compressed.
      58                 :      35094 :     Xapian::TermIterator t = doc.termlist_begin();
      59         [ +  - ]:      35094 :     if (t != doc.termlist_end()) {  // expected to hold whenever termlist_size != 0
      60                 :      35094 :         pack_uint(tag, termlist_size);
      61                 :      35094 :         string prev_term = *t;
      62                 :            :         // First term: one length byte, the term bytes, then its packed wdf.
      63                 :      35094 :         tag += prev_term.size();  // appends ONE char (size_t converts implicitly). NOTE(review): a length > 255 would be truncated - presumably term length is bounded elsewhere; confirm
      64                 :      35094 :         tag += prev_term;
      65                 :      35094 :         pack_uint(tag, t.get_wdf());
      66                 :      35094 :         --termlist_size;
      67                 :            : 
      68         [ +  + ]:     777096 :         while (++t != doc.termlist_end()) {
      69                 :     742002 :             const string & term = *t;
      70                 :            :             // If there's a shared prefix with the previous term, we don't
      71                 :            :             // store it explicitly, but just store the length of the shared
      72                 :            :             // prefix.  In general, this is a big win.
      73                 :     742002 :             size_t reuse = common_prefix_length(prev_term, term);
      74                 :            : 
      75                 :            :             // reuse must be <= prev_term.size(), and we know that value while
      76                 :            :             // decoding.  So if the wdf is small enough that we can multiply it
      77                 :            :             // by (prev_term.size() + 1), add reuse and fit the result in a
      78                 :            :             // byte, then we can pack reuse and the wdf into a single byte and
      79                 :            :             // save ourselves a byte.  We actually need to add one to the wdf
      80                 :            :             // before multiplying so that a wdf of 0 can be detected by the
      81                 :            :             // decoder (the packed value is then always > prev_term.size()).
      82                 :     742002 :             size_t packed = 0;  // 0 means "not packed"; a computed value is always >= 1
      83                 :     742002 :             Xapian::termcount wdf = t.get_wdf();
      84                 :            :             // Only attempt the packing when wdf < 127.  For larger wdf we
      85                 :            :             // don't even try, which avoids the calculation overflowing and
      86                 :            :             // making us think we can.
      87         [ +  + ]:     742002 :             if (wdf < 127)
      88                 :     741996 :                 packed = (wdf + 1) * (prev_term.size() + 1) + reuse;
      89                 :            : 
      90 [ +  + ][ +  + ]:    1483995 :             if (packed && packed < 256) {
      91                 :            :                 // We can pack the wdf into the same byte: combined byte, suffix length byte, suffix bytes.
      92                 :     741993 :                 tag += char(packed);
      93                 :     741993 :                 tag += char(term.size() - reuse);
      94                 :     741993 :                 tag.append(term.data() + reuse, term.size() - reuse);
      95                 :            :             } else {  // unpacked form: reuse byte, suffix length byte, suffix bytes, then the packed wdf
      96                 :          9 :                 tag += char(reuse);
      97                 :          9 :                 tag += char(term.size() - reuse);
      98                 :          9 :                 tag.append(term.data() + reuse, term.size() - reuse);
      99                 :            :                 // FIXME: pack wdf after reuse next time we rejig the format
     100                 :            :                 // incompatibly.
     101                 :          9 :                 pack_uint(tag, wdf);
     102                 :            :             }
     103                 :            : 
     104                 :     742002 :             prev_term = *t;  // this term becomes the prefix reference for the next one
     105                 :     742002 :             --termlist_size;
     106                 :      35094 :         }
     107                 :            :     }
     108                 :            :     Assert(termlist_size == 0);  // the iterator yielded exactly termlist_count() terms
     109                 :      77389 :     add(make_key(did), tag);
     110                 :            : }

Generated by: LCOV version 1.8