LCOV - code coverage report
Current view: top level - matcher - collapser.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 14 15 93.3 %
Date: 2011-08-21 Functions: 11 12 91.7 %
Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /** @file collapser.h
       2                 :            :  * @brief Collapse documents with the same collapse key during the match.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2009 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or
       7                 :            :  * modify it under the terms of the GNU General Public License as
       8                 :            :  * published by the Free Software Foundation; either version 2 of the
       9                 :            :  * License, or (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #ifndef XAPIAN_INCLUDED_COLLAPSER_H
      22                 :            : #define XAPIAN_INCLUDED_COLLAPSER_H
      23                 :            : 
      24                 :            : #include "document.h"
      25                 :            : #include "msetcmp.h"
      26                 :            : #include "omenquireinternal.h"
      27                 :            : #include "postlist.h"
      28                 :            : 
      29                 :            : #include <map>
      30                 :            : 
      31                 :            : /// Enumeration reporting how a document was handled by the Collapser.
      32                 :            : typedef enum {
      33                 :            :     EMPTY,
      34                 :            :     ADDED,
      35                 :            :     REJECTED,
      36                 :            :     REPLACED
      37                 :            : } collapse_result;
      38                 :            : 
      39                 :            : /// Class tracking information for a given value of the collapse key.
      40                 :      27944 : class CollapseData {
      41                 :            :     /** Currently kept MSet entries for this value of the collapse key.
      42                 :            :      *
      43                 :            :      *  If collapse_max > 1, then this is a min-heap once items.size()
      44                 :            :      *  reaches collapse_max.
      45                 :            :      *
      46                 :            :      *  FIXME: We expect collapse_max to be small, so perhaps we should
      47                 :            :      *  preallocate space for that many entries and/or allocate space in
      48                 :            :      *  larger blocks to divvy up?
      49                 :            :      */
      50                 :            :     vector<Xapian::Internal::MSetItem> items;
      51                 :            : 
      52                 :            :     /// The highest weight of a document we've rejected.
      53                 :            :     Xapian::weight next_best_weight;
      54                 :            : 
      55                 :            :     /// The number of documents we've rejected.
      56                 :            :     Xapian::doccount collapse_count;
      57                 :            : 
      58                 :            :   public:
      59                 :            :     /// Construct with the given MSetItem @a item.
      60                 :       3992 :     CollapseData(const Xapian::Internal::MSetItem & item)
      61                 :       3992 :         : items(1, item), next_best_weight(0), collapse_count(0) {
      62                 :       3992 :         items[0].collapse_key = string();
      63                 :       3992 :     }
      64                 :            : 
      65                 :            :     /** Handle a new MSetItem with this collapse key value.
      66                 :            :      *
      67                 :            :      *  @param item             The new item.
      68                 :            :      *  @param collapse_max     Max no. of items for each collapse key value.
      69                 :            :      *  @param mcmp             MSetItem comparison functor.
      70                 :            :      *  @param[out] old_item    Replaced item (when REPLACED is returned).
      71                 :            :      *
      72                 :            :      *  @return How @a item was handled: ADDED, REJECTED or REPLACED.
      73                 :            :      */
      74                 :            :     collapse_result add_item(const Xapian::Internal::MSetItem & item,
      75                 :            :                              Xapian::doccount collapse_max,
      76                 :            :                              const MSetCmp & mcmp,
      77                 :            :                              Xapian::Internal::MSetItem & old_item);
      78                 :            : 
      79                 :            :     /// The highest weight of a document we've rejected.
      80                 :          0 :     Xapian::weight get_next_best_weight() const { return next_best_weight; }
      81                 :            : 
      82                 :            :     /// The number of documents we've rejected.
      83                 :       5377 :     Xapian::doccount get_collapse_count() const { return collapse_count; }
      84                 :            : };
      85                 :            : 
      86                 :            : /// The Collapser class tracks collapse keys and the documents they match.
      87                 :     173480 : class Collapser {
      88                 :            :     /// Map from collapse key values to the items we're keeping for them.
      89                 :            :     std::map<std::string, CollapseData> table;
      90                 :            : 
      91                 :            :     /// How many items we're currently keeping in @a table.
      92                 :            :     Xapian::doccount entry_count;
      93                 :            : 
      94                 :            :     /** How many documents have we seen without a collapse key?
      95                 :            :      *
      96                 :            :      *  We use this statistic to improve matches_lower_bound.
      97                 :            :      */
      98                 :            :     Xapian::doccount no_collapse_key;
      99                 :            : 
     100                 :            :     /** How many documents with duplicate collapse keys we have ignored.
     101                 :            :      *
     102                 :            :      *  We use this statistic to improve matches_estimated (by considering
     103                 :            :      *  the rate of collapsing) and matches_upper_bound.
     104                 :            :      */
     105                 :            :     Xapian::doccount dups_ignored;
     106                 :            : 
     107                 :            :     /** How many documents we've considered for collapsing.
     108                 :            :      *
     109                 :            :      *  We use this statistic to improve matches_estimated (by considering
     110                 :            :      *  the rate of collapsing).
     111                 :            :      */
     112                 :            :     Xapian::doccount docs_considered;
     113                 :            : 
     114                 :            :     /** The value slot we're getting collapse keys from. */
     115                 :            :     Xapian::valueno slot;
     116                 :            : 
     117                 :            :     /** The maximum number of items to keep for each collapse key value. */
     118                 :            :     Xapian::doccount collapse_max;
     119                 :            : 
     120                 :            :   public:
     121                 :            :     /// Replaced item when REPLACED is returned by @a collapse().
     122                 :            :     Xapian::Internal::MSetItem old_item;
     123                 :            : 
     124                 :     173480 :     Collapser(Xapian::valueno slot_, Xapian::doccount collapse_max_)
     125                 :            :         : entry_count(0), no_collapse_key(0), dups_ignored(0),
     126                 :            :           docs_considered(0), slot(slot_), collapse_max(collapse_max_),
     127                 :     173480 :           old_item(0, 0) { }
     128                 :            : 
     129                 :            :     /// Return true if collapsing is active for this match.
     130                 :   48430344 :     operator bool() const { return collapse_max != 0; }
     131                 :            : 
     132                 :            :     /** Handle a new MSetItem.
     133                 :            :      *
     134                 :            :      *  @param item             The new item.
     135                 :            :      *  @param postlist         PostList to try to get collapse key from
     136                 :            :      *                          (this happens for a remote match).
     137                 :            :      *  @param doc              Document for getting values.
     138                 :            :      *  @param mcmp             MSetItem comparison functor.
     139                 :            :      *
     140                 :            :      *  @return How @a item was handled: EMPTY, ADDED, REJECTED or REPLACED.
     141                 :            :      */
     142                 :            :     collapse_result process(Xapian::Internal::MSetItem & item,
     143                 :            :                             PostList * postlist,
     144                 :            :                             Xapian::Document::Internal & vsdoc,
     145                 :            :                             const MSetCmp & mcmp);
     146                 :            : 
     147                 :            :     Xapian::doccount get_collapse_count(const std::string & collapse_key,
     148                 :            :                                         int percent_cutoff,
     149                 :            :                                         Xapian::weight min_weight) const;
     150                 :            : 
     151                 :        638 :     Xapian::doccount get_docs_considered() const { return docs_considered; }
     152                 :            : 
     153                 :        638 :     Xapian::doccount get_dups_ignored() const { return dups_ignored; }
     154                 :            : 
     155                 :       1092 :     Xapian::doccount entries() const { return entry_count; }
     156                 :            : 
     157                 :            :     Xapian::doccount get_matches_lower_bound() const;
     158                 :            : 
     159                 :       1132 :     bool empty() const { return table.empty(); }
     160                 :            : };
     161                 :            : 
     162                 :            : #endif // XAPIAN_INCLUDED_COLLAPSER_H

Generated by: LCOV version 1.8