LCOV - code coverage report
Current view: top level - matcher - collapser.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 47 50 94.0 %
Date: 2011-08-21 Functions: 4 4 100.0 %
Branches: 21 26 80.8 %

           Branch data     Line data    Source code
       1                 :            : /** @file collapser.cc
       2                 :            :  * @brief Collapse documents with the same collapse key during the match.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2009 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or
       7                 :            :  * modify it under the terms of the GNU General Public License as
       8                 :            :  * published by the Free Software Foundation; either version 2 of the
       9                 :            :  * License, or (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #include <config.h>
      22                 :            : 
      23                 :            : #include "collapser.h"
      24                 :            : 
      25                 :            : #include "omassert.h"
      26                 :            : 
      27                 :            : #include <algorithm>
      28                 :            : 
      29                 :            : using namespace std;
      30                 :            : 
      31                 :            : collapse_result
      32                 :       7301 : CollapseData::add_item(const Xapian::Internal::MSetItem & item,
      33                 :            :                        Xapian::doccount collapse_max, const MSetCmp & mcmp,
      34                 :            :                        Xapian::Internal::MSetItem & old_item)
      35                 :            : {
      36         [ +  + ]:       7301 :     if (items.size() < collapse_max) {
      37                 :       1768 :         items.push_back(item);
      38                 :       1768 :         items.back().collapse_key = string();
      39                 :       1768 :         return ADDED;
      40                 :            :     }
      41                 :            : 
      42                 :            :     // We already have collapse_max items better than item so we need to
      43                 :            :     // eliminate the lowest ranked.
      44 [ +  + ][ +  + ]:       5533 :     if (collapse_count == 0 && collapse_max != 1) {
      45                 :            :         // Be lazy about calling make_heap - if we see <= collapse_max
      46                 :            :         // items with a particular collapse key, we never need to use
      47                 :            :         // the heap.
      48                 :        234 :         make_heap(items.begin(), items.end(), mcmp);
      49                 :            :     }
      50                 :       5533 :     ++collapse_count;
      51                 :            : 
      52         [ +  + ]:       5533 :     if (mcmp(items.front(), item)) {
      53                 :            :         // If this is the "best runner-up", update next_best_weight.
      54         [ +  + ]:       4777 :         if (item.wt > next_best_weight) next_best_weight = item.wt;
      55                 :       4777 :         return REJECTED;
      56                 :            :     }
      57                 :            : 
      58                 :        756 :     next_best_weight = items.front().wt;
      59                 :            : 
      60                 :        756 :     items.push_back(item);
      61                 :        756 :     push_heap(items.begin(), items.end(), mcmp);
      62                 :        756 :     pop_heap(items.begin(), items.end(), mcmp);
      63                 :        756 :     swap(old_item, items.back());
      64                 :        756 :     items.pop_back();
      65                 :            : 
      66                 :       7301 :     return REPLACED;
      67                 :            : }
      68                 :            : 
      69                 :            : collapse_result
      70                 :      11422 : Collapser::process(Xapian::Internal::MSetItem & item,
      71                 :            :                    PostList * postlist,
      72                 :            :                    Xapian::Document::Internal & vsdoc,
      73                 :            :                    const MSetCmp & mcmp)
      74                 :            : {
      75                 :      11422 :     ++docs_considered;
      76                 :            :     // The postlist will supply the collapse key for a remote match.
      77                 :      11422 :     const string * key_ptr = postlist->get_collapse_key();
      78         [ -  + ]:      11422 :     if (key_ptr) {
      79                 :          0 :         item.collapse_key = *key_ptr;
      80                 :            :     } else {
      81                 :            :         // Otherwise use the Document object to get the value.
      82                 :      11422 :         item.collapse_key = vsdoc.get_value(slot);
      83                 :            :     }
      84                 :            : 
      85         [ +  + ]:      11422 :     if (item.collapse_key.empty()) {
      86                 :            :         // We don't collapse items with an empty collapse key.
      87                 :        129 :         ++no_collapse_key;
      88                 :        129 :         return EMPTY;
      89                 :            :     }
      90                 :            : 
      91                 :      11293 :     map<string, CollapseData>::iterator oldkey;
      92                 :      11293 :     oldkey = table.find(item.collapse_key);
      93         [ +  + ]:      11293 :     if (oldkey == table.end()) {
      94                 :            :         // We've not seen this collapse key before.
      95                 :       3992 :         table.insert(make_pair(item.collapse_key, CollapseData(item)));
      96                 :       3992 :         ++entry_count;
      97                 :       3992 :         return ADDED;
      98                 :            :     }
      99                 :            : 
     100                 :            :     collapse_result res;
     101                 :       7301 :     CollapseData & collapse_data = oldkey->second;
     102                 :       7301 :     res = collapse_data.add_item(item, collapse_max, mcmp, old_item);
     103         [ +  + ]:       7301 :     if (res == ADDED) {
     104                 :       1768 :         ++entry_count;
     105 [ +  + ][ +  - ]:       5533 :     } else if (res == REJECTED || res == REPLACED) {
     106                 :       5533 :         ++dups_ignored;
     107                 :            :     }
     108                 :      11422 :     return res;
     109                 :            : }
     110                 :            : 
     111                 :            : Xapian::doccount
     112                 :       5377 : Collapser::get_collapse_count(const string & collapse_key, int percent_cutoff,
     113                 :            :                               Xapian::weight min_weight) const
     114                 :            : {
     115                 :       5377 :     map<string, CollapseData>::const_iterator key = table.find(collapse_key);
     116                 :            :     // If a collapse key is present in the MSet, it must be in our table.
     117                 :            :     Assert(key != table.end());
     118                 :            : 
     119         [ +  - ]:       5377 :     if (!percent_cutoff) {
     120                 :            :         // The recorded collapse_count is correct.
     121                 :       5377 :         return key->second.get_collapse_count();
     122                 :            :     }
     123                 :            : 
     124         [ #  # ]:          0 :     if (key->second.get_next_best_weight() < min_weight) {
     125                 :            :         // We know for certain that all collapsed items would have failed the
     126                 :            :         // percentage cutoff, so collapse_count should be 0.
     127                 :          0 :         return 0;
     128                 :            :     }
     129                 :            : 
     130                 :            :     // We know that the highest weighted collapsed item would have survived the
     131                 :            :     // percentage cutoff, but it's possible all other collapsed items would
     132                 :            :     // have failed it.  Since collapse_count is a lower bound, we must set it
     133                 :            :     // to 1.
     134                 :       5377 :     return 1;
     135                 :            : }
     136                 :            : 
     137                 :            : Xapian::doccount
     138                 :        638 : Collapser::get_matches_lower_bound() const
     139                 :            : {
     140                 :            :     // We've seen this many matches, but all other documents matching the query
     141                 :            :     // could be collapsed onto values already seen.
     142                 :        638 :     Xapian::doccount matches_lower_bound = no_collapse_key + entry_count;
     143                 :        638 :     return matches_lower_bound;
     144                 :            :     // FIXME: *Unless* we haven't achieved collapse_max occurrences of *any*
     145                 :            :     // collapse key value, so we can increase matches_lower_bound like the
     146                 :            :     // code below, but it isn't quite that simple - there may not be that
     147                 :            :     // many documents.
     148                 :            : #if 0
     149                 :            :     Xapian::doccount max_kept = 0;
     150                 :            :     map<string, CollapseData>::const_iterator i;
     151                 :            :     for (i = table.begin(); i != table.end(); ++i) {
     152                 :            :         if (i->second.get_collapse_count() > max_kept) {
     153                 :            :             max_kept = i->second.get_collapse_count();
     154                 :            :             if (max_kept == collapse_max) {
     155                 :            :                 return matches_lower_bound;
     156                 :            :             }
     157                 :            :         }
     158                 :            :     }
     159                 :            :     return matches_lower_bound + (collapse_max - max_kept);
     160                 :            : #endif
     161                 :            : }

Generated by: LCOV version 1.8