LCOV - code coverage report
Current view: top level - matcher - multiandpostlist.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 39 42 92.9 %
Date: 2011-08-21 Functions: 7 7 100.0 %
Branches: 11 16 68.8 %

           Branch data     Line data    Source code
       1                 :            : /** @file multiandpostlist.h
       2                 :            :  * @brief N-way AND postlist
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2009 Olly Betts
       5                 :            :  * Copyright (C) 2009 Lemur Consulting Ltd
       6                 :            :  *
       7                 :            :  * This program is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU General Public License as
       9                 :            :  * published by the Free Software Foundation; either version 2 of the
      10                 :            :  * License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This program is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15                 :            :  * GNU General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU General Public License
      18                 :            :  * along with this program; if not, write to the Free Software
      19                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      20                 :            :  */
      21                 :            : 
      22                 :            : #ifndef XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
      23                 :            : #define XAPIAN_INCLUDED_MULTIANDPOSTLIST_H
      24                 :            : 
      25                 :            : #include "multimatch.h"
      26                 :            : #include "omassert.h"
      27                 :            : #include "postlist.h"
      28                 :            : 
      29                 :            : /// N-way AND postlist.
      30                 :            : class MultiAndPostList : public PostList {
      31                 :            :     /** Comparison functor which orders PostList* by ascending
      32                 :            :      *  get_termfreq_est(). */
      33                 :            :     struct ComparePostListTermFreqAscending {
      34                 :            :         /// Order by ascending get_termfreq_est().
      35                 :       3185 :         bool operator()(const PostList *a, const PostList *b) {
      36                 :       3185 :             return a->get_termfreq_est() < b->get_termfreq_est();
      37                 :            :         }
      38                 :            :     };
      39                 :            : 
      40                 :            :     /// Don't allow assignment.
      41                 :            :     void operator=(const MultiAndPostList &);
      42                 :            : 
      43                 :            :     /// Don't allow copying.
      44                 :            :     MultiAndPostList(const MultiAndPostList &);
      45                 :            : 
      46                 :            :     /// The current docid, or zero if we haven't started or are at_end.
      47                 :            :     Xapian::docid did;
      48                 :            : 
      49                 :            :     /// The number of sub-postlists.
      50                 :            :     size_t n_kids;
      51                 :            : 
      52                 :            :     /// Array of pointers to sub-postlists.
      53                 :            :     PostList ** plist;
      54                 :            : 
      55                 :            :     /// Array of maximum weights for the sub-postlists.
      56                 :            :     Xapian::weight * max_wt;
      57                 :            : 
      58                 :            :     /// Total maximum weight (== sum of max_wt values).
      59                 :            :     Xapian::weight max_total;
      60                 :            : 
      61                 :            :     /// The number of documents in the database.
      62                 :            :     Xapian::doccount db_size;
      63                 :            : 
      64                 :            :     /// Pointer to the matcher object, so we can report pruning.
      65                 :            :     MultiMatch *matcher;
      66                 :            : 
      67                 :            :     /// Calculate the new minimum weight for sub-postlist n.
      68                 :      16215 :     Xapian::weight new_min(Xapian::weight w_min, size_t n) {
      69                 :      16215 :         return w_min - (max_total - max_wt[n]);
      70                 :            :     }
      71                 :            : 
      72                 :            :     /// Call next on a sub-postlist n, and handle any pruning.
      73                 :       6555 :     void next_helper(size_t n, Xapian::weight w_min) {
      74                 :       6555 :         PostList * res = plist[n]->next(new_min(w_min, n));
      75         [ +  + ]:       6555 :         if (res) {
      76         [ +  - ]:          3 :             delete plist[n];
      77                 :          3 :             plist[n] = res;
      78                 :          3 :             matcher->recalc_maxweight();
      79                 :            :         }
      80                 :       6555 :     }
      81                 :            : 
      82                 :            :     /// Call skip_to on a sub-postlist n, and handle any pruning.
      83                 :        830 :     void skip_to_helper(size_t n, Xapian::docid did_min, Xapian::weight w_min) {
      84                 :        830 :         PostList * res = plist[n]->skip_to(did_min, new_min(w_min, n));
      85         [ +  + ]:        830 :         if (res) {
      86         [ +  - ]:         13 :             delete plist[n];
      87                 :         13 :             plist[n] = res;
      88                 :         13 :             matcher->recalc_maxweight();
      89                 :            :         }
      90                 :        830 :     }
      91                 :            : 
      92                 :            :     /// Call check on a sub-postlist n, and handle any pruning.
      93                 :       8830 :     void check_helper(size_t n, Xapian::docid did_min, Xapian::weight w_min,
      94                 :            :                       bool &valid) {
      95                 :       8830 :         PostList * res = plist[n]->check(did_min, new_min(w_min, n), valid);
      96         [ -  + ]:       8830 :         if (res) {
      97         [ #  # ]:          0 :             delete plist[n];
      98                 :          0 :             plist[n] = res;
      99                 :          0 :             matcher->recalc_maxweight();
     100                 :            :         }
     101                 :       8830 :     }
     102                 :            : 
     103                 :            :     /** Allocate plist and max_wt arrays of @a n_kids each.
     104                 :            :      *
     105                 :            :      *  @exceptions  std::bad_alloc.
     106                 :            :      */
     107                 :            :     void allocate_plist_and_max_wt();
     108                 :            : 
     109                 :            :     /// Advance the sublists to the next match.
     110                 :            :     PostList * find_next_match(Xapian::weight w_min);
     111                 :            : 
     112                 :            :   public:
     113                 :            :     /** Construct from 2 random-access iterators to a container of PostList*,
     114                 :            :      *  a pointer to the matcher, and the document collection size.
     115                 :            :      */
     116                 :            :     template <class RandomItor>
     117                 :       1545 :     MultiAndPostList(RandomItor pl_begin, RandomItor pl_end,
     118                 :            :                      MultiMatch * matcher_, Xapian::doccount db_size_)
     119                 :            :         : did(0), n_kids(pl_end - pl_begin), plist(NULL), max_wt(NULL),
     120                 :       1545 :           max_total(0), db_size(db_size_), matcher(matcher_)
     121                 :            :     {
     122                 :       1545 :         allocate_plist_and_max_wt();
     123                 :            : 
     124                 :            :         // Copy the postlists in ascending termfreq order, since it will
     125                 :            :         // be more efficient to look at the shorter lists first, and skip
     126                 :            :         // the longer lists based on those.
     127                 :       1545 :         std::partial_sort_copy(pl_begin, pl_end, plist, plist + n_kids,
     128                 :            :                                ComparePostListTermFreqAscending());
     129                 :       1545 :     }
     130                 :            : 
     131                 :            :     /** Construct as the decay product of an OrPostList or AndMaybePostList.
     132                 :            :      *
     133                 :            :      *  @parameter check_order  If true, then l and r may need swapping to
     134                 :            :      *                          ensure freq_est(l) >= freq_est(r).  This
     135                 :            :      *                          should not be necessary for an OrPostList.
     136                 :            :      */
     137                 :         75 :     MultiAndPostList(PostList *l, PostList *r,
     138                 :            :                      Xapian::weight lmax, Xapian::weight rmax,
     139                 :            :                      MultiMatch * matcher_, Xapian::doccount db_size_,
     140                 :            :                      bool check_order = false)
     141                 :            :         : did(0), n_kids(2), plist(NULL), max_wt(NULL),
     142                 :         75 :           max_total(lmax + rmax), db_size(db_size_), matcher(matcher_)
     143                 :            :     {
     144         [ +  + ]:         75 :         if (check_order) {
     145         [ +  + ]:          9 :             if (l->get_termfreq_est() < r->get_termfreq_est()) {
     146                 :          8 :                 std::swap(l, r);
     147                 :          8 :                 std::swap(lmax, rmax);
     148                 :            :             }
     149                 :            :         } else {
     150                 :            :             AssertRel(l->get_termfreq_est(),>=,r->get_termfreq_est());
     151                 :            :         }
     152                 :         75 :         allocate_plist_and_max_wt();
     153                 :            :         // Put the least frequent postlist first.
     154                 :         75 :         plist[0] = r;
     155                 :         75 :         plist[1] = l;
     156                 :         75 :         max_wt[0] = rmax;
     157                 :         75 :         max_wt[1] = lmax;
     158                 :         75 :     }
     159                 :            : 
     160                 :            :     ~MultiAndPostList();
     161                 :            : 
     162                 :            :     Xapian::doccount get_termfreq_min() const;
     163                 :            : 
     164                 :            :     Xapian::doccount get_termfreq_max() const;
     165                 :            : 
     166                 :            :     Xapian::doccount get_termfreq_est() const;
     167                 :            : 
     168                 :            :     TermFreqs get_termfreq_est_using_stats(
     169                 :            :         const Xapian::Weight::Internal & stats) const;
     170                 :            : 
     171                 :            :     Xapian::weight get_maxweight() const;
     172                 :            : 
     173                 :            :     Xapian::docid get_docid() const;
     174                 :            : 
     175                 :            :     Xapian::termcount get_doclength() const;
     176                 :            : 
     177                 :            :     Xapian::weight get_weight() const;
     178                 :            : 
     179                 :            :     bool at_end() const;
     180                 :            : 
     181                 :            :     Xapian::weight recalc_maxweight();
     182                 :            : 
     183                 :            :     Internal *next(Xapian::weight w_min);
     184                 :            : 
     185                 :            :     Internal *skip_to(Xapian::docid, Xapian::weight w_min);
     186                 :            : 
     187                 :            :     std::string get_description() const;
     188                 :            : 
     189                 :            :     /** get_wdf() for MultiAndPostlists returns the sum of the wdfs of the
     190                 :            :      *  sub postlists.
     191                 :            :      *
     192                 :            :      *  The wdf isn't really meaningful in many situations, but if the lists
     193                 :            :      *  are being combined as a synonym we want the sum of the wdfs, so we do
     194                 :            :      *  that in general.
     195                 :            :      */
     196                 :            :     Xapian::termcount get_wdf() const;
     197                 :            : 
     198                 :            :     Xapian::termcount count_matching_subqs() const;
     199                 :            : };
     200                 :            : 
     201                 :            : #endif // XAPIAN_INCLUDED_MULTIANDPOSTLIST_H

Generated by: LCOV version 1.8