Branch data Line data Source code
1 : : /* phrasepostlist.h: Return only items where terms are near or form a phrase
2 : : *
3 : : * ----START-LICENCE----
4 : : * Copyright 1999,2000,2001 BrightStation PLC
5 : : * Copyright 2003,2004,2005 Olly Betts
6 : : * Copyright 2009 Lemur Consulting Ltd
7 : : *
8 : : * This program is free software; you can redistribute it and/or
9 : : * modify it under the terms of the GNU General Public License as
10 : : * published by the Free Software Foundation; either version 2 of the
11 : : * License, or (at your option) any later version.
12 : : *
13 : : * This program is distributed in the hope that it will be useful,
14 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : : * GNU General Public License for more details.
17 : : *
18 : : * You should have received a copy of the GNU General Public License
19 : : * along with this program; if not, write to the Free Software
20 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 : : * USA
22 : : */
23 : :
24 : : #ifndef OM_HGUARD_PHRASEPOSTLIST_H
25 : : #define OM_HGUARD_PHRASEPOSTLIST_H
26 : :
27 : : #include "selectpostlist.h"
28 : : #include <vector>
29 : :
30 : : typedef Xapian::PositionIterator::Internal PositionList; // Shorthand used by the do_test() declarations below, which take std::vector<PositionList *>.
31 : :
32 : : /** A postlist comprising several postlists NEARed together.
33 : : *
34 : : * This postlist returns a posting if and only if it is in all of the
35 : : * sub-postlists and all the terms occur within a specified distance of
36 : : * each other somewhere in the document. The weight for a posting is the
37 : : * sum of the weights of the sub-postings.
38 : : */
39 [ + - ][ # # ]: 288 : class NearPostList : public SelectPostList { // Only get_termfreq_est() and the constructor are defined inline; the other members are just declared here.
40 : : private:
41 : : Xapian::termpos window; // Set from the constructor's window_ argument.
42 : : std::vector<PostList *> terms; // Copy of the pointers in [terms_begin_, terms_end_); who owns the pointees is not visible in this header.
43 : :
44 : : bool test_doc(); // Presumably the per-document accept/reject hook used by SelectPostList - confirm in selectpostlist.h.
45 : : bool do_test(std::vector<PositionList *> &plists, Xapian::termcount i,
46 : : Xapian::termcount min, Xapian::termcount max); // Helper over position lists; semantics of i/min/max live in the implementation - TODO confirm.
47 : : public:
48 : : std::string get_description() const;
49 : : Xapian::termcount get_wdf() const;
50 : :
51 : 272 : Xapian::doccount get_termfreq_est() const
52 : : {
53 : : // FIXME: no principled estimate is known - this just halves the source postlist's estimate (integer division).
54 : 272 : return source->get_termfreq_est() / 2;
55 : : }
56 : :
57 : : TermFreqs get_termfreq_est_using_stats(
58 : : const Xapian::Weight::Internal & stats) const;
59 : :
60 : 288 : NearPostList(PostList *source_, Xapian::termpos window_,
61 : : std::vector<PostList *>::const_iterator &terms_begin_, // Non-const reference: callers must pass an lvalue iterator.
62 : : std::vector<PostList *>::const_iterator &terms_end_) // Likewise an lvalue; marks the end of the range copied into terms.
63 : 288 : : SelectPostList(source_), terms(terms_begin_, terms_end_) // source_ goes to the base class; the iterator range is copied.
64 : : {
65 : 288 : window = window_;
66 : 288 : }
67 : : };
68 : :
69 : : /** A postlist comprising several postlists PHRASEd together.
70 : : *
71 : : * This postlist returns a posting if and only if it is in all of the
72 : : * sub-postlists and all the terms occur IN THE GIVEN ORDER within a
73 : : * specified distance of each other somewhere in the document. The weight
74 : : * for a posting is the sum of the weights of the sub-postings.
75 : : */
76 [ + - ][ # # ]: 176 : class PhrasePostList : public SelectPostList { // Only get_termfreq_est() and the constructor are defined inline; the other members are just declared here.
77 : : private:
78 : : Xapian::termpos window; // Set from the constructor's window_ argument.
79 : : std::vector<PostList *> terms; // Copy of the pointers in [terms_begin_, terms_end_); who owns the pointees is not visible in this header.
80 : :
81 : : bool test_doc(); // Presumably the per-document accept/reject hook used by SelectPostList - confirm in selectpostlist.h.
82 : : bool do_test(std::vector<PositionList *> &plists, Xapian::termcount i,
83 : : Xapian::termcount min, Xapian::termcount max); // Helper over position lists; semantics of i/min/max live in the implementation - TODO confirm.
84 : : public:
85 : : std::string get_description() const;
86 : : Xapian::termcount get_wdf() const;
87 : :
88 : 176 : Xapian::doccount get_termfreq_est() const
89 : : {
90 : : // FIXME: no principled estimate is known - this just takes a third of the source postlist's estimate (integer division).
91 : 176 : return source->get_termfreq_est() / 3;
92 : : }
93 : :
94 : : TermFreqs get_termfreq_est_using_stats(
95 : : const Xapian::Weight::Internal & stats) const;
96 : :
97 : 176 : PhrasePostList(PostList *source_, Xapian::termpos window_,
98 : : std::vector<PostList *>::const_iterator &terms_begin_, // Non-const reference: callers must pass an lvalue iterator.
99 : : std::vector<PostList *>::const_iterator &terms_end_) // Likewise an lvalue; marks the end of the range copied into terms.
100 : 176 : : SelectPostList(source_), terms(terms_begin_, terms_end_) // source_ goes to the base class; the iterator range is copied.
101 : : {
102 : 176 : window = window_;
103 : 176 : }
104 : : };
105 : :
106 : : #endif /* OM_HGUARD_PHRASEPOSTLIST_H */
|