LCOV - code coverage report
Current view: top level - queryparser/cjk - cjk-tokenizer.h (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 9 9 100.0 %
Date: 2011-08-21 Functions: 5 5 100.0 %
Branches: 0 0 -

           Branch data     Line data    Source code
       1                 :            : /** @file cjk-tokenizer.h
       2                 :            :  * @brief Tokenise CJK text as n-grams
       3                 :            :  */
       4                 :            : /* Copyright (c) 2007, 2008 Yung-chung Lin (henearkrxern@gmail.com)
       5                 :            :  * Copyright (c) 2011 Richard Boulton (richard@tartarus.org)
       6                 :            :  * Copyright (c) 2011 Brandon Schaefer (brandontschaefer@gmail.com)
       7                 :            :  * Copyright (c) 2011 Olly Betts
       8                 :            :  *
       9                 :            :  * Permission is hereby granted, free of charge, to any person obtaining a copy
       10                 :            :  * of this software and associated documentation files (the "Software"), to
       11                 :            :  * deal in the Software without restriction, including without limitation the
      12                 :            :  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
      13                 :            :  * sell copies of the Software, and to permit persons to whom the Software is
      14                 :            :  * furnished to do so, subject to the following conditions:
      15                 :            :  *
      16                 :            :  * The above copyright notice and this permission notice shall be included in
      17                 :            :  * all copies or substantial portions of the Software.
      18                 :            :  *
      19                 :            :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      20                 :            :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      21                 :            :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      22                 :            :  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      23                 :            :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      24                 :            :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
      25                 :            :  * IN THE SOFTWARE.
      26                 :            :  */
      27                 :            : 
      28                 :            : #ifndef XAPIAN_INCLUDED_CJK_TOKENIZER_H
      29                 :            : #define XAPIAN_INCLUDED_CJK_TOKENIZER_H
      30                 :            : 
      31                 :            : #include "xapian/unicode.h"
      32                 :            : 
      33                 :            : #include <string>
      34                 :            : 
       35                 :            : namespace CJK { // Free helper functions for CJK handling; declared here, defined elsewhere.
       36                 :            : 
       37                 :            : bool codepoint_is_cjk(unsigned codepoint); // Predicate on one Unicode codepoint; presumably true when it is a CJK character - confirm against the definition.
       38                 :            : 
       39                 :            : std::string get_cjk(Xapian::Utf8Iterator &it); // Takes the iterator by non-const reference, so the callee may advance it; presumably returns the CJK text read from it - TODO confirm.
       40                 :            : 
       41                 :            : }
      42                 :            : 
       43                 :        173 : class CJKTokenIterator { // Iterator-style class over UTF-8 text; per the file brief it presumably yields CJK n-gram tokens (operator* and operator++ are defined out of line).
       44                 :            :     Xapian::Utf8Iterator it; // Underlying UTF-8 position; the only member that operator== compares.
       45                 :            : 
       46                 :            :     mutable Xapian::Utf8Iterator p; // mutable, so the const operator*() can modify it; exact role is set by the out-of-line code - TODO confirm.
       47                 :            : 
       48                 :            :     mutable unsigned len; // mutable; NOTE(review): not initialised by any constructor in this header - verify it is always written before it is read.
       49                 :            : 
       50                 :            :     mutable std::string current_token; // mutable; presumably caches the token that operator*() returns by reference - TODO confirm.
       51                 :            : 
       52                 :            :   public:
       53                 :         33 :     CJKTokenIterator(const std::string & s) // Initialises 'it' from s; non-explicit, so a std::string converts implicitly.
       54                 :         33 :         : it(s) { }
       55                 :            : 
       56                 :            :     CJKTokenIterator(const Xapian::Utf8Iterator & it_) // Initialises 'it' as a copy of it_; also non-explicit.
       57                 :            :         : it(it_) { }
       58                 :            : 
       59                 :        140 :     CJKTokenIterator() // Default-constructs 'it'; presumably this is the end iterator that others are compared against - confirm with callers.
       60                 :        140 :         : it() { }
       61                 :            : 
       62                 :            :     const std::string & operator*() const; // Returns a const reference to a std::string (presumably the current token); defined elsewhere.
       63                 :            : 
       64                 :            :     CJKTokenIterator & operator++(); // Pre-increment form; returns a CJKTokenIterator reference (conventionally *this); defined elsewhere.
       65                 :            : 
       66                 :            :     friend bool operator==(const CJKTokenIterator &, const CJKTokenIterator &); // Friend so that the free operator== can read the private member 'it'.
       67                 :            : };
      68                 :            : 
       69                 :            : inline bool // Equality for CJKTokenIterator: true when the two underlying Utf8Iterator members compare equal.
       70                 :        140 : operator==(const CJKTokenIterator & a, const CJKTokenIterator & b)
       71                 :            : {
       72                 :            :     // Only 'it' is compared (p, len and current_token are ignored); this is
       73                 :            :     // enough as we only really care about comparisons against an end iterator.
       74                 :        140 :     return a.it == b.it;
       75                 :            : }
      76                 :            : 
       77                 :            : inline bool // Inequality for CJKTokenIterator: the exact negation of operator==.
       78                 :        140 : operator!=(const CJKTokenIterator & a, const CJKTokenIterator & b)
       79                 :            : {
       80                 :        140 :     return !(a == b); // Delegates to operator== so the two can never disagree.
       81                 :            : }
      82                 :            : 
      83                 :            : #endif // XAPIAN_INCLUDED_CJK_TOKENIZER_H

Generated by: LCOV version 1.8