LCOV - code coverage report
Current view: top level - api - sortable-serialise.cc (source / functions) Hit Total Coverage
Test: Test Coverage for xapian-core r Lines: 94 95 98.9 %
Date: 2011-08-21 Functions: 3 3 100.0 %
Branches: 64 68 94.1 %

           Branch data     Line data    Source code
       1                 :            : /** @file sortable-serialise.cc
       2                 :            :  * @brief Serialise floating point values to string which sort the same way.
       3                 :            :  */
       4                 :            : /* Copyright (C) 2007,2009 Olly Betts
       5                 :            :  *
       6                 :            :  * This program is free software; you can redistribute it and/or modify
       7                 :            :  * it under the terms of the GNU General Public License as published by
       8                 :            :  * the Free Software Foundation; either version 2 of the License, or
       9                 :            :  * (at your option) any later version.
      10                 :            :  *
      11                 :            :  * This program is distributed in the hope that it will be useful,
      12                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      13                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14                 :            :  * GNU General Public License for more details.
      15                 :            :  *
      16                 :            :  * You should have received a copy of the GNU General Public License
      17                 :            :  * along with this program; if not, write to the Free Software
      18                 :            :  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
      19                 :            :  */
      20                 :            : 
      21                 :            : #include <config.h>
      22                 :            : 
      23                 :            : #include <xapian/queryparser.h>
      24                 :            : 
      25                 :            : #include <cfloat>
      26                 :            : #include <cmath>
      27                 :            : 
      28                 :            : #include <string>
      29                 :            : 
      30                 :            : #include "omassert.h"
      31                 :            : 
      32                 :            : using namespace std;
      33                 :            : 
      34                 :            : #if FLT_RADIX != 2
      35                 :            : # error Code currently assumes FLT_RADIX == 2
      36                 :            : #endif
      37                 :            : 
      38                 :            : #ifdef _MSC_VER
      39                 :            : // Disable warning about negating an unsigned type, which we do deliberately.
      40                 :            : # pragma warning(disable:4146)
      41                 :            : #endif
      42                 :            : 
      43                 :            : string
      44                 :     111941 : Xapian::sortable_serialise(double value)
      45                 :            : {
      46                 :            :     double mantissa;
      47                 :            :     int exponent;
      48                 :            : 
      49                 :            :     // Negative infinity.
      50         [ +  + ]:     111941 :     if (value < -DBL_MAX) return string();
      51                 :            : 
      52                 :     111939 :     mantissa = frexp(value, &exponent);
      53                 :            : 
      54                 :            :     /* Deal with zero specially.
      55                 :            :      *
      56                 :            :      * IEEE representation of doubles uses 11 bits for the exponent, with a
      57                 :            :      * bias of 1023.  We bias this by subtracting 8, and non-IEEE
      58                 :            :      * representations may allow higher exponents, so allow exponents down to
      59                 :            :      * -2039 - if smaller exponents are possible anywhere, we underflow such
      60                 :            :      *  numbers to 0.
      61                 :            :      */
      62 [ +  + ][ -  + ]:     111939 :     if (mantissa == 0.0 || exponent < -2039) return "\x80";
      63                 :            : 
      64                 :     105900 :     bool negative = (mantissa < 0);
      65         [ +  + ]:     105900 :     if (negative) mantissa = -mantissa;
      66                 :            : 
      67                 :            :     // Infinity, or extremely large non-IEEE representation.
      68 [ +  + ][ -  + ]:     105900 :     if (value > DBL_MAX || exponent > 2055) {
      69         [ -  + ]:          2 :         if (negative) {
      70                 :            :             // This can only happen with a non-IEEE representation, because
      71                 :            :             // we've already tested for value < -DBL_MAX
      72                 :          0 :             return string();
      73                 :            :         } else {
      74                 :          2 :             return string(9, '\xff');
      75                 :            :         }
      76                 :            :     }
      77                 :            : 
      78                 :            :     // Encoding:
      79                 :            :     //
      80                 :            :     // [ 7 | 6 | 5 | 4 3 2 1 0]
      81                 :            :     //   Sm  Se  Le
      82                 :            :     //
      83                 :            :     // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
      84                 :            :     // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
      85                 :            :     // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
      86         [ +  + ]:     105898 :     unsigned char next = (negative ? 0 : 0xe0);
      87                 :            : 
      88                 :            :     // Bias the exponent by 8 so that more small integers get short encodings.
      89                 :     105898 :     exponent -= 8;
      90                 :     105898 :     bool exponent_negative = (exponent < 0);
      91         [ +  + ]:     105898 :     if (exponent_negative) {
      92                 :      97581 :         exponent = -exponent;
      93                 :      97581 :         next ^= 0x60;
      94                 :            :     }
      95                 :            : 
      96                 :     105898 :     string result;
      97                 :            : 
      98                 :            :     /* We store the exponent in 3 or 11 bits.  If the number is negative, we
      99                 :            :      * flip all the bits of the exponent, since larger negative numbers should
     100                 :            :      * sort first.
     101                 :            :      *
     102                 :            :      * If the exponent is negative, we flip the bits of the exponent, since
     103                 :            :      * larger negative exponents should sort first (unless the number is
     104                 :            :      * negative, in which case they should sort later).
     105                 :            :      */
     106                 :            :     Assert(exponent >= 0);
     107         [ +  + ]:     105898 :     if (exponent < 8) {
     108                 :     103912 :         next ^= 0x20;
     109                 :     103912 :         next |= static_cast<unsigned char>(exponent << 2);
     110         [ +  + ]:     103912 :         if (negative ^ exponent_negative) next ^= 0x1c;
     111                 :            :     } else {
     112                 :            :         Assert((exponent >> 11) == 0);
     113                 :            :         // Put the top 5 bits of the exponent into the lower 5 bits of the
     114                 :            :         // first byte:
     115                 :       1986 :         next |= static_cast<unsigned char>(exponent >> 6);
     116         [ +  + ]:       1986 :         if (negative ^ exponent_negative) next ^= 0x1f;
     117                 :       1986 :         result += next;
     118                 :            :         // And the lower 6 bits of the exponent go into the upper 6 bits
     119                 :            :         // of the second byte:
     120                 :       1986 :         next = static_cast<unsigned char>(exponent) << 2;
     121         [ +  + ]:       1986 :         if (negative ^ exponent_negative) next ^= 0xfc;
     122                 :            :     }
     123                 :            : 
     124                 :            :     // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
     125         [ +  + ]:     105898 :     mantissa *= 1 << (negative ? 26 : 27);
     126                 :     105898 :     unsigned word1 = static_cast<unsigned>(mantissa);
     127                 :     105898 :     mantissa -= word1;
     128                 :     105898 :     unsigned word2 = static_cast<unsigned>(mantissa * 4294967296.0); // 1<<32
     129                 :            :     // If the number is positive, the first bit will always be set because 0.5
     130                 :            :     // <= mantissa < 1, unless mantissa is zero, which we handle specially
     131                 :            :     // above).  If the number is negative, we negate the mantissa instead of
     132                 :            :     // flipping all the bits, so in the case of 0.5, the first bit isn't set
     133                 :            :     // so we need to store it explicitly.  But for the cost of one extra
     134                 :            :     // leading bit, we can save several trailing 0xff bytes in lots of common
     135                 :            :     // cases.
     136                 :            :     Assert(negative || (word1 & (1<<26)));
     137         [ +  + ]:     105898 :     if (negative) {
     138                 :            :         // We negate the mantissa for negative numbers, so that the sort order
     139                 :            :         // is reversed (since larger negative numbers should come first).
     140                 :      12627 :         word1 = -word1;
     141         [ +  + ]:      12627 :         if (word2 != 0) ++word1;
     142                 :      12627 :         word2 = -word2;
     143                 :            :     }
     144                 :            : 
     145                 :     105898 :     word1 &= 0x03ffffff;
     146                 :     105898 :     next |= static_cast<unsigned char>(word1 >> 24);
     147                 :     105898 :     result += next;
     148                 :     105898 :     result.push_back(char(word1 >> 16));
     149                 :     105898 :     result.push_back(char(word1 >> 8));
     150                 :     105898 :     result.push_back(char(word1));
     151                 :            : 
     152                 :     105898 :     result.push_back(char(word2 >> 24));
     153                 :     105898 :     result.push_back(char(word2 >> 16));
     154                 :     105898 :     result.push_back(char(word2 >> 8));
     155                 :     105898 :     result.push_back(char(word2));
     156                 :            : 
     157                 :            :     // Finally, we can chop off any trailing zero bytes.
     158                 :     105898 :     size_t len = result.size();
     159 [ +  - ][ +  + ]:     797718 :     while (len > 0 && result[len - 1] == '\0') {
                 [ +  + ]
     160                 :     691820 :         --len;
     161                 :            :     }
     162                 :     105898 :     result.resize(len);
     163                 :            : 
     164                 :     111941 :     return result;
     165                 :            : }
     166                 :            : 
     167                 :            : /// Get a number from the character at a given position in a string, returning
     168                 :            : /// 0 if the string isn't long enough.
     169                 :            : static inline unsigned char
     170                 :       6008 : numfromstr(const std::string & str, std::string::size_type pos)
     171                 :            : {
     172         [ +  + ]:       6008 :     return (pos < str.size()) ? static_cast<unsigned char>(str[pos]) : '\0';
     173                 :            : }
     174                 :            : 
     175                 :            : double
     176                 :       1486 : Xapian::sortable_unserialise(const std::string & value)
     177                 :            : {
     178                 :            :     // Zero.
     179         [ +  + ]:       1486 :     if (value == "\x80") return 0.0;
     180                 :            : 
     181                 :            :     // Positive infinity.
     182         [ +  + ]:       1481 :     if (value == string(9, '\xff')) {
     183                 :            : #ifdef INFINITY
     184                 :            :         // INFINITY is C99.  Oddly, it's of type "float" so sanity check in
     185                 :            :         // case it doesn't cast to double as infinity (apparently some
     186                 :            :         // implementations have this problem).
     187                 :            :         if (double(INFINITY) > HUGE_VAL) return INFINITY;
     188                 :            : #endif
     189                 :          2 :         return HUGE_VAL;
     190                 :            :     }
     191                 :            : 
     192                 :            :     // Negative infinity.
     193         [ +  + ]:       1479 :     if (value.empty()) {
     194                 :            : #ifdef INFINITY
     195                 :            :         if (double(INFINITY) > HUGE_VAL) return -INFINITY;
     196                 :            : #endif
     197                 :          2 :         return -HUGE_VAL;
     198                 :            :     }
     199                 :            : 
     200                 :       1477 :     unsigned char first = numfromstr(value, 0);
     201                 :       1477 :     size_t i = 0;
     202                 :            : 
     203                 :       1477 :     first ^= static_cast<unsigned char>(first & 0xc0) >> 1;
     204                 :       1477 :     bool negative = !(first & 0x80);
     205                 :       1477 :     bool exponent_negative = (first & 0x40);
     206                 :       1477 :     bool explen = !(first & 0x20);
     207                 :       1477 :     int exponent = first & 0x1f;
     208         [ +  + ]:       1477 :     if (!explen) {
     209                 :       1457 :         exponent >>= 2;
     210         [ +  + ]:       1457 :         if (negative ^ exponent_negative) exponent ^= 0x07;
     211                 :            :     } else {
     212                 :         20 :         first = numfromstr(value, ++i);
     213                 :         20 :         exponent <<= 6;
     214                 :         20 :         exponent |= (first >> 2);
     215         [ +  + ]:         20 :         if (negative ^ exponent_negative) exponent ^= 0x07ff;
     216                 :            :     }
     217                 :            : 
     218                 :            :     unsigned word1;
     219                 :       1477 :     word1 = (unsigned(first & 0x03) << 24);
     220                 :       1477 :     word1 |= numfromstr(value, ++i) << 16;
     221                 :       1477 :     word1 |= numfromstr(value, ++i) << 8;
     222                 :       1477 :     word1 |= numfromstr(value, ++i);
     223                 :            : 
     224                 :       1477 :     unsigned word2 = 0;
     225         [ +  + ]:       1477 :     if (i < value.size()) {
     226                 :         20 :         word2 = numfromstr(value, ++i) << 24;
     227                 :         20 :         word2 |= numfromstr(value, ++i) << 16;
     228                 :         20 :         word2 |= numfromstr(value, ++i) << 8;
     229                 :         20 :         word2 |= numfromstr(value, ++i);
     230                 :            :     }
     231                 :            : 
     232         [ +  + ]:       1477 :     if (negative) {
     233                 :         17 :         word1 = -word1;
     234         [ +  + ]:         17 :         if (word2 != 0) ++word1;
     235                 :         17 :         word2 = -word2;
     236                 :            :         Assert((word1 & 0xf0000000) != 0);
     237                 :         17 :         word1 &= 0x03ffffff;
     238                 :            :     }
     239         [ +  + ]:       1477 :     if (!negative) word1 |= 1<<26;
     240                 :            : 
     241                 :       1477 :     double mantissa = 0;
     242         [ +  + ]:       1477 :     if (word2) mantissa = word2 / 4294967296.0; // 1<<32
     243                 :       1477 :     mantissa += word1;
     244         [ +  + ]:       1477 :     mantissa /= 1 << (negative ? 26 : 27);
     245                 :            : 
     246         [ +  + ]:       1477 :     if (exponent_negative) exponent = -exponent;
     247                 :       1477 :     exponent += 8;
     248                 :            : 
     249         [ +  + ]:       1477 :     if (negative) mantissa = -mantissa;
     250                 :            : 
     251                 :       1486 :     return ldexp(mantissa, exponent);
     252                 :            : }

Generated by: LCOV version 1.8