Branch data Line data Source code
1 : : /** @file sortable-serialise.cc
2 : : * @brief Serialise floating point values to string which sort the same way.
3 : : */
4 : : /* Copyright (C) 2007,2009 Olly Betts
5 : : *
6 : : * This program is free software; you can redistribute it and/or modify
7 : : * it under the terms of the GNU General Public License as published by
8 : : * the Free Software Foundation; either version 2 of the License, or
9 : : * (at your option) any later version.
10 : : *
11 : : * This program is distributed in the hope that it will be useful,
12 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : * GNU General Public License for more details.
15 : : *
16 : : * You should have received a copy of the GNU General Public License
17 : : * along with this program; if not, write to the Free Software
18 : : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 : : */
20 : :
21 : : #include <config.h>
22 : :
23 : : #include <xapian/queryparser.h>
24 : :
25 : : #include <cfloat>
26 : : #include <cmath>
27 : :
28 : : #include <string>
29 : :
30 : : #include "omassert.h"
31 : :
32 : : using namespace std;
33 : :
34 : : #if FLT_RADIX != 2
35 : : # error Code currently assumes FLT_RADIX == 2
36 : : #endif
37 : :
38 : : #ifdef _MSC_VER
39 : : // Disable warning about negating an unsigned type, which we do deliberately.
40 : : # pragma warning(disable:4146)
41 : : #endif
42 : :
43 : : string
44 : 111941 : Xapian::sortable_serialise(double value)
45 : : {
46 : : double mantissa;
47 : : int exponent;
48 : :
49 : : // Negative infinity.
50 [ + + ]: 111941 : if (value < -DBL_MAX) return string();
51 : :
52 : 111939 : mantissa = frexp(value, &exponent);
53 : :
54 : : /* Deal with zero specially.
55 : : *
56 : : * IEEE representation of doubles uses 11 bits for the exponent, with a
57 : : * bias of 1023. We bias this by subtracting 8, and non-IEEE
58 : : * representations may allow higher exponents, so allow exponents down to
59 : : * -2039 - if smaller exponents are possible anywhere, we underflow such
60 : : * numbers to 0.
61 : : */
62 [ + + ][ - + ]: 111939 : if (mantissa == 0.0 || exponent < -2039) return "\x80";
63 : :
64 : 105900 : bool negative = (mantissa < 0);
65 [ + + ]: 105900 : if (negative) mantissa = -mantissa;
66 : :
67 : : // Infinity, or extremely large non-IEEE representation.
68 [ + + ][ - + ]: 105900 : if (value > DBL_MAX || exponent > 2055) {
69 [ - + ]: 2 : if (negative) {
70 : : // This can only happen with a non-IEEE representation, because
71 : : // we've already tested for value < -DBL_MAX
72 : 0 : return string();
73 : : } else {
74 : 2 : return string(9, '\xff');
75 : : }
76 : : }
77 : :
78 : : // Encoding:
79 : : //
80 : : // [ 7 | 6 | 5 | 4 3 2 1 0]
81 : : // Sm Se Le
82 : : //
83 : : // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
84 : : // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
85 : : // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
86 [ + + ]: 105898 : unsigned char next = (negative ? 0 : 0xe0);
87 : :
88 : : // Bias the exponent by 8 so that more small integers get short encodings.
89 : 105898 : exponent -= 8;
90 : 105898 : bool exponent_negative = (exponent < 0);
91 [ + + ]: 105898 : if (exponent_negative) {
92 : 97581 : exponent = -exponent;
93 : 97581 : next ^= 0x60;
94 : : }
95 : :
96 : 105898 : string result;
97 : :
98 : : /* We store the exponent in 3 or 11 bits. If the number is negative, we
99 : : * flip all the bits of the exponent, since larger negative numbers should
100 : : * sort first.
101 : : *
102 : : * If the exponent is negative, we flip the bits of the exponent, since
103 : : * larger negative exponents should sort first (unless the number is
104 : : * negative, in which case they should sort later).
105 : : */
106 : : Assert(exponent >= 0);
107 [ + + ]: 105898 : if (exponent < 8) {
108 : 103912 : next ^= 0x20;
109 : 103912 : next |= static_cast<unsigned char>(exponent << 2);
110 [ + + ]: 103912 : if (negative ^ exponent_negative) next ^= 0x1c;
111 : : } else {
112 : : Assert((exponent >> 11) == 0);
113 : : // Put the top 5 bits of the exponent into the lower 5 bits of the
114 : : // first byte:
115 : 1986 : next |= static_cast<unsigned char>(exponent >> 6);
116 [ + + ]: 1986 : if (negative ^ exponent_negative) next ^= 0x1f;
117 : 1986 : result += next;
118 : : // And the lower 6 bits of the exponent go into the upper 6 bits
119 : : // of the second byte:
120 : 1986 : next = static_cast<unsigned char>(exponent) << 2;
121 [ + + ]: 1986 : if (negative ^ exponent_negative) next ^= 0xfc;
122 : : }
123 : :
124 : : // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
125 [ + + ]: 105898 : mantissa *= 1 << (negative ? 26 : 27);
126 : 105898 : unsigned word1 = static_cast<unsigned>(mantissa);
127 : 105898 : mantissa -= word1;
128 : 105898 : unsigned word2 = static_cast<unsigned>(mantissa * 4294967296.0); // 1<<32
129 : : // If the number is positive, the first bit will always be set because 0.5
130 : : // <= mantissa < 1, unless mantissa is zero, which we handle specially
131 : : // above). If the number is negative, we negate the mantissa instead of
132 : : // flipping all the bits, so in the case of 0.5, the first bit isn't set
133 : : // so we need to store it explicitly. But for the cost of one extra
134 : : // leading bit, we can save several trailing 0xff bytes in lots of common
135 : : // cases.
136 : : Assert(negative || (word1 & (1<<26)));
137 [ + + ]: 105898 : if (negative) {
138 : : // We negate the mantissa for negative numbers, so that the sort order
139 : : // is reversed (since larger negative numbers should come first).
140 : 12627 : word1 = -word1;
141 [ + + ]: 12627 : if (word2 != 0) ++word1;
142 : 12627 : word2 = -word2;
143 : : }
144 : :
145 : 105898 : word1 &= 0x03ffffff;
146 : 105898 : next |= static_cast<unsigned char>(word1 >> 24);
147 : 105898 : result += next;
148 : 105898 : result.push_back(char(word1 >> 16));
149 : 105898 : result.push_back(char(word1 >> 8));
150 : 105898 : result.push_back(char(word1));
151 : :
152 : 105898 : result.push_back(char(word2 >> 24));
153 : 105898 : result.push_back(char(word2 >> 16));
154 : 105898 : result.push_back(char(word2 >> 8));
155 : 105898 : result.push_back(char(word2));
156 : :
157 : : // Finally, we can chop off any trailing zero bytes.
158 : 105898 : size_t len = result.size();
159 [ + - ][ + + ]: 797718 : while (len > 0 && result[len - 1] == '\0') {
[ + + ]
160 : 691820 : --len;
161 : : }
162 : 105898 : result.resize(len);
163 : :
164 : 111941 : return result;
165 : : }
166 : :
167 : : /// Get a number from the character at a given position in a string, returning
168 : : /// 0 if the string isn't long enough.
169 : : static inline unsigned char
170 : 6008 : numfromstr(const std::string & str, std::string::size_type pos)
171 : : {
172 [ + + ]: 6008 : return (pos < str.size()) ? static_cast<unsigned char>(str[pos]) : '\0';
173 : : }
174 : :
175 : : double
176 : 1486 : Xapian::sortable_unserialise(const std::string & value)
177 : : {
178 : : // Zero.
179 [ + + ]: 1486 : if (value == "\x80") return 0.0;
180 : :
181 : : // Positive infinity.
182 [ + + ]: 1481 : if (value == string(9, '\xff')) {
183 : : #ifdef INFINITY
184 : : // INFINITY is C99. Oddly, it's of type "float" so sanity check in
185 : : // case it doesn't cast to double as infinity (apparently some
186 : : // implementations have this problem).
187 : : if (double(INFINITY) > HUGE_VAL) return INFINITY;
188 : : #endif
189 : 2 : return HUGE_VAL;
190 : : }
191 : :
192 : : // Negative infinity.
193 [ + + ]: 1479 : if (value.empty()) {
194 : : #ifdef INFINITY
195 : : if (double(INFINITY) > HUGE_VAL) return -INFINITY;
196 : : #endif
197 : 2 : return -HUGE_VAL;
198 : : }
199 : :
200 : 1477 : unsigned char first = numfromstr(value, 0);
201 : 1477 : size_t i = 0;
202 : :
203 : 1477 : first ^= static_cast<unsigned char>(first & 0xc0) >> 1;
204 : 1477 : bool negative = !(first & 0x80);
205 : 1477 : bool exponent_negative = (first & 0x40);
206 : 1477 : bool explen = !(first & 0x20);
207 : 1477 : int exponent = first & 0x1f;
208 [ + + ]: 1477 : if (!explen) {
209 : 1457 : exponent >>= 2;
210 [ + + ]: 1457 : if (negative ^ exponent_negative) exponent ^= 0x07;
211 : : } else {
212 : 20 : first = numfromstr(value, ++i);
213 : 20 : exponent <<= 6;
214 : 20 : exponent |= (first >> 2);
215 [ + + ]: 20 : if (negative ^ exponent_negative) exponent ^= 0x07ff;
216 : : }
217 : :
218 : : unsigned word1;
219 : 1477 : word1 = (unsigned(first & 0x03) << 24);
220 : 1477 : word1 |= numfromstr(value, ++i) << 16;
221 : 1477 : word1 |= numfromstr(value, ++i) << 8;
222 : 1477 : word1 |= numfromstr(value, ++i);
223 : :
224 : 1477 : unsigned word2 = 0;
225 [ + + ]: 1477 : if (i < value.size()) {
226 : 20 : word2 = numfromstr(value, ++i) << 24;
227 : 20 : word2 |= numfromstr(value, ++i) << 16;
228 : 20 : word2 |= numfromstr(value, ++i) << 8;
229 : 20 : word2 |= numfromstr(value, ++i);
230 : : }
231 : :
232 [ + + ]: 1477 : if (negative) {
233 : 17 : word1 = -word1;
234 [ + + ]: 17 : if (word2 != 0) ++word1;
235 : 17 : word2 = -word2;
236 : : Assert((word1 & 0xf0000000) != 0);
237 : 17 : word1 &= 0x03ffffff;
238 : : }
239 [ + + ]: 1477 : if (!negative) word1 |= 1<<26;
240 : :
241 : 1477 : double mantissa = 0;
242 [ + + ]: 1477 : if (word2) mantissa = word2 / 4294967296.0; // 1<<32
243 : 1477 : mantissa += word1;
244 [ + + ]: 1477 : mantissa /= 1 << (negative ? 26 : 27);
245 : :
246 [ + + ]: 1477 : if (exponent_negative) exponent = -exponent;
247 : 1477 : exponent += 8;
248 : :
249 [ + + ]: 1477 : if (negative) mantissa = -mantissa;
250 : :
251 : 1486 : return ldexp(mantissa, exponent);
252 : : }
|