1 |
|
/* |
2 |
|
** Copyright (C) 2018 Martin Brain |
3 |
|
** |
4 |
|
** See the file LICENSE for licensing information. |
5 |
|
*/ |
6 |
|
|
7 |
|
/* |
8 |
|
** add.h |
9 |
|
** |
10 |
|
** Martin Brain |
11 |
|
** martin.brain@cs.ox.ac.uk |
12 |
|
** 01/09/14 |
13 |
|
** |
14 |
|
** Addition of arbitrary precision floats |
15 |
|
** |
16 |
|
** The current design is based on a two-path adder but it may be useful to use |
17 |
|
** more paths. There are five cases that are of interest: |
18 |
|
** 1. effective add / very far |
19 |
|
** -> set the sticky bit only |
20 |
|
** 2. effective add / far or near |
21 |
|
** -> align and add, realign down if needed |
22 |
|
** 3. effective sub / very far |
23 |
|
** -> decrement, re-normalise and set sticky bits or (dependent on rounding-mode, skip entirely) |
24 |
|
** 4. effective sub / far |
25 |
|
** -> align and subtract, realign up if needed |
26 |
|
** 5. effective sub / near |
27 |
|
** -> align, subtract and normalise up |
28 |
|
** |
29 |
|
*/ |
30 |
|
|
31 |
|
|
32 |
|
/* |
33 |
|
** Ideas |
34 |
|
** Optimisation : Collar the exponent difference, convert add to twice the width and thus unify the paths and simplify the shifting. |
35 |
|
** Enable the absolute max underapproximation |
36 |
|
*/ |
37 |
|
|
38 |
|
|
39 |
|
#include "symfpu/core/unpackedFloat.h" |
40 |
|
#include "symfpu/core/ite.h" |
41 |
|
#include "symfpu/core/rounder.h" |
42 |
|
#include "symfpu/core/sign.h" |
43 |
|
#include "symfpu/core/operations.h" |
44 |
|
|
45 |
|
|
46 |
|
#ifndef SYMFPU_ADD |
47 |
|
#define SYMFPU_ADD |
48 |
|
|
49 |
|
namespace symfpu { |
50 |
|
|
51 |
|
// There are a number of variants on how this should be done. |
52 |
|
// This is the implementation that handles all of them. |
53 |
|
// Below are restricted versions for particular special cases. |
54 |
|
template <class t> |
55 |
584 |
unpackedFloat<t> addAdditionSpecialCasesComplete (const typename t::fpt &format, |
56 |
|
const typename t::rm &roundingMode, |
57 |
|
const unpackedFloat<t> &left, |
58 |
|
const unpackedFloat<t> &leftID, |
59 |
|
const unpackedFloat<t> &right, |
60 |
|
const typename t::prop &returnLeft, |
61 |
|
const typename t::prop &returnRight, |
62 |
|
const unpackedFloat<t> &additionResult, |
63 |
|
const typename t::prop &isAdd) { |
64 |
|
|
65 |
|
typedef typename t::prop prop; |
66 |
|
|
67 |
|
// NaN |
68 |
618 |
prop eitherArgumentNan(left.getNaN() || right.getNaN()); |
69 |
618 |
prop bothInfinity(left.getInf() && right.getInf()); |
70 |
618 |
prop signsMatch(left.getSign() == right.getSign()); |
71 |
|
//prop compatableSigns(ITE(isAdd, signsMatch, !signsMatch)); |
72 |
618 |
prop compatableSigns(isAdd ^ !signsMatch); |
73 |
|
|
74 |
618 |
prop generatesNaN(eitherArgumentNan || (bothInfinity && !compatableSigns)); |
75 |
|
|
76 |
|
|
77 |
|
// Inf |
78 |
1244 |
prop generatesInf((bothInfinity && compatableSigns) || |
79 |
1856 |
( left.getInf() && !right.getInf()) || |
80 |
1018 |
(!left.getInf() && right.getInf())); |
81 |
|
|
82 |
618 |
prop signOfInf(ITE(left.getInf(), left.getSign(), prop(isAdd ^ !right.getSign()))); |
83 |
|
|
84 |
|
|
85 |
|
// Zero |
86 |
618 |
prop bothZero(left.getZero() && right.getZero()); |
87 |
618 |
prop flipRightSign(!isAdd ^ right.getSign()); |
88 |
652 |
prop signOfZero(ITE((roundingMode == t::RTN()), |
89 |
1134 |
left.getSign() || flipRightSign, |
90 |
1134 |
left.getSign() && flipRightSign)); |
91 |
|
|
92 |
618 |
prop idLeft(!left.getZero() && right.getZero()); |
93 |
618 |
prop idRight( left.getZero() && !right.getZero()); |
94 |
|
|
95 |
|
// At most one of idLeft, idRight, generatesNaN, generatesInf and bothZero is true. |
96 |
|
// If used in addition additionResult is guaranteed to not be NaN. |
97 |
|
|
98 |
|
// Subtle trick : as the input to this will have been rounded it will have |
99 |
|
// an ITE with the default values "on top", thus doing the special cases |
100 |
|
// first (inner) rather than last (outer) allows them to be compacted better |
101 |
1100 |
return ITE(idRight || returnRight, |
102 |
|
ITE(isAdd, |
103 |
|
right, |
104 |
|
negate(format, right)), |
105 |
1100 |
ITE(idLeft || returnLeft, |
106 |
|
leftID, |
107 |
|
ITE(generatesNaN, |
108 |
|
unpackedFloat<t>::makeNaN(format), |
109 |
|
ITE(generatesInf, |
110 |
|
unpackedFloat<t>::makeInf(format, signOfInf), |
111 |
|
ITE(bothZero, |
112 |
|
unpackedFloat<t>::makeZero(format, signOfZero), |
113 |
2818 |
additionResult))))); |
114 |
|
} |
115 |
|
|
116 |
|
|
117 |
|
// leftID is the value returned in the idLeft case (i.e. when left is not a |
118 |
|
// special number and right is zero). This is needed by FMA as the flags |
119 |
|
// for left and leftID are computed differently and need to be handled differently. |
120 |
|
template <class t> |
121 |
|
unpackedFloat<t> addAdditionSpecialCasesWithID (const typename t::fpt &format, |
122 |
|
const typename t::rm &roundingMode, |
123 |
|
const unpackedFloat<t> &left, |
124 |
|
const unpackedFloat<t> &leftID, |
125 |
|
const unpackedFloat<t> &right, |
126 |
|
const unpackedFloat<t> &additionResult, |
127 |
|
const typename t::prop &isAdd) { |
128 |
|
return addAdditionSpecialCasesComplete<t>(format, roundingMode, left, leftID, |
129 |
|
right, typename t::prop(false), typename t::prop(false), |
130 |
|
additionResult, isAdd); |
131 |
|
} |
132 |
|
|
133 |
|
|
134 |
|
// This is the usual case; use this one! |
135 |
|
template <class t> |
136 |
584 |
unpackedFloat<t> addAdditionSpecialCases (const typename t::fpt &format, |
137 |
|
const typename t::rm &roundingMode, |
138 |
|
const unpackedFloat<t> &left, |
139 |
|
const unpackedFloat<t> &right, |
140 |
|
const unpackedFloat<t> &additionResult, |
141 |
|
const typename t::prop &isAdd) { |
142 |
|
return addAdditionSpecialCasesComplete<t>(format, roundingMode, left, left, |
143 |
|
right, typename t::prop(false), typename t::prop(false), |
144 |
584 |
additionResult, isAdd); |
145 |
|
} |
146 |
|
|
147 |
|
|
148 |
|
// As above but allows the (very) far path to be accelerated |
149 |
|
template <class t> |
150 |
|
unpackedFloat<t> addAdditionSpecialCasesWithBypass (const typename t::fpt &format, |
151 |
|
const typename t::rm &roundingMode, |
152 |
|
const unpackedFloat<t> &left, |
153 |
|
const unpackedFloat<t> &right, |
154 |
|
const typename t::prop &returnLeft, |
155 |
|
const typename t::prop &returnRight, |
156 |
|
const unpackedFloat<t> &additionResult, |
157 |
|
const typename t::prop &isAdd) { |
158 |
|
return addAdditionSpecialCasesComplete<t>(format, roundingMode, left, left, |
159 |
|
right, returnLeft, returnRight, |
160 |
|
additionResult, isAdd); |
161 |
|
} |
162 |
|
|
163 |
|
|
164 |
|
|
165 |
|
/* Computes the normal / subnormal case only. |
166 |
|
* This allows multiple versions of the first phase to be used and |
167 |
|
* the first phase to be used for other things (e.g. FMA). |
168 |
|
*/ |
169 |
|
|
170 |
|
template <class t> |
171 |
584 |
struct exponentCompareInfo { |
172 |
|
typedef typename t::sbv sbv; |
173 |
|
typedef typename t::prop prop; |
174 |
|
|
175 |
|
prop leftIsMax; |
176 |
|
sbv maxExponent; |
177 |
|
sbv absoluteExponentDifference; |
178 |
|
prop diffIsZero; |
179 |
|
prop diffIsOne; |
180 |
|
prop diffIsGreaterThanPrecision; |
181 |
|
prop diffIsTwoToPrecision; |
182 |
|
prop diffIsGreaterThanPrecisionPlusOne; |
183 |
|
|
184 |
584 |
exponentCompareInfo(const prop &lil, const sbv &me, const sbv &aed, |
185 |
|
const prop &diz, const prop &dio, const prop &digtp, const prop &dittp, const prop &disgtppo) : |
186 |
|
leftIsMax(lil), maxExponent(me), absoluteExponentDifference(aed), |
187 |
584 |
diffIsZero(diz), diffIsOne(dio), diffIsGreaterThanPrecision(digtp), diffIsTwoToPrecision(dittp), diffIsGreaterThanPrecisionPlusOne(disgtppo) {} |
188 |
|
|
189 |
|
exponentCompareInfo(const exponentCompareInfo<t> &old) : |
190 |
|
leftIsMax(old.leftIsMax), maxExponent(old.maxExponent), absoluteExponentDifference(old.absoluteExponentDifference), |
191 |
|
diffIsZero(old.diffIsZero), diffIsOne(old.diffIsOne), |
192 |
|
diffIsGreaterThanPrecision(old.diffIsGreaterThanPrecision), |
193 |
|
diffIsTwoToPrecision(old.diffIsTwoToPrecision), |
194 |
|
diffIsGreaterThanPrecisionPlusOne(old.diffIsGreaterThanPrecisionPlusOne) {} |
195 |
|
}; |
196 |
|
|
197 |
|
template <class t> |
198 |
584 |
exponentCompareInfo<t> addExponentCompare(const typename t::bwt exponentWidth, |
199 |
|
const typename t::bwt significandWidth, |
200 |
|
const typename t::sbv &leftExponent, |
201 |
|
const typename t::sbv &rightExponent, |
202 |
|
const typename t::prop &knownInCorrectOrder) { |
203 |
584 |
PRECONDITION( leftExponent.getWidth() + 1 == exponentWidth); |
204 |
584 |
PRECONDITION(rightExponent.getWidth() + 1 == exponentWidth); |
205 |
|
|
206 |
|
typedef typename t::prop prop; |
207 |
|
typedef typename t::sbv sbv; |
208 |
|
|
209 |
|
#if 0 |
210 |
|
// The obvious implementation |
211 |
|
|
212 |
|
// Compute exponent distance |
213 |
|
sbv maxExponent(max<t,sbv>(leftExponent.extend(1), rightExponent.extend(1))); |
214 |
|
sbv minExponent(min<t,sbv>(leftExponent.extend(1), rightExponent.extend(1))); |
215 |
|
sbv absoluteExponentDifference(maxExponent - minExponent); |
216 |
|
|
217 |
|
prop leftIsMax(knownInCorrectOrder || leftExponent.extend(1) == maxExponent); |
218 |
|
#else |
219 |
|
|
220 |
|
// A better implementation(?) |
221 |
1168 |
sbv exponentDifference(leftExponent.extend(1) - rightExponent.extend(1)); |
222 |
|
|
223 |
618 |
prop signBit(exponentDifference.toUnsigned().extract(exponentWidth - 1, exponentWidth - 1).isAllOnes()); |
224 |
618 |
prop leftIsMax(knownInCorrectOrder || !signBit); |
225 |
|
|
226 |
1168 |
sbv maxExponent(ITE(leftIsMax, leftExponent.extend(1), rightExponent.extend(1))); |
227 |
1168 |
sbv absoluteExponentDifference(ITE(leftIsMax, exponentDifference, exponentDifference.modularNegate())); // Largest negative value not obtainable so negate is safe |
228 |
|
#endif |
229 |
|
|
230 |
584 |
INVARIANT(sbv::zero(exponentWidth) <= absoluteExponentDifference); |
231 |
|
|
232 |
|
// Optimisation : compact these comparisons at the bit-level |
233 |
618 |
prop diffIsZero(absoluteExponentDifference == sbv::zero(exponentWidth)); |
234 |
618 |
prop diffIsOne(absoluteExponentDifference == sbv::one(exponentWidth)); |
235 |
618 |
prop diffIsGreaterThanPrecision(sbv(exponentWidth, significandWidth) < absoluteExponentDifference); // Assumes this is representable |
236 |
618 |
prop diffIsTwoToPrecision(!diffIsZero && !diffIsOne && !diffIsGreaterThanPrecision); |
237 |
618 |
prop diffIsGreaterThanPrecisionPlusOne(sbv(exponentWidth, significandWidth + 1) < absoluteExponentDifference); |
238 |
|
|
239 |
584 |
probabilityAnnotation<t,prop>(diffIsZero, UNLIKELY); |
240 |
584 |
probabilityAnnotation<t,prop>(diffIsOne, UNLIKELY); |
241 |
584 |
probabilityAnnotation<t,prop>(diffIsGreaterThanPrecision, LIKELY); // In proving if not execution |
242 |
584 |
probabilityAnnotation<t,prop>(diffIsGreaterThanPrecisionPlusOne, LIKELY); // In proving if not execution |
243 |
|
|
244 |
|
|
245 |
|
return exponentCompareInfo<t>(leftIsMax, |
246 |
|
maxExponent, absoluteExponentDifference, |
247 |
1168 |
diffIsZero, diffIsOne, diffIsGreaterThanPrecision, diffIsTwoToPrecision, diffIsGreaterThanPrecisionPlusOne); |
248 |
|
} |
249 |
|
|
250 |
|
|
251 |
|
|
252 |
|
// Note that the arithmetic part of add needs the rounding mode. |
253 |
|
// This is an oddity due to the way that the sign of zero is generated. |
254 |
|
|
255 |
|
template <class t> |
256 |
584 |
struct floatWithCustomRounderInfo { |
257 |
|
unpackedFloat<t> uf; |
258 |
|
customRounderInfo<t> known; |
259 |
|
|
260 |
584 |
floatWithCustomRounderInfo(const unpackedFloat<t> &_uf, const customRounderInfo<t> &_known) : uf(_uf), known(_known) {} |
261 |
|
floatWithCustomRounderInfo(const floatWithCustomRounderInfo<t> &old) : uf(old.uf), known(old.known) {} |
262 |
|
}; |
263 |
|
|
264 |
|
template <class t> |
265 |
584 |
floatWithCustomRounderInfo<t> arithmeticAdd (const typename t::fpt &format, |
266 |
|
const typename t::rm &roundingMode, |
267 |
|
const unpackedFloat<t> &left, |
268 |
|
const unpackedFloat<t> &right, |
269 |
|
const typename t::prop &isAdd, |
270 |
|
const typename t::prop &knownInCorrectOrder, |
271 |
|
const exponentCompareInfo<t> &ec) { |
272 |
|
|
273 |
|
typedef typename t::bwt bwt; |
274 |
|
typedef typename t::fpt fpt; |
275 |
|
typedef typename t::prop prop; |
276 |
|
typedef typename t::ubv ubv; |
277 |
|
typedef typename t::sbv sbv; |
278 |
|
|
279 |
584 |
PRECONDITION(left.valid(format)); |
280 |
584 |
PRECONDITION(right.valid(format)); |
281 |
|
|
282 |
|
// Work out if an effective subtraction |
283 |
618 |
prop effectiveAdd((left.getSign() ^ right.getSign()) ^ isAdd); |
284 |
|
|
285 |
584 |
bwt exponentWidth(left.getExponent().getWidth() + 1); |
286 |
584 |
bwt significandWidth(left.getSignificand().getWidth()); |
287 |
|
|
288 |
|
/* Exponent difference and effective add implies a large amount about the output exponent and flags |
289 |
|
** R denotes that this is possible via rounding up and incrementing the exponent |
290 |
|
** |
291 |
|
** Case A. max(l,r) + 1, B. max(l,r) C. max(l,r) - 1 D. max(l,r) - k E. zero |
292 |
|
** Eff. Add Y Y |
293 |
|
** diff = 0 Y, sticky 0 |
294 |
|
** diff = 1 Y, sticky 0, R Y, sticky 0 |
295 |
|
** diff : [2,p] decreasing prob., R Y |
296 |
|
** diff > p R Y |
297 |
|
** |
298 |
|
** Eff. Sub Y Y Y, exact Y, exact |
299 |
|
** diff = 0 Y, exact prob. drop with k low prob. |
300 |
|
** diff = 1 Y, sticky 0 Y, exact prob. drop with k |
301 |
|
** diff : [2,p] Y, R decreasing prob. |
302 |
|
** diff > p Y, R low prob. |
303 |
|
** |
304 |
|
*/ |
305 |
|
// Optimisation : add 'bypass' invariants that link the final exponent to the input using this. |
306 |
|
|
307 |
|
|
308 |
|
// Rounder flags |
309 |
618 |
prop noOverflow(!effectiveAdd); |
310 |
618 |
prop noUnderflow(true); |
311 |
|
//prop exact(); // Need to see if it cancels, see below |
312 |
618 |
prop subnormalExact(true); |
313 |
1452 |
prop noSignificandOverflow((effectiveAdd && ec.diffIsZero) || |
314 |
758 |
(!effectiveAdd && (ec.diffIsZero || ec.diffIsOne))); |
315 |
|
|
316 |
618 |
prop stickyBitIsZero(ec.diffIsZero || ec.diffIsOne); |
317 |
|
|
318 |
|
|
319 |
|
// Work out ordering |
320 |
1585 |
prop leftLarger(knownInCorrectOrder || |
321 |
992 |
(ec.leftIsMax && |
322 |
992 |
ITE(!ec.diffIsZero, |
323 |
|
prop(true), |
324 |
1026 |
left.getSignificand() >= right.getSignificand()))); |
325 |
|
// Optimisation : can we avoid this comparison completely and allow the result to be negative? |
326 |
|
// This may be hard as a compare is cheaper than a negate after, particularly as there has to be an ITE here |
327 |
|
|
328 |
|
// Extend the significands to give room for carry plus guard and sticky bits |
329 |
1168 |
ubv lsig((ITE(leftLarger, left.getSignificand(), right.getSignificand())).extend(1).append(ubv::zero(2))); |
330 |
1168 |
ubv ssig((ITE(leftLarger, right.getSignificand(), left.getSignificand())).extend(1).append(ubv::zero(2))); |
331 |
|
|
332 |
618 |
prop resultSign(ITE(leftLarger, |
333 |
|
left.getSign(), |
334 |
1134 |
prop(!isAdd ^ right.getSign()))); |
335 |
|
|
336 |
|
// Extended so no info lost, negate before shift so that sign-extension works |
337 |
1168 |
ubv negatedSmaller(conditionalNegate<t,ubv,prop>(!effectiveAdd, ssig)); |
338 |
|
|
339 |
1168 |
ubv shiftAmount(ec.absoluteExponentDifference.toUnsigned() // Safe as >= 0 |
340 |
|
.resize(negatedSmaller.getWidth())); // Safe as long as the significand has more bits than the exponent |
341 |
584 |
INVARIANT(exponentWidth <= significandWidth); |
342 |
|
|
343 |
|
|
344 |
|
// Shift the smaller significand |
345 |
1168 |
stickyRightShiftResult<t> shifted(stickyRightShift<t>(negatedSmaller, shiftAmount)); |
346 |
|
|
347 |
2336 |
ubv negatedAlignedSmaller(ITE(ec.diffIsGreaterThanPrecisionPlusOne, // Fast path the common case, +1 to avoid issues with the guard bit |
348 |
|
ITE(effectiveAdd, |
349 |
1168 |
ubv::zero(negatedSmaller.getWidth()), |
350 |
1168 |
~ubv::zero(negatedSmaller.getWidth())), |
351 |
|
shifted.signExtendedResult)); |
352 |
1168 |
ubv shiftedStickyBit(ITE(ec.diffIsGreaterThanPrecision, |
353 |
1168 |
ubv::one(negatedSmaller.getWidth()), |
354 |
|
shifted.stickyBit)); // Have to separate otherwise align up may convert it to the guard bit |
355 |
|
|
356 |
|
|
357 |
|
// Sum and re-align |
358 |
1168 |
ubv sum(lsig.modularAdd(negatedAlignedSmaller)); |
359 |
|
|
360 |
584 |
bwt sumWidth(sum.getWidth()); |
361 |
1168 |
ubv topBit(sum.extract(sumWidth - 1, sumWidth - 1)); |
362 |
1168 |
ubv alignedBit(sum.extract(sumWidth - 2, sumWidth - 2)); |
363 |
1168 |
ubv lowerBit(sum.extract(sumWidth - 3, sumWidth - 3)); |
364 |
|
|
365 |
|
|
366 |
618 |
prop overflow(!(topBit.isAllZeros())); |
367 |
618 |
prop cancel(topBit.isAllZeros() && alignedBit.isAllZeros()); |
368 |
618 |
prop minorCancel(cancel && lowerBit.isAllOnes()); |
369 |
618 |
prop majorCancel(cancel && lowerBit.isAllZeros()); |
370 |
618 |
prop fullCancel(majorCancel && sum.isAllZeros()); |
371 |
|
|
372 |
584 |
probabilityAnnotation<t,prop>(overflow, UNLIKELY); |
373 |
584 |
probabilityAnnotation<t,prop>(cancel, UNLIKELY); |
374 |
584 |
probabilityAnnotation<t,prop>(minorCancel, UNLIKELY); |
375 |
584 |
probabilityAnnotation<t,prop>(majorCancel, VERYUNLIKELY); |
376 |
584 |
probabilityAnnotation<t,prop>(fullCancel, VERYUNLIKELY); |
377 |
|
|
378 |
584 |
INVARIANT(IMPLIES(effectiveAdd && ec.diffIsZero, overflow)); |
379 |
584 |
INVARIANT(IMPLIES(overflow, effectiveAdd && (!ec.diffIsGreaterThanPrecision))); // That case can only overflow by rounding |
380 |
584 |
INVARIANT(IMPLIES(cancel, !effectiveAdd)); |
381 |
584 |
INVARIANT(IMPLIES(majorCancel, ec.diffIsZero || ec.diffIsOne)); |
382 |
|
|
383 |
584 |
probabilityAnnotation<t,prop>(overflow && ec.diffIsTwoToPrecision, UNLIKELY); |
384 |
584 |
probabilityAnnotation<t,prop>(cancel && ec.diffIsTwoToPrecision, UNLIKELY); |
385 |
584 |
probabilityAnnotation<t,prop>(cancel && ec.diffIsGreaterThanPrecision, VERYUNLIKELY); |
386 |
|
|
387 |
618 |
prop exact(cancel && (ec.diffIsZero || ec.diffIsOne)); // For completeness |
388 |
|
|
389 |
1168 |
ubv alignedSum(conditionalLeftShiftOne<t,ubv,prop>(minorCancel, |
390 |
|
conditionalRightShiftOne<t,ubv,prop>(overflow, sum))); |
391 |
|
|
392 |
1168 |
sbv exponentCorrectionTerm(ITE(minorCancel, |
393 |
|
-sbv::one(exponentWidth), |
394 |
|
ITE(overflow, |
395 |
|
sbv::one(exponentWidth), |
396 |
|
sbv::zero(exponentWidth)))); |
397 |
|
|
398 |
1168 |
sbv correctedExponent(ec.maxExponent + exponentCorrectionTerm); // Safe due to extension |
399 |
|
|
400 |
|
// Watch closely... |
401 |
4020 |
ubv stickyBit(ITE(stickyBitIsZero || majorCancel, |
402 |
1168 |
ubv::zero(alignedSum.getWidth()), |
403 |
1684 |
(shiftedStickyBit | ITE(!overflow, ubv::zero(1), sum.extract(0,0)).extend(alignedSum.getWidth() - 1)))); |
404 |
|
|
405 |
|
// We return something in an extended format |
406 |
|
// *. One extra exponent bit to deal with the 'overflow' case |
407 |
|
// *. Two extra significand bits for the guard and sticky bits |
408 |
584 |
fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() + 2); |
409 |
|
|
410 |
|
// Put it back together |
411 |
1168 |
unpackedFloat<t> sumResult(extendedFormat, resultSign, correctedExponent, (alignedSum | stickyBit).contract(1)); |
412 |
|
|
413 |
|
// Deal with the major cancellation case |
414 |
|
// It would be nice to use normaliseUpDetectZero but the sign |
415 |
|
// of the zero depends on the rounding mode. |
416 |
2268 |
unpackedFloat<t> additionResult(ITE(fullCancel, |
417 |
1100 |
unpackedFloat<t>::makeZero(extendedFormat, roundingMode == t::RTN()), |
418 |
|
ITE(majorCancel, |
419 |
|
sumResult.normaliseUp(extendedFormat), |
420 |
|
sumResult))); |
421 |
|
|
422 |
|
// Some thought is required here to convince yourself that |
423 |
|
// there will be no subnormal values that violate this. |
424 |
|
// See 'all subnormals generated by addition are exact' |
425 |
|
// and the extended exponent. |
426 |
584 |
POSTCONDITION(additionResult.valid(extendedFormat)); |
427 |
|
|
428 |
1168 |
return floatWithCustomRounderInfo<t>(additionResult, customRounderInfo<t>(noOverflow, noUnderflow, exact, subnormalExact, noSignificandOverflow)); |
429 |
|
} |
430 |
|
|
431 |
|
template <class t> |
432 |
|
unpackedFloat<t> dualPathArithmeticAdd (const typename t::fpt &format, |
433 |
|
const typename t::rm &roundingMode, |
434 |
|
const unpackedFloat<t> &left, |
435 |
|
const unpackedFloat<t> &right, |
436 |
|
const typename t::prop &isAdd) { |
437 |
|
|
438 |
|
typedef typename t::bwt bwt; |
439 |
|
typedef typename t::fpt fpt; |
440 |
|
typedef typename t::prop prop; |
441 |
|
typedef typename t::ubv ubv; |
442 |
|
typedef typename t::sbv sbv; |
443 |
|
|
444 |
|
PRECONDITION(left.valid(format)); |
445 |
|
PRECONDITION(right.valid(format)); |
446 |
|
|
447 |
|
// We return something in an extended format |
448 |
|
// *. One extra exponent bit to deal with the 'overflow' case |
449 |
|
// *. Two extra significand bits for the guard and sticky bits |
450 |
|
fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() + 2); |
451 |
|
|
452 |
|
|
453 |
|
// Compute exponent difference and swap the two arguments if needed |
454 |
|
sbv initialExponentDifference(expandingSubtract<t>(left.getExponent(), right.getExponent())); |
455 |
|
bwt edWidth(initialExponentDifference.getWidth()); |
456 |
|
sbv edWidthZero(sbv::zero(edWidth)); |
457 |
|
prop orderingCorrect( initialExponentDifference > edWidthZero || |
458 |
|
(initialExponentDifference == edWidthZero && |
459 |
|
left.getSignificand() >= right.getSignificand())); |
460 |
|
|
461 |
|
unpackedFloat<t> larger(ITE(orderingCorrect, left, right)); |
462 |
|
unpackedFloat<t> smaller(ITE(orderingCorrect, right, left)); |
463 |
|
sbv exponentDifference(ITE(orderingCorrect, |
464 |
|
initialExponentDifference, |
465 |
|
-initialExponentDifference)); |
466 |
|
|
467 |
|
prop resultSign(ITE(orderingCorrect, |
468 |
|
left.getSign(), |
469 |
|
prop(!isAdd ^ right.getSign()))); |
470 |
|
|
471 |
|
|
472 |
|
// Work out if an effective subtraction |
473 |
|
prop effectiveAdd(larger.getSign() ^ smaller.getSign() ^ isAdd); |
474 |
|
|
475 |
|
|
476 |
|
// Extend the significands to give room for carry plus guard and sticky bits |
477 |
|
ubv lsig( larger.getSignificand().extend(1).append(ubv::zero(2))); |
478 |
|
ubv ssig(smaller.getSignificand().extend(1).append(ubv::zero(2))); |
479 |
|
|
480 |
|
|
481 |
|
// This is a two-path adder, so determine which of the two paths to use |
482 |
|
// The near path is only needed for things that can cancel more than one bit |
483 |
|
prop farPath(exponentDifference > sbv::one(edWidth) || effectiveAdd); |
484 |
|
|
485 |
|
|
486 |
|
// Far path : Align |
487 |
|
ubv negatedSmaller(ITE(effectiveAdd, ssig, ssig.modularNegate())); // Extended so no info lost |
488 |
|
// Negate before shift so that sign-extension works |
489 |
|
|
490 |
|
sbv significandWidth(edWidth, lsig.getWidth()); |
491 |
|
prop noOverlap(exponentDifference > significandWidth); |
492 |
|
|
493 |
|
ubv shiftAmount(exponentDifference.toUnsigned() // Safe as >= 0 |
494 |
|
.resize(ssig.getWidth())); // This looses information but the case in which it does is handles by noOverlap |
495 |
|
|
496 |
|
|
497 |
|
ubv negatedAlignedSmaller(negatedSmaller.signExtendRightShift(shiftAmount)); |
498 |
|
ubv shiftedStickyBit(rightShiftStickyBit<t>(negatedSmaller, shiftAmount)); // Have to separate otherwise align up may convert it to the guard bit |
499 |
|
|
500 |
|
// Far path : Sum and re-align |
501 |
|
ubv sum(lsig.modularAdd(negatedAlignedSmaller)); |
502 |
|
|
503 |
|
bwt sumWidth(sum.getWidth()); |
504 |
|
ubv topBit(sum.extract(sumWidth - 1, sumWidth - 1)); |
505 |
|
ubv centerBit(sum.extract(sumWidth - 2, sumWidth - 2)); |
506 |
|
|
507 |
|
prop noOverflow(topBit.isAllZeros()); // Only correct if effectiveAdd is set |
508 |
|
prop noCancel(centerBit.isAllOnes()); |
509 |
|
|
510 |
|
|
511 |
|
|
512 |
|
// TODO : Add invariants |
513 |
|
|
514 |
|
ubv alignedSum(ITE(effectiveAdd, |
515 |
|
ITE(noOverflow, |
516 |
|
sum, |
517 |
|
(sum >> ubv::one(sumWidth)) | (sum & ubv::one(sumWidth))), // Cheap sticky right shift |
518 |
|
ITE(noCancel, |
519 |
|
sum, |
520 |
|
sum.modularLeftShift(ubv::one(sumWidth))))); // In the case when this looses data, the result is not used |
521 |
|
|
522 |
|
sbv extendedLargerExponent(larger.getExponent().extend(1)); // So that increment and decrement don't overflow |
523 |
|
sbv correctedExponent(ITE(effectiveAdd, |
524 |
|
ITE(noOverflow, |
525 |
|
extendedLargerExponent, |
526 |
|
extendedLargerExponent.increment()), |
527 |
|
ITE(noCancel, |
528 |
|
extendedLargerExponent, |
529 |
|
extendedLargerExponent.decrement()))); |
530 |
|
|
531 |
|
// Far path : Construct result |
532 |
|
unpackedFloat<t> farPathResult(extendedFormat, resultSign, correctedExponent, (alignedSum | shiftedStickyBit).contract(1)); |
533 |
|
|
534 |
|
|
535 |
|
|
536 |
|
|
537 |
|
// Near path : Align |
538 |
|
prop exponentDifferenceAllZeros(exponentDifference.isAllZeros()); |
539 |
|
ubv nearAlignedSmaller(ITE(exponentDifferenceAllZeros, ssig, ssig >> ubv::one(ssig.getWidth()))); |
540 |
|
|
541 |
|
|
542 |
|
// Near path : Sum and realign |
543 |
|
ubv nearSum(lsig - nearAlignedSmaller); |
544 |
|
// Optimisation : the two paths can be merged up to here to give a pseudo-two path encoding |
545 |
|
|
546 |
|
prop fullCancel(nearSum.isAllZeros()); |
547 |
|
prop nearNoCancel(nearSum.extract(sumWidth - 2, sumWidth - 2).isAllOnes()); |
548 |
|
|
549 |
|
ubv choppedNearSum(nearSum.extract(sumWidth - 3,1)); // In the case this is used, cut bits are all 0 |
550 |
|
unpackedFloat<t> cancellation(extendedFormat, |
551 |
|
resultSign, |
552 |
|
larger.getExponent().decrement(), |
553 |
|
choppedNearSum); |
554 |
|
|
555 |
|
|
556 |
|
// Near path : Construct result |
557 |
|
unpackedFloat<t> nearPathResult(extendedFormat, resultSign, extendedLargerExponent, nearSum.contract(1)); |
558 |
|
|
559 |
|
|
560 |
|
|
561 |
|
// Bring the paths together |
562 |
|
// Optimisation : fix the noOverlap / very far path for directed rounding modes |
563 |
|
unpackedFloat<t> additionResult(ITE(farPath, |
564 |
|
/* ITE(noOverlap, |
565 |
|
ITE((isAdd || orderingCorrect), |
566 |
|
larger, |
567 |
|
negate(format, larger)), |
568 |
|
farPathResult), */ |
569 |
|
farPathResult, |
570 |
|
ITE(fullCancel, |
571 |
|
unpackedFloat<t>::makeZero(extendedFormat, roundingMode == t::RTN()), |
572 |
|
ITE(nearNoCancel, |
573 |
|
nearPathResult, |
574 |
|
cancellation.normaliseUp(format).extend(1,2))))); |
575 |
|
|
576 |
|
// Some thought is required here to convince yourself that |
577 |
|
// there will be no subnormal values that violate this. |
578 |
|
// See 'all subnormals generated by addition are exact' |
579 |
|
// and the extended exponent. |
580 |
|
POSTCONDITION(additionResult.valid(extendedFormat)); |
581 |
|
|
582 |
|
return additionResult; |
583 |
|
} |
584 |
|
|
585 |
|
|
586 |
|
|
587 |
|
|
588 |
|
template <class t> |
589 |
|
unpackedFloat<t> dualPathAdd (const typename t::fpt &format, |
590 |
|
const typename t::rm &roundingMode, |
591 |
|
const unpackedFloat<t> &left, |
592 |
|
const unpackedFloat<t> &right, |
593 |
|
const typename t::prop &isAdd) { |
594 |
|
|
595 |
|
PRECONDITION(left.valid(format)); |
596 |
|
PRECONDITION(right.valid(format)); |
597 |
|
|
598 |
|
unpackedFloat<t> additionResult(dualPathArithmeticAdd(format, roundingMode, left, right, isAdd)); |
599 |
|
|
600 |
|
unpackedFloat<t> roundedAdditionResult(rounder(format, roundingMode, additionResult)); |
601 |
|
|
602 |
|
unpackedFloat<t> result(addAdditionSpecialCases(format, roundingMode, left, right, roundedAdditionResult, isAdd)); |
603 |
|
|
604 |
|
POSTCONDITION(result.valid(format)); |
605 |
|
|
606 |
|
return result; |
607 |
|
} |
608 |
|
|
609 |
|
template <class t> |
610 |
584 |
unpackedFloat<t> add (const typename t::fpt &format, |
611 |
|
const typename t::rm &roundingMode, |
612 |
|
const unpackedFloat<t> &left, |
613 |
|
const unpackedFloat<t> &right, |
614 |
|
const typename t::prop &isAdd) { |
615 |
|
|
616 |
|
//typedef typename t::bwt bwt; |
617 |
|
typedef typename t::prop prop; |
618 |
|
//typedef typename t::ubv ubv; |
619 |
|
//typedef typename t::sbv sbv; |
620 |
|
|
621 |
584 |
PRECONDITION(left.valid(format)); |
622 |
584 |
PRECONDITION(right.valid(format)); |
623 |
|
|
624 |
|
// Optimisation : add a flag which assumes that left and right are in the correct order |
625 |
618 |
prop knownInCorrectOrder(false); |
626 |
|
|
627 |
1168 |
exponentCompareInfo<t> ec(addExponentCompare<t>(left.getExponent().getWidth() + 1, left.getSignificand().getWidth(), |
628 |
|
left.getExponent(), right.getExponent(), knownInCorrectOrder)); |
629 |
|
|
630 |
1168 |
floatWithCustomRounderInfo<t> additionResult(arithmeticAdd(format, roundingMode, left, right, isAdd, knownInCorrectOrder, ec)); |
631 |
|
|
632 |
1168 |
unpackedFloat<t> roundedAdditionResult(customRounder(format, roundingMode, additionResult.uf, additionResult.known)); |
633 |
|
|
634 |
584 |
unpackedFloat<t> result(addAdditionSpecialCases(format, roundingMode, left, right, roundedAdditionResult, isAdd)); |
635 |
|
|
636 |
584 |
POSTCONDITION(result.valid(format)); |
637 |
|
|
638 |
1168 |
return result; |
639 |
|
} |
640 |
|
|
641 |
|
template <class t> |
642 |
|
unpackedFloat<t> addWithBypass (const typename t::fpt &format, |
643 |
|
const typename t::rm &roundingMode, |
644 |
|
const unpackedFloat<t> &left, |
645 |
|
const unpackedFloat<t> &right, |
646 |
|
const typename t::prop &isAdd) { |
647 |
|
|
648 |
|
//typedef typename t::bwt bwt; |
649 |
|
typedef typename t::prop prop; |
650 |
|
//typedef typename t::ubv ubv; |
651 |
|
//typedef typename t::sbv sbv; |
652 |
|
|
653 |
|
PRECONDITION(left.valid(format)); |
654 |
|
PRECONDITION(right.valid(format)); |
655 |
|
|
656 |
|
// Optimisation : add a flag which assumes that left and right are in the correct order |
657 |
|
prop knownInCorrectOrder(false); |
658 |
|
|
659 |
|
|
660 |
|
exponentCompareInfo<t> ec(addExponentCompare<t>(left.getExponent().getWidth() + 1, left.getSignificand().getWidth(), |
661 |
|
left.getExponent(), right.getExponent(), knownInCorrectOrder)); |
662 |
|
|
663 |
|
floatWithCustomRounderInfo<t> additionResult(arithmeticAdd(format, roundingMode, left, right, isAdd, knownInCorrectOrder, ec)); |
664 |
|
|
665 |
|
unpackedFloat<t> roundedAdditionResult(customRounder(format, roundingMode, additionResult.uf, additionResult.known)); |
666 |
|
|
667 |
|
// In the "very far path", i.e. exponent difference is greater than significand length + 1 |
668 |
|
// addition becomes max(left,right) or max(left,right)+/-1 ULP. This is rare in execution |
669 |
|
// (to the point of being a software quality issue) but common in theorem proving. |
670 |
|
// Given we have to have cases for "return left" and "return right" to handle zeros, |
671 |
|
// we might as well make use of these cases to handle when addition behaves like max... |
672 |
|
// Note that this is possible but more complex with just diffIsGreaterThanPrecision. |
673 |
|
|
674 |
|
prop enableBypass(ec.diffIsGreaterThanPrecisionPlusOne && |
675 |
|
!left.getNaN() && !left.getInf() && !left.getZero() && // Handle this as special cases |
676 |
|
!right.getNaN() && !right.getInf() && !right.getZero()); |
677 |
|
|
678 |
|
|
679 |
|
// Duplication but easier to recompute than to pass |
680 |
|
prop effectiveAdd((left.getSign() ^ right.getSign()) ^ isAdd); |
681 |
|
prop resultSign(ITE((knownInCorrectOrder || ec.leftIsMax), // CAUTION : only true in the enableBypass case! |
682 |
|
left.getSign(), |
683 |
|
prop(!isAdd ^ right.getSign()))); |
684 |
|
|
685 |
|
prop significandEven(true); // This is an optimisation that assumes only RNE uses this bit |
686 |
|
// This needs to be changed to implement things like roundToOdd |
687 |
|
// or for the diffIsGreaterThanPrecision case. |
688 |
|
prop farRoundUp(roundingDecision<t>(roundingMode, resultSign, significandEven, !effectiveAdd, prop(true), prop(false))); |
689 |
|
|
690 |
|
// Returns left or right unchanged if adding and rounded down or subtracting and rounded up |
691 |
|
prop roundInCorrectDirection(effectiveAdd ^ farRoundUp); |
692 |
|
|
693 |
|
prop returnLeft(enableBypass && ec.leftIsMax && roundInCorrectDirection); |
694 |
|
prop returnRight(enableBypass && !ec.leftIsMax && roundInCorrectDirection); |
695 |
|
|
696 |
|
unpackedFloat<t> result(addAdditionSpecialCasesWithBypass(format, roundingMode, left, right, returnLeft, returnRight, roundedAdditionResult, isAdd)); |
697 |
|
|
698 |
|
POSTCONDITION(result.valid(format)); |
699 |
|
|
700 |
|
return result; |
701 |
|
} |
702 |
|
|
703 |
|
|
704 |
|
// True if and only if adding these would result in a catastrophic cancellation |
705 |
|
// I.E. if the addition cancells out cancelAmount or more MSBs leaving only LSBs |
706 |
|
template <class t> |
707 |
|
typename t::prop isCatastrophicCancellation (const typename t::fpt &format, |
708 |
|
const unpackedFloat<t> &left, |
709 |
|
const unpackedFloat<t> &right, |
710 |
|
const typename t::bwt &cancelAmount, |
711 |
|
const typename t::prop &isAdd) { |
712 |
|
|
713 |
|
typedef typename t::bwt bwt; |
714 |
|
typedef typename t::prop prop; |
715 |
|
typedef typename t::ubv ubv; |
716 |
|
//typedef typename t::sbv sbv; |
717 |
|
|
718 |
|
PRECONDITION(left.valid(format)); |
719 |
|
PRECONDITION(right.valid(format)); |
720 |
|
PRECONDITION(cancelAmount >= 2); // cancel 0 is not meaningful |
721 |
|
// cancel 1 is common on subtract and arguably not an error |
722 |
|
PRECONDITION(cancelAmount <= format.significandWidth()); // can't cancel more than you have |
723 |
|
|
724 |
|
// 1. It has to be an effective subtraction |
725 |
|
// Duplication but easier to recompute than to pass |
726 |
|
prop effectiveAdd((left.getSign() ^ right.getSign()) ^ isAdd); |
727 |
|
|
728 |
|
// 2. It must be either normal or subnormal numbers |
729 |
|
prop leftSpecial(left.getNaN() || left.getInf() || left.getZero()); |
730 |
|
prop rightSpecial(right.getNaN() || right.getInf() || right.getZero()); |
731 |
|
|
732 |
|
// 3.A. exponents are equal and so are the leading cancelAmount bits |
733 |
|
// 3.B. exponent diff is one and the smaller one is 11111, larger one is 10000 |
734 |
|
|
735 |
|
// Optimisation : add a flag which assumes that left and right are in the correct order |
736 |
|
prop knownInCorrectOrder(false); |
737 |
|
exponentCompareInfo<t> ec(addExponentCompare<t>(left.getExponent().getWidth() + 1, left.getSignificand().getWidth(), |
738 |
|
left.getExponent(), right.getExponent(), knownInCorrectOrder)); |
739 |
|
|
740 |
|
// Can ignore the MSB of the significand as by invariants this is always 1 |
741 |
|
bwt significandWidth(format.significandWidth()); |
742 |
|
bwt topBit(significandWidth - 2); |
743 |
|
bwt bottomBit(significandWidth - cancelAmount); |
744 |
|
|
745 |
|
ubv leftExtract(left.getSignificand.extract(topBit, bottomBit)); |
746 |
|
ubv rightExtract(right.getSignificand.extract(topBit, bottomBit)); |
747 |
|
|
748 |
|
prop result(ITE(!effectiveAdd && !leftSpecial && !rightSpecial, |
749 |
|
ITE(ec.diffIsZero, |
750 |
|
leftExtract == rightExtract, |
751 |
|
ITE(ec.diffIsOne, |
752 |
|
ITE(ec.leftIsMax, |
753 |
|
leftExtract.isAllZeros() && rightExtract.isAllOnes(), |
754 |
|
rightExtract.isAllZeros() && leftExtract.isAllOnes()), |
755 |
|
false)), |
756 |
|
false)); |
757 |
|
|
758 |
|
return result; |
759 |
|
} |
760 |
|
|
761 |
|
} |
762 |
|
|
763 |
|
#endif |
764 |
|
|