GCC Code Coverage Report
Directory: . Exec Total Coverage
File: build-coverage/deps/include/symfpu/core/add.h Lines: 120 122 98.4 %
Date: 2021-05-22 Branches: 351 570 61.6 %

Line Exec Source
1
/*
2
** Copyright (C) 2018 Martin Brain
3
**
4
** See the file LICENSE for licensing information.
5
*/
6
7
/*
8
** add.h
9
**
10
** Martin Brain
11
** martin.brain@cs.ox.ac.uk
12
** 01/09/14
13
**
14
** Addition of arbitrary precision floats
15
**
16
** The current design is based on a two-path adder but it may be useful to use
17
** more paths.  There are five cases that are of interest:
18
**  1. effective add / very far
19
**     -> set the sticky bit only
20
**  2. effective add / far or near
21
**     -> align and add, realign down if needed
22
**  3. effective sub / very far
23
**     -> decrement, re-normalise and set sticky bits or (dependent on rounding-mode, skip entirely)
24
**  4. effective sub / far
25
**     -> align and subtract, realign up if needed
26
**  5. effective sub / near
27
**     -> align, subtract and normalise up
28
**
29
*/
30
31
32
/*
33
** Ideas
34
**  Optimisation : Collar the exponent difference, convert add to twice the width and thus unify the paths and simplify the shifting.
35
**  Enable the absolute max underapproximation
36
*/
37
38
39
#include "symfpu/core/unpackedFloat.h"
40
#include "symfpu/core/ite.h"
41
#include "symfpu/core/rounder.h"
42
#include "symfpu/core/sign.h"
43
#include "symfpu/core/operations.h"
44
45
46
#ifndef SYMFPU_ADD
47
#define SYMFPU_ADD
48
49
namespace symfpu {
50
51
  // There are a number of variants on how this should be done.
52
  // This is the implementation that handles all of them.
53
  // Below are restricted versions for particular special cases.
54
template <class t>
55
584
  unpackedFloat<t> addAdditionSpecialCasesComplete (const typename t::fpt &format,
56
						    const typename t::rm &roundingMode,
57
						    const unpackedFloat<t> &left,
58
						    const unpackedFloat<t> &leftID,
59
						    const unpackedFloat<t> &right,
60
						    const typename t::prop &returnLeft,
61
						    const typename t::prop &returnRight,
62
						    const unpackedFloat<t> &additionResult,
63
						    const typename t::prop &isAdd) {
64
65
  typedef typename t::prop prop;
66
67
  // NaN
68
618
  prop eitherArgumentNan(left.getNaN() || right.getNaN());
69
618
  prop bothInfinity(left.getInf() && right.getInf());
70
618
  prop signsMatch(left.getSign() == right.getSign());
71
  //prop compatableSigns(ITE(isAdd, signsMatch, !signsMatch));
72
618
  prop compatableSigns(isAdd ^ !signsMatch);
73
74
618
  prop generatesNaN(eitherArgumentNan || (bothInfinity && !compatableSigns));
75
76
77
  // Inf
78
1244
  prop generatesInf((bothInfinity && compatableSigns) ||
79
1856
		    ( left.getInf() && !right.getInf()) ||
80
1018
		    (!left.getInf() &&  right.getInf()));
81
82
618
  prop signOfInf(ITE(left.getInf(), left.getSign(), prop(isAdd ^ !right.getSign())));
83
84
85
  // Zero
86
618
  prop bothZero(left.getZero() && right.getZero());
87
618
  prop flipRightSign(!isAdd ^ right.getSign());
88
652
  prop signOfZero(ITE((roundingMode == t::RTN()),
89
1134
		      left.getSign() || flipRightSign,
90
1134
		      left.getSign() && flipRightSign));
91
92
618
  prop  idLeft(!left.getZero() &&  right.getZero());
93
618
  prop idRight( left.getZero() && !right.getZero());
94
95
  // At most one of idLeft, idRight, generatesNaN, generatesInf and bothZero is true.
96
  // If used in addition additionResult is guaranteed to not be NaN.
97
98
  // Subtle trick : as the input to this will have been rounded it will have
99
  // an ITE with the default values "on top", thus doing the special cases
100
  // first (inner) rather than last (outer) allows them to be compacted better
101
1100
  return ITE(idRight || returnRight,
102
	     ITE(isAdd,
103
		 right,
104
		 negate(format, right)),
105
1100
	     ITE(idLeft || returnLeft,
106
		 leftID,
107
		 ITE(generatesNaN,
108
		     unpackedFloat<t>::makeNaN(format),
109
		     ITE(generatesInf,
110
			 unpackedFloat<t>::makeInf(format, signOfInf),
111
			 ITE(bothZero,
112
			     unpackedFloat<t>::makeZero(format, signOfZero),
113
2818
			     additionResult)))));
114
 }
115
116
117
  // leftID is the value returned in the idLeft case (i.e. when left is not a
118
  // special number and right is zero).  This is needed by FMA as the flags
119
  // for left and leftID are computed differently and need to be handled differently.
120
template <class t>
121
  unpackedFloat<t> addAdditionSpecialCasesWithID (const typename t::fpt &format,
122
						  const typename t::rm &roundingMode,
123
						  const unpackedFloat<t> &left,
124
						  const unpackedFloat<t> &leftID,
125
						  const unpackedFloat<t> &right,
126
						  const unpackedFloat<t> &additionResult,
127
						  const typename t::prop &isAdd) {
128
  return addAdditionSpecialCasesComplete<t>(format, roundingMode, left, leftID,
129
					    right, typename t::prop(false), typename t::prop(false),
130
					    additionResult, isAdd);
131
  }
132
133
134
  // This is the usual case; use this one!
135
  template <class t>
136
584
  unpackedFloat<t> addAdditionSpecialCases (const typename t::fpt &format,
137
					    const typename t::rm &roundingMode,
138
					    const unpackedFloat<t> &left,
139
					    const unpackedFloat<t> &right,
140
					    const unpackedFloat<t> &additionResult,
141
					    const typename t::prop &isAdd) {
142
    return addAdditionSpecialCasesComplete<t>(format, roundingMode, left, left,
143
					      right, typename t::prop(false), typename t::prop(false),
144
584
					      additionResult, isAdd);
145
  }
146
147
148
  // As above but allows the (very) far path to be accelerated
149
  template <class t>
150
  unpackedFloat<t> addAdditionSpecialCasesWithBypass (const typename t::fpt &format,
151
						      const typename t::rm &roundingMode,
152
						      const unpackedFloat<t> &left,
153
						      const unpackedFloat<t> &right,
154
						      const typename t::prop &returnLeft,
155
						      const typename t::prop &returnRight,
156
						      const unpackedFloat<t> &additionResult,
157
						      const typename t::prop &isAdd) {
158
    return addAdditionSpecialCasesComplete<t>(format, roundingMode, left, left,
159
					      right, returnLeft, returnRight,
160
					      additionResult, isAdd);
161
  }
162
163
164
165
  /* Computes the normal / subnormal case only.
166
   * This allows multiple versions of the first phase to be used and
167
   * the first phase to be used for other things (e.g. FMA).
168
   */
169
170
template <class t>
171
584
struct exponentCompareInfo {
172
  typedef typename t::sbv sbv;
173
  typedef typename t::prop prop;
174
175
  prop leftIsMax;
176
  sbv maxExponent;
177
  sbv absoluteExponentDifference;
178
  prop diffIsZero;
179
  prop diffIsOne;
180
  prop diffIsGreaterThanPrecision;
181
  prop diffIsTwoToPrecision;
182
  prop diffIsGreaterThanPrecisionPlusOne;
183
184
584
  exponentCompareInfo(const prop &lil, const sbv &me, const sbv &aed,
185
		      const prop &diz, const prop &dio, const prop &digtp, const prop &dittp, const prop &disgtppo) :
186
    leftIsMax(lil), maxExponent(me), absoluteExponentDifference(aed),
187
584
      diffIsZero(diz), diffIsOne(dio), diffIsGreaterThanPrecision(digtp), diffIsTwoToPrecision(dittp), diffIsGreaterThanPrecisionPlusOne(disgtppo) {}
188
189
  exponentCompareInfo(const exponentCompareInfo<t> &old) :
190
    leftIsMax(old.leftIsMax), maxExponent(old.maxExponent), absoluteExponentDifference(old.absoluteExponentDifference),
191
    diffIsZero(old.diffIsZero), diffIsOne(old.diffIsOne),
192
    diffIsGreaterThanPrecision(old.diffIsGreaterThanPrecision),
193
    diffIsTwoToPrecision(old.diffIsTwoToPrecision),
194
    diffIsGreaterThanPrecisionPlusOne(old.diffIsGreaterThanPrecisionPlusOne) {}
195
};
196
197
template <class t>
198
584
  exponentCompareInfo<t> addExponentCompare(const typename t::bwt exponentWidth,
199
					    const typename t::bwt significandWidth,
200
					    const typename t::sbv &leftExponent,
201
					    const typename t::sbv &rightExponent,
202
					    const typename t::prop &knownInCorrectOrder) {
203
584
  PRECONDITION( leftExponent.getWidth() + 1 == exponentWidth);
204
584
  PRECONDITION(rightExponent.getWidth() + 1 == exponentWidth);
205
206
   typedef typename t::prop prop;
207
   typedef typename t::sbv sbv;
208
209
#if 0
210
   // The obvious implementation
211
212
   // Compute exponent distance
213
   sbv maxExponent(max<t,sbv>(leftExponent.extend(1), rightExponent.extend(1)));
214
   sbv minExponent(min<t,sbv>(leftExponent.extend(1), rightExponent.extend(1)));
215
   sbv absoluteExponentDifference(maxExponent - minExponent);
216
217
   prop leftIsMax(knownInCorrectOrder || leftExponent.extend(1) == maxExponent);
218
#else
219
220
   // A better implementation(?)
221
1168
   sbv exponentDifference(leftExponent.extend(1) - rightExponent.extend(1));
222
223
618
   prop signBit(exponentDifference.toUnsigned().extract(exponentWidth - 1, exponentWidth - 1).isAllOnes());
224
618
   prop leftIsMax(knownInCorrectOrder || !signBit);
225
226
1168
   sbv maxExponent(ITE(leftIsMax, leftExponent.extend(1), rightExponent.extend(1)));
227
1168
   sbv absoluteExponentDifference(ITE(leftIsMax, exponentDifference, exponentDifference.modularNegate()));  // Largest negative value not obtainable so negate is safe
228
#endif
229
230
584
   INVARIANT(sbv::zero(exponentWidth) <= absoluteExponentDifference);
231
232
   // Optimisation : compact these comparisons at the bit-level
233
618
   prop diffIsZero(absoluteExponentDifference == sbv::zero(exponentWidth));
234
618
   prop diffIsOne(absoluteExponentDifference == sbv::one(exponentWidth));
235
618
   prop diffIsGreaterThanPrecision(sbv(exponentWidth, significandWidth) < absoluteExponentDifference);  // Assumes this is representable
236
618
   prop diffIsTwoToPrecision(!diffIsZero && !diffIsOne && !diffIsGreaterThanPrecision);
237
618
   prop diffIsGreaterThanPrecisionPlusOne(sbv(exponentWidth, significandWidth + 1) < absoluteExponentDifference);
238
239
584
   probabilityAnnotation<t,prop>(diffIsZero, UNLIKELY);
240
584
   probabilityAnnotation<t,prop>(diffIsOne, UNLIKELY);
241
584
   probabilityAnnotation<t,prop>(diffIsGreaterThanPrecision, LIKELY);  // In proving if not execution
242
584
   probabilityAnnotation<t,prop>(diffIsGreaterThanPrecisionPlusOne, LIKELY);  // In proving if not execution
243
244
245
   return exponentCompareInfo<t>(leftIsMax,
246
				 maxExponent, absoluteExponentDifference,
247
1168
				 diffIsZero, diffIsOne, diffIsGreaterThanPrecision, diffIsTwoToPrecision, diffIsGreaterThanPrecisionPlusOne);
248
 }
249
250
251
252
// Note that the arithmetic part of add needs the rounding mode.
253
// This is an oddity due to the way that the sign of zero is generated.
254
255
 template <class t>
256
584
 struct floatWithCustomRounderInfo {
257
   unpackedFloat<t> uf;
258
   customRounderInfo<t> known;
259
260
584
 floatWithCustomRounderInfo(const unpackedFloat<t> &_uf, const customRounderInfo<t> &_known) : uf(_uf), known(_known) {}
261
 floatWithCustomRounderInfo(const floatWithCustomRounderInfo<t> &old) : uf(old.uf), known(old.known) {}
262
 };
263
264
 template <class t>
265
584
   floatWithCustomRounderInfo<t> arithmeticAdd (const typename t::fpt &format,
266
						const typename t::rm &roundingMode,
267
						const unpackedFloat<t> &left,
268
						const unpackedFloat<t> &right,
269
						const typename t::prop &isAdd,
270
						const typename t::prop &knownInCorrectOrder,
271
						const exponentCompareInfo<t> &ec) {
272
273
   typedef typename t::bwt bwt;
274
   typedef typename t::fpt fpt;
275
   typedef typename t::prop prop;
276
   typedef typename t::ubv ubv;
277
   typedef typename t::sbv sbv;
278
279
584
   PRECONDITION(left.valid(format));
280
584
   PRECONDITION(right.valid(format));
281
282
   // Work out if an effective subtraction
283
618
   prop effectiveAdd((left.getSign() ^ right.getSign()) ^ isAdd);
284
285
584
   bwt exponentWidth(left.getExponent().getWidth() + 1);
286
584
   bwt significandWidth(left.getSignificand().getWidth());
287
288
   /* Exponent difference and effective add implies a large amount about the output exponent and flags
289
   ** R denotes that this is possible via rounding up and incrementing the exponent
290
   **
291
   ** Case       A. max(l,r) + 1,    B. max(l,r)    C. max(l,r) - 1      D. max(l,r) - k       E. zero
292
   ** Eff. Add      Y                   Y
293
   **  diff = 0     Y, sticky 0
294
   **  diff = 1     Y, sticky 0, R      Y, sticky 0
295
   **  diff : [2,p] decreasing prob., R Y
296
   **  diff > p     R                   Y
297
   **
298
   ** Eff. Sub                          Y              Y                    Y, exact              Y, exact
299
   **  diff = 0                                        Y, exact             prob. drop with k     low prob.
300
   **  diff = 1                         Y, sticky 0    Y, exact             prob. drop with k
301
   **  diff : [2,p]                     Y, R           decreasing prob.
302
   **  diff > p                         Y, R           low prob.
303
   **
304
   */
305
   // Optimisation : add 'bypass' invariants that link the final exponent to the input using this.
306
307
308
   // Rounder flags
309
618
   prop noOverflow(!effectiveAdd);
310
618
   prop noUnderflow(true);
311
   //prop exact(); // Need to see if it cancels, see below
312
618
   prop subnormalExact(true);
313
1452
   prop noSignificandOverflow((effectiveAdd && ec.diffIsZero) ||
314
758
			      (!effectiveAdd && (ec.diffIsZero || ec.diffIsOne)));
315
316
618
   prop stickyBitIsZero(ec.diffIsZero || ec.diffIsOne);
317
318
319
   // Work out ordering
320
1585
   prop leftLarger(knownInCorrectOrder ||
321
992
		   (ec.leftIsMax &&
322
992
		    ITE(!ec.diffIsZero,
323
			prop(true),
324
1026
			left.getSignificand() >= right.getSignificand())));
325
   // Optimisation : can we avoid this comparison completely and allow the result to be negative?
326
   // This may be hard as a compare is cheaper than a negate after, particularly as there has to be an ITE here
327
328
   // Extend the significands to give room for carry plus guard and sticky bits
329
1168
   ubv lsig((ITE(leftLarger, left.getSignificand(), right.getSignificand())).extend(1).append(ubv::zero(2)));
330
1168
   ubv ssig((ITE(leftLarger, right.getSignificand(), left.getSignificand())).extend(1).append(ubv::zero(2)));
331
332
618
   prop resultSign(ITE(leftLarger,
333
		       left.getSign(),
334
1134
		       prop(!isAdd ^ right.getSign())));
335
336
   // Extended so no info lost, negate before shift so that sign-extension works
337
1168
   ubv negatedSmaller(conditionalNegate<t,ubv,prop>(!effectiveAdd, ssig));
338
339
1168
   ubv shiftAmount(ec.absoluteExponentDifference.toUnsigned() // Safe as >= 0
340
		   .resize(negatedSmaller.getWidth()));  // Safe as long as the significand has more bits than the exponent
341
584
   INVARIANT(exponentWidth <= significandWidth);
342
343
344
   // Shift the smaller significand
345
1168
   stickyRightShiftResult<t> shifted(stickyRightShift<t>(negatedSmaller, shiftAmount));
346
347
2336
   ubv negatedAlignedSmaller(ITE(ec.diffIsGreaterThanPrecisionPlusOne, // Fast path the common case, +1 to avoid issues with the guard bit
348
				 ITE(effectiveAdd,
349
1168
				      ubv::zero(negatedSmaller.getWidth()),
350
1168
				     ~ubv::zero(negatedSmaller.getWidth())),
351
				 shifted.signExtendedResult));
352
1168
   ubv shiftedStickyBit(ITE(ec.diffIsGreaterThanPrecision,
353
1168
			    ubv::one(negatedSmaller.getWidth()),
354
			    shifted.stickyBit));  // Have to separate otherwise align up may convert it to the guard bit
355
356
357
   // Sum and re-align
358
1168
   ubv sum(lsig.modularAdd(negatedAlignedSmaller));
359
360
584
   bwt sumWidth(sum.getWidth());
361
1168
   ubv topBit(sum.extract(sumWidth - 1, sumWidth - 1));
362
1168
   ubv alignedBit(sum.extract(sumWidth - 2, sumWidth - 2));
363
1168
   ubv lowerBit(sum.extract(sumWidth - 3, sumWidth - 3));
364
365
366
618
   prop overflow(!(topBit.isAllZeros()));
367
618
   prop cancel(topBit.isAllZeros() && alignedBit.isAllZeros());
368
618
   prop minorCancel(cancel && lowerBit.isAllOnes());
369
618
   prop majorCancel(cancel && lowerBit.isAllZeros());
370
618
   prop fullCancel(majorCancel && sum.isAllZeros());
371
372
584
   probabilityAnnotation<t,prop>(overflow, UNLIKELY);
373
584
   probabilityAnnotation<t,prop>(cancel, UNLIKELY);
374
584
   probabilityAnnotation<t,prop>(minorCancel, UNLIKELY);
375
584
   probabilityAnnotation<t,prop>(majorCancel, VERYUNLIKELY);
376
584
   probabilityAnnotation<t,prop>(fullCancel, VERYUNLIKELY);
377
378
584
   INVARIANT(IMPLIES(effectiveAdd && ec.diffIsZero, overflow));
379
584
   INVARIANT(IMPLIES(overflow, effectiveAdd && (!ec.diffIsGreaterThanPrecision))); // That case can only overflow by rounding
380
584
   INVARIANT(IMPLIES(cancel, !effectiveAdd));
381
584
   INVARIANT(IMPLIES(majorCancel, ec.diffIsZero || ec.diffIsOne));
382
383
584
   probabilityAnnotation<t,prop>(overflow && ec.diffIsTwoToPrecision, UNLIKELY);
384
584
   probabilityAnnotation<t,prop>(cancel && ec.diffIsTwoToPrecision, UNLIKELY);
385
584
   probabilityAnnotation<t,prop>(cancel && ec.diffIsGreaterThanPrecision, VERYUNLIKELY);
386
387
618
   prop exact(cancel && (ec.diffIsZero || ec.diffIsOne)); // For completeness
388
389
1168
   ubv alignedSum(conditionalLeftShiftOne<t,ubv,prop>(minorCancel,
390
						      conditionalRightShiftOne<t,ubv,prop>(overflow, sum)));
391
392
1168
   sbv exponentCorrectionTerm(ITE(minorCancel,
393
				  -sbv::one(exponentWidth),
394
				  ITE(overflow,
395
				      sbv::one(exponentWidth),
396
				      sbv::zero(exponentWidth))));
397
398
1168
   sbv correctedExponent(ec.maxExponent + exponentCorrectionTerm); // Safe due to extension
399
400
   // Watch closely...
401
4020
   ubv stickyBit(ITE(stickyBitIsZero || majorCancel,
402
1168
		     ubv::zero(alignedSum.getWidth()),
403
1684
		     (shiftedStickyBit | ITE(!overflow, ubv::zero(1), sum.extract(0,0)).extend(alignedSum.getWidth() - 1))));
404
405
   // We return something in an extended format
406
   //  *. One extra exponent bit to deal with the 'overflow' case
407
   //  *. Two extra significand bits for the guard and sticky bits
408
584
   fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() + 2);
409
410
   // Put it back together
411
1168
   unpackedFloat<t> sumResult(extendedFormat, resultSign, correctedExponent, (alignedSum | stickyBit).contract(1));
412
413
   // Deal with the major cancellation case
414
   // It would be nice to use normaliseUpDetectZero but the sign
415
   // of the zero depends on the rounding mode.
416
2268
   unpackedFloat<t> additionResult(ITE(fullCancel,
417
1100
				       unpackedFloat<t>::makeZero(extendedFormat, roundingMode == t::RTN()),
418
				       ITE(majorCancel,
419
					   sumResult.normaliseUp(extendedFormat),
420
					   sumResult)));
421
422
   // Some thought is required here to convince yourself that
423
   // there will be no subnormal values that violate this.
424
   // See 'all subnormals generated by addition are exact'
425
   // and the extended exponent.
426
584
   POSTCONDITION(additionResult.valid(extendedFormat));
427
428
1168
   return floatWithCustomRounderInfo<t>(additionResult, customRounderInfo<t>(noOverflow, noUnderflow, exact, subnormalExact, noSignificandOverflow));
429
 }
430
431
 template <class t>
432
   unpackedFloat<t> dualPathArithmeticAdd (const typename t::fpt &format,
433
					   const typename t::rm &roundingMode,
434
					   const unpackedFloat<t> &left,
435
					   const unpackedFloat<t> &right,
436
					   const typename t::prop &isAdd) {
437
438
   typedef typename t::bwt bwt;
439
   typedef typename t::fpt fpt;
440
   typedef typename t::prop prop;
441
   typedef typename t::ubv ubv;
442
   typedef typename t::sbv sbv;
443
444
   PRECONDITION(left.valid(format));
445
   PRECONDITION(right.valid(format));
446
447
   // We return something in an extended format
448
   //  *. One extra exponent bit to deal with the 'overflow' case
449
   //  *. Two extra significand bits for the guard and sticky bits
450
   fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() + 2);
451
452
453
   // Compute exponent difference and swap the two arguments if needed
454
   sbv initialExponentDifference(expandingSubtract<t>(left.getExponent(), right.getExponent()));
455
   bwt edWidth(initialExponentDifference.getWidth());
456
   sbv edWidthZero(sbv::zero(edWidth));
457
   prop orderingCorrect( initialExponentDifference >  edWidthZero ||
458
			(initialExponentDifference == edWidthZero &&
459
			 left.getSignificand() >= right.getSignificand()));
460
461
   unpackedFloat<t> larger(ITE(orderingCorrect, left, right));
462
   unpackedFloat<t> smaller(ITE(orderingCorrect, right, left));
463
   sbv exponentDifference(ITE(orderingCorrect,
464
			      initialExponentDifference,
465
			      -initialExponentDifference));
466
467
   prop resultSign(ITE(orderingCorrect,
468
		       left.getSign(),
469
		       prop(!isAdd ^ right.getSign())));
470
471
472
   // Work out if an effective subtraction
473
   prop effectiveAdd(larger.getSign() ^ smaller.getSign() ^ isAdd);
474
475
476
   // Extend the significands to give room for carry plus guard and sticky bits
477
   ubv lsig( larger.getSignificand().extend(1).append(ubv::zero(2)));
478
   ubv ssig(smaller.getSignificand().extend(1).append(ubv::zero(2)));
479
480
481
   // This is a two-path adder, so determine which of the two paths to use
482
   // The near path is only needed for things that can cancel more than one bit
483
   prop farPath(exponentDifference > sbv::one(edWidth) || effectiveAdd);
484
485
486
   // Far path : Align
487
   ubv negatedSmaller(ITE(effectiveAdd, ssig, ssig.modularNegate())); // Extended so no info lost
488
                                                                      // Negate before shift so that sign-extension works
489
490
   sbv significandWidth(edWidth, lsig.getWidth());
491
   prop noOverlap(exponentDifference > significandWidth);
492
493
   ubv shiftAmount(exponentDifference.toUnsigned() // Safe as >= 0
494
		   .resize(ssig.getWidth()));      // This looses information but the case in which it does is handles by noOverlap
495
496
497
   ubv negatedAlignedSmaller(negatedSmaller.signExtendRightShift(shiftAmount));
498
   ubv shiftedStickyBit(rightShiftStickyBit<t>(negatedSmaller, shiftAmount));  // Have to separate otherwise align up may convert it to the guard bit
499
500
   // Far path : Sum and re-align
501
   ubv sum(lsig.modularAdd(negatedAlignedSmaller));
502
503
   bwt sumWidth(sum.getWidth());
504
   ubv topBit(sum.extract(sumWidth - 1, sumWidth - 1));
505
   ubv centerBit(sum.extract(sumWidth - 2, sumWidth - 2));
506
507
   prop noOverflow(topBit.isAllZeros()); // Only correct if effectiveAdd is set
508
   prop noCancel(centerBit.isAllOnes());
509
510
511
512
   // TODO : Add invariants
513
514
   ubv alignedSum(ITE(effectiveAdd,
515
		      ITE(noOverflow,
516
			  sum,
517
			  (sum >> ubv::one(sumWidth)) | (sum & ubv::one(sumWidth))),  // Cheap sticky right shift
518
		      ITE(noCancel,
519
			  sum,
520
			  sum.modularLeftShift(ubv::one(sumWidth))))); // In the case when this looses data, the result is not used
521
522
   sbv extendedLargerExponent(larger.getExponent().extend(1));  // So that increment and decrement don't overflow
523
   sbv correctedExponent(ITE(effectiveAdd,
524
			     ITE(noOverflow,
525
				 extendedLargerExponent,
526
				 extendedLargerExponent.increment()),
527
			     ITE(noCancel,
528
				 extendedLargerExponent,
529
				 extendedLargerExponent.decrement())));
530
531
   // Far path : Construct result
532
   unpackedFloat<t> farPathResult(extendedFormat, resultSign, correctedExponent, (alignedSum | shiftedStickyBit).contract(1));
533
534
535
536
537
   // Near path : Align
538
   prop exponentDifferenceAllZeros(exponentDifference.isAllZeros());
539
   ubv nearAlignedSmaller(ITE(exponentDifferenceAllZeros, ssig, ssig >> ubv::one(ssig.getWidth())));
540
541
542
   // Near path : Sum and realign
543
   ubv nearSum(lsig - nearAlignedSmaller);
544
   // Optimisation : the two paths can be merged up to here to give a pseudo-two path encoding
545
546
   prop fullCancel(nearSum.isAllZeros());
547
   prop nearNoCancel(nearSum.extract(sumWidth - 2, sumWidth - 2).isAllOnes());
548
549
   ubv choppedNearSum(nearSum.extract(sumWidth - 3,1)); // In the case this is used, cut bits are all 0
550
   unpackedFloat<t> cancellation(extendedFormat,
551
				 resultSign,
552
				 larger.getExponent().decrement(),
553
				 choppedNearSum);
554
555
556
   // Near path : Construct result
557
   unpackedFloat<t> nearPathResult(extendedFormat, resultSign, extendedLargerExponent, nearSum.contract(1));
558
559
560
561
   // Bring the paths together
562
   // Optimisation : fix the noOverlap / very far path for directed rounding modes
563
   unpackedFloat<t> additionResult(ITE(farPath,
564
				       /* ITE(noOverlap,
565
					   ITE((isAdd || orderingCorrect),
566
					       larger,
567
					       negate(format, larger)),
568
					       farPathResult), */
569
				       farPathResult,
570
				       ITE(fullCancel,
571
					   unpackedFloat<t>::makeZero(extendedFormat, roundingMode == t::RTN()),
572
					   ITE(nearNoCancel,
573
					       nearPathResult,
574
					       cancellation.normaliseUp(format).extend(1,2)))));
575
576
   // Some thought is required here to convince yourself that
577
   // there will be no subnormal values that violate this.
578
   // See 'all subnormals generated by addition are exact'
579
   // and the extended exponent.
580
   POSTCONDITION(additionResult.valid(extendedFormat));
581
582
   return additionResult;
583
 }
584
585
586
587
588
 template <class t>
589
   unpackedFloat<t> dualPathAdd (const typename t::fpt &format,
590
				 const typename t::rm &roundingMode,
591
				 const unpackedFloat<t> &left,
592
				 const unpackedFloat<t> &right,
593
				 const typename t::prop &isAdd) {
594
595
   PRECONDITION(left.valid(format));
596
   PRECONDITION(right.valid(format));
597
598
   unpackedFloat<t> additionResult(dualPathArithmeticAdd(format, roundingMode, left, right, isAdd));
599
600
   unpackedFloat<t> roundedAdditionResult(rounder(format, roundingMode, additionResult));
601
602
   unpackedFloat<t> result(addAdditionSpecialCases(format, roundingMode, left, right, roundedAdditionResult, isAdd));
603
604
   POSTCONDITION(result.valid(format));
605
606
   return result;
607
 }
608
609
template <class t>
610
584
   unpackedFloat<t> add (const typename t::fpt &format,
611
			 const typename t::rm &roundingMode,
612
			 const unpackedFloat<t> &left,
613
			 const unpackedFloat<t> &right,
614
			 const typename t::prop &isAdd) {
615
616
   //typedef typename t::bwt bwt;
617
   typedef typename t::prop prop;
618
   //typedef typename t::ubv ubv;
619
   //typedef typename t::sbv sbv;
620
621
584
   PRECONDITION(left.valid(format));
622
584
   PRECONDITION(right.valid(format));
623
624
   // Optimisation : add a flag which assumes that left and right are in the correct order
625
618
   prop knownInCorrectOrder(false);
626
627
1168
   exponentCompareInfo<t> ec(addExponentCompare<t>(left.getExponent().getWidth() + 1, left.getSignificand().getWidth(),
628
						   left.getExponent(), right.getExponent(), knownInCorrectOrder));
629
630
1168
   floatWithCustomRounderInfo<t> additionResult(arithmeticAdd(format, roundingMode, left, right, isAdd, knownInCorrectOrder, ec));
631
632
1168
   unpackedFloat<t> roundedAdditionResult(customRounder(format, roundingMode, additionResult.uf, additionResult.known));
633
634
584
   unpackedFloat<t> result(addAdditionSpecialCases(format, roundingMode, left, right, roundedAdditionResult, isAdd));
635
636
584
   POSTCONDITION(result.valid(format));
637
638
1168
   return result;
639
 }
640
641
 template <class t>
642
   unpackedFloat<t> addWithBypass (const typename t::fpt &format,
643
				   const typename t::rm &roundingMode,
644
				   const unpackedFloat<t> &left,
645
				   const unpackedFloat<t> &right,
646
				   const typename t::prop &isAdd) {
647
648
   //typedef typename t::bwt bwt;
649
   typedef typename t::prop prop;
650
   //typedef typename t::ubv ubv;
651
   //typedef typename t::sbv sbv;
652
653
   PRECONDITION(left.valid(format));
654
   PRECONDITION(right.valid(format));
655
656
   // Optimisation : add a flag which assumes that left and right are in the correct order
657
   prop knownInCorrectOrder(false);
658
659
660
   exponentCompareInfo<t> ec(addExponentCompare<t>(left.getExponent().getWidth() + 1, left.getSignificand().getWidth(),
661
						   left.getExponent(), right.getExponent(), knownInCorrectOrder));
662
663
   floatWithCustomRounderInfo<t> additionResult(arithmeticAdd(format, roundingMode, left, right, isAdd, knownInCorrectOrder, ec));
664
665
   unpackedFloat<t> roundedAdditionResult(customRounder(format, roundingMode, additionResult.uf, additionResult.known));
666
667
   // In the "very far path", i.e. exponent difference is greater than significand length + 1
668
   // addition becomes max(left,right) or max(left,right)+/-1 ULP.  This is rare in execution
669
   // (to the point of being a software quality issue) but common in theorem proving.
670
   // Given we have to have cases for "return left" and "return right" to handle zeros,
671
   // we might as well make use of these cases to handle when addition behaves like max...
672
   // Note that this is possible but more complex with just diffIsGreaterThanPrecision.
673
674
   prop enableBypass(ec.diffIsGreaterThanPrecisionPlusOne &&
675
		      !left.getNaN() &&  !left.getInf() &&  !left.getZero() && // Handle this as special cases
676
		     !right.getNaN() && !right.getInf() && !right.getZero());
677
678
679
   // Duplication but easier to recompute than to pass
680
   prop effectiveAdd((left.getSign() ^ right.getSign()) ^ isAdd);
681
   prop resultSign(ITE((knownInCorrectOrder || ec.leftIsMax),  // CAUTION : only true in the enableBypass case!
682
		       left.getSign(),
683
		       prop(!isAdd ^ right.getSign())));
684
685
   prop significandEven(true);  // This is an optimisation that assumes only RNE uses this bit
686
                                // This needs to be changed to implement things like roundToOdd
687
                                // or for the diffIsGreaterThanPrecision case.
688
   prop farRoundUp(roundingDecision<t>(roundingMode, resultSign, significandEven, !effectiveAdd, prop(true), prop(false)));
689
690
   // Returns left or right unchanged if adding and rounded down or subtracting and rounded up
691
   prop roundInCorrectDirection(effectiveAdd ^ farRoundUp);
692
693
   prop  returnLeft(enableBypass &&  ec.leftIsMax && roundInCorrectDirection);
694
   prop returnRight(enableBypass && !ec.leftIsMax && roundInCorrectDirection);
695
696
   unpackedFloat<t> result(addAdditionSpecialCasesWithBypass(format, roundingMode, left, right, returnLeft, returnRight, roundedAdditionResult, isAdd));
697
698
   POSTCONDITION(result.valid(format));
699
700
   return result;
701
 }
702
703
704
 // True if and only if adding these would result in a catastrophic cancellation
705
 // I.E. if the addition cancells out cancelAmount or more MSBs leaving only LSBs
706
 template <class t>
707
   typename t::prop isCatastrophicCancellation (const typename t::fpt &format,
708
						const unpackedFloat<t> &left,
709
						const unpackedFloat<t> &right,
710
						const typename t::bwt &cancelAmount,
711
						const typename t::prop &isAdd) {
712
713
   typedef typename t::bwt bwt;
714
   typedef typename t::prop prop;
715
   typedef typename t::ubv ubv;
716
   //typedef typename t::sbv sbv;
717
718
   PRECONDITION(left.valid(format));
719
   PRECONDITION(right.valid(format));
720
   PRECONDITION(cancelAmount >= 2);   // cancel 0 is not meaningful
721
                                      // cancel 1 is common on subtract and arguably not an error
722
   PRECONDITION(cancelAmount <= format.significandWidth());  // can't cancel more than you have
723
724
   // 1. It has to be an effective subtraction
725
   // Duplication but easier to recompute than to pass
726
   prop effectiveAdd((left.getSign() ^ right.getSign()) ^ isAdd);
727
728
   // 2. It must be either normal or subnormal numbers
729
   prop leftSpecial(left.getNaN() || left.getInf() || left.getZero());
730
   prop rightSpecial(right.getNaN() || right.getInf() || right.getZero());
731
732
   // 3.A. exponents are equal and so are the leading cancelAmount bits
733
   // 3.B. exponent diff is one and the smaller one is 11111, larger one is 10000
734
735
   // Optimisation : add a flag which assumes that left and right are in the correct order
736
   prop knownInCorrectOrder(false);
737
   exponentCompareInfo<t> ec(addExponentCompare<t>(left.getExponent().getWidth() + 1, left.getSignificand().getWidth(),
738
						   left.getExponent(), right.getExponent(), knownInCorrectOrder));
739
740
   // Can ignore the MSB of the significand as by invariants this is always 1
741
   bwt significandWidth(format.significandWidth());
742
   bwt topBit(significandWidth - 2);
743
   bwt bottomBit(significandWidth - cancelAmount);
744
745
   ubv leftExtract(left.getSignificand.extract(topBit, bottomBit));
746
   ubv rightExtract(right.getSignificand.extract(topBit, bottomBit));
747
748
   prop result(ITE(!effectiveAdd && !leftSpecial && !rightSpecial,
749
		   ITE(ec.diffIsZero,
750
		       leftExtract == rightExtract,
751
		       ITE(ec.diffIsOne,
752
			   ITE(ec.leftIsMax,
753
			        leftExtract.isAllZeros() && rightExtract.isAllOnes(),
754
			       rightExtract.isAllZeros() &&  leftExtract.isAllOnes()),
755
			   false)),
756
		   false));
757
758
   return result;
759
 }
760
761
}
762
763
#endif
764