1 |
|
/* |
2 |
|
** Copyright (C) 2018 Martin Brain |
3 |
|
** |
4 |
|
** See the file LICENSE for licensing information. |
5 |
|
*/ |
6 |
|
|
7 |
|
/* |
8 |
|
** fma.h |
9 |
|
** |
10 |
|
** Martin Brain |
11 |
|
** martin.brain@cs.ox.ac.uk |
12 |
|
** 20/05/15 |
13 |
|
** |
14 |
|
** Fused multiply and add : |
15 |
|
** fma(R,A,B,C) = round(R, A * B + C) |
16 |
|
** |
17 |
|
*/ |
18 |
|
|
19 |
|
|
20 |
|
#include "symfpu/core/unpackedFloat.h" |
21 |
|
#include "symfpu/core/ite.h" |
22 |
|
#include "symfpu/core/rounder.h" |
23 |
|
#include "symfpu/core/multiply.h" |
24 |
|
#include "symfpu/core/convert.h" |
25 |
|
#include "symfpu/core/add.h" |
26 |
|
|
27 |
|
|
28 |
|
#ifndef SYMFPU_FMA |
29 |
|
#define SYMFPU_FMA |
30 |
|
|
31 |
|
namespace symfpu { |
32 |
|
|
33 |
|
template <class t> |
34 |
|
unpackedFloat<t> fma (const typename t::fpt &format, |
35 |
|
const typename t::rm &roundingMode, |
36 |
|
const unpackedFloat<t> &leftMultiply, |
37 |
|
const unpackedFloat<t> &rightMultiply, |
38 |
|
const unpackedFloat<t> &addArgument) { |
39 |
|
|
40 |
|
// typedef typename t::bwt bwt; |
41 |
|
typedef typename t::prop prop; |
42 |
|
//typedef typename t::ubv ubv; |
43 |
|
//typedef typename t::sbv sbv; |
44 |
|
typedef typename t::fpt fpt; |
45 |
|
|
46 |
|
PRECONDITION(leftMultiply.valid(format)); |
47 |
|
PRECONDITION(rightMultiply.valid(format)); |
48 |
|
PRECONDITION(addArgument.valid(format)); |
49 |
|
|
50 |
|
/* First multiply */ |
51 |
|
unpackedFloat<t> arithmeticMultiplyResult(arithmeticMultiply(format, leftMultiply, rightMultiply)); |
52 |
|
|
53 |
|
fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() * 2); |
54 |
|
INVARIANT(arithmeticMultiplyResult.valid(extendedFormat)); |
55 |
|
|
56 |
|
|
57 |
|
|
58 |
|
/* Then add */ |
59 |
|
|
60 |
|
// Rounding mode doesn't matter as this is a strict extension |
61 |
|
unpackedFloat<t> extendedAddArgument(convertFloatToFloat(format, extendedFormat, t::RTZ(), addArgument)); |
62 |
|
|
63 |
|
prop knownInCorrectOrder(false); |
64 |
|
exponentCompareInfo<t> ec(addExponentCompare<t>(arithmeticMultiplyResult.getExponent().getWidth() + 1, |
65 |
|
arithmeticMultiplyResult.getSignificand().getWidth(), |
66 |
|
arithmeticMultiplyResult.getExponent(), |
67 |
|
extendedAddArgument.getExponent(), |
68 |
|
knownInCorrectOrder)); |
69 |
|
|
70 |
|
unpackedFloat<t> additionResult(arithmeticAdd(extendedFormat, roundingMode, arithmeticMultiplyResult, extendedAddArgument, prop(true), knownInCorrectOrder, ec).uf); |
71 |
|
// Custom rounder flags are ignored as they are not applicable in this case |
72 |
|
|
73 |
|
fpt evenMoreExtendedFormat(extendedFormat.exponentWidth() + 1, extendedFormat.significandWidth() + 2); |
74 |
|
INVARIANT(additionResult.valid(evenMoreExtendedFormat)); |
75 |
|
|
76 |
|
|
77 |
|
/* Then round */ |
78 |
|
|
79 |
|
unpackedFloat<t> roundedResult(rounder(format, roundingMode, additionResult)); |
80 |
|
INVARIANT(roundedResult.valid(format)); |
81 |
|
|
82 |
|
// This result is correct as long as neither of multiplyResult or extendedAddArgument is |
83 |
|
// 0, Inf or NaN. Note that roundedResult may be zero from cancelation or underflow |
84 |
|
// or infinity due to rounding. If it is, it has the correct sign. |
85 |
|
|
86 |
|
|
87 |
|
|
88 |
|
/* Finally, the special cases */ |
89 |
|
|
90 |
|
// One disadvantage to having a flag for zero and default exponents and significands for zero |
91 |
|
// that are not (min, 0) is that the x + (+/-)0 case has to be handled by the addition special cases. |
92 |
|
// This means that you need the value of x, rounded to the correct format. |
93 |
|
// arithmeticMultiplyResult is in extended format, thus we have to use a second rounder just for this case. |
94 |
|
// It is not zero, inf or NaN so it only matters when addArgument is zero when it would be returned. |
95 |
|
unpackedFloat<t> roundedMultiplyResult(rounder(format, roundingMode, arithmeticMultiplyResult)); |
96 |
|
|
97 |
|
unpackedFloat<t> fullMultiplyResult(addMultiplySpecialCases(format, leftMultiply, rightMultiply, roundedMultiplyResult.getSign(), roundedMultiplyResult)); |
98 |
|
|
99 |
|
|
100 |
|
// We need the flags from the multiply special cases, determined on the arithemtic result, |
101 |
|
// i.e. handling special values and not the underflow / overflow of the result. |
102 |
|
// But we will use roundedMultiplyResult instead of the value so ... |
103 |
|
unpackedFloat<t> dummyZero(unpackedFloat<t>::makeZero(format, prop(false))); |
104 |
|
unpackedFloat<t> dummyValue(dummyZero.getSign(), dummyZero.getExponent(), dummyZero.getSignificand()); |
105 |
|
|
106 |
|
unpackedFloat<t> multiplyResultWithSpecialCases(addMultiplySpecialCases(format, leftMultiply, rightMultiply, arithmeticMultiplyResult.getSign(), dummyValue)); |
107 |
|
|
108 |
|
|
109 |
|
unpackedFloat<t> result(addAdditionSpecialCasesWithID(format, |
110 |
|
roundingMode, |
111 |
|
multiplyResultWithSpecialCases, |
112 |
|
fullMultiplyResult, // for the identity case |
113 |
|
addArgument, |
114 |
|
roundedResult, |
115 |
|
prop(true))); |
116 |
|
|
117 |
|
POSTCONDITION(result.valid(format)); |
118 |
|
|
119 |
|
return result; |
120 |
|
} |
121 |
|
|
122 |
|
/* |
123 |
|
* BUGS : |
124 |
|
* 1. sign of zero different for exact 0 and underflow |
125 |
|
* 2. large * -large + inf = inf not NaN |
126 |
|
* 3. rounder decision bugs : one looks like an issue with too-eager overflow, |
127 |
|
* one looks like a misplaced decision on highest subnormal exponent |
128 |
|
*/ |
129 |
|
|
130 |
|
template <class t> |
131 |
|
unpackedFloat<t> fmaBroken (const typename t::fpt &format, |
132 |
|
const typename t::rm &roundingMode, |
133 |
|
const unpackedFloat<t> &leftMultiply, |
134 |
|
const unpackedFloat<t> &rightMultiply, |
135 |
|
const unpackedFloat<t> &addArgument) { |
136 |
|
|
137 |
|
// typedef typename t::bwt bwt; |
138 |
|
typedef typename t::prop prop; |
139 |
|
//typedef typename t::ubv ubv; |
140 |
|
//typedef typename t::sbv sbv; |
141 |
|
typedef typename t::fpt fpt; |
142 |
|
|
143 |
|
PRECONDITION(leftMultiply.valid(format)); |
144 |
|
PRECONDITION(rightMultiply.valid(format)); |
145 |
|
PRECONDITION(addArgument.valid(format)); |
146 |
|
|
147 |
|
unpackedFloat<t> multiplyResult(arithmeticMultiply(format, leftMultiply, rightMultiply)); |
148 |
|
|
149 |
|
fpt extendedFormat(format.exponentWidth() + 1, format.significandWidth() * 2); |
150 |
|
INVARIANT(multiplyResult.valid(extendedFormat)); |
151 |
|
|
152 |
|
// Rounding mode doesn't matter as this is a strict extension |
153 |
|
unpackedFloat<t> extendedAddArgument(convertFloatToFloat(format, extendedFormat, t::RTZ(), addArgument)); |
154 |
|
|
155 |
|
unpackedFloat<t> additionResult(arithmeticAdd(extendedFormat, roundingMode, multiplyResult, extendedAddArgument, prop(true), prop(false)).uf); |
156 |
|
// Custom rounder flags are ignored as they are not applicable in this case |
157 |
|
|
158 |
|
unpackedFloat<t> roundedResult(rounder(format, roundingMode, additionResult)); |
159 |
|
|
160 |
|
// Note that multiplyResult.getSign() != roundedResult.getSign() in rare cases |
161 |
|
// the multiply special cases use the sign for zeros and infinities, thus the sign of the |
162 |
|
// result of the multiplication is needed (i.e. the xor of the sign of left and right multiply) |
163 |
|
// (-small, +inf, large) should trigger this as the desired result is -inf |
164 |
|
// but roundedResult.getSign() is positive. |
165 |
|
unpackedFloat<t> roundedResultWithMultiplyCases(addMultiplySpecialCases(format, |
166 |
|
leftMultiply, |
167 |
|
rightMultiply, |
168 |
|
multiplyResult.getSign(), |
169 |
|
roundedResult)); |
170 |
|
|
171 |
|
|
172 |
|
// One disadvantage to having a flag for zero and default exponents and significands for zero |
173 |
|
// that are not (min, 0) is that the x + 0 case has to be handled by the addition special cases. |
174 |
|
// This means that you need the value of x, rounded to the correct format. |
175 |
|
// multiplyResult is in extended format, thus we have to use a second rounder just for this case. |
176 |
|
// It is not zero, inf or NaN so it only matters when addArgument is zero when it would be returned. |
177 |
|
|
178 |
|
unpackedFloat<t> roundedMultiplyResult(rounder(format, roundingMode, multiplyResult)); |
179 |
|
// Optimisation : Try ITE before rounding so that only one rounder is needed |
180 |
|
|
181 |
|
|
182 |
|
// To make matters more awkward, we also need to apply the multiplicative special cases so that |
183 |
|
// (x*0) + y is correctly handled by the addition special cases. Without applying the |
184 |
|
// multiplicative ones, (x*0) would not be correctly flagged as 0. |
185 |
|
unpackedFloat<t> roundedMultiplyResultWithMultiplyCases(addMultiplySpecialCases(format, |
186 |
|
leftMultiply, |
187 |
|
rightMultiply, |
188 |
|
multiplyResult.getSign(), |
189 |
|
roundedMultiplyResult)); |
190 |
|
// Optimisation : consolidate the special cases and verify against this |
191 |
|
|
192 |
|
unpackedFloat<t> result(addAdditionSpecialCases(format, |
193 |
|
roundingMode, |
194 |
|
roundedMultiplyResultWithMultiplyCases, |
195 |
|
addArgument, |
196 |
|
roundedResultWithMultiplyCases, |
197 |
|
prop(true))); |
198 |
|
|
199 |
|
POSTCONDITION(result.valid(format)); |
200 |
|
|
201 |
|
return result; |
202 |
|
} |
203 |
|
|
204 |
|
|
205 |
|
} |
206 |
|
|
207 |
|
#endif |