GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/floatingpoint.h Lines: 55 61 90.2 %
Date: 2021-03-23 Branches: 2 4 50.0 %

Line Exec Source
1
/*********************                                                        */
2
/*! \file floatingpoint.h
3
 ** \verbatim
4
 ** Top contributors (to current version):
5
 **   Aina Niemetz, Martin Brain, Mathias Preiner
6
 ** Copyright (c) 2013  University of Oxford
7
 ** This file is part of the CVC4 project.
8
 ** Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
9
 ** in the top-level source directory and their institutional affiliations.
10
 ** All rights reserved.  See the file COPYING in the top-level source
11
 ** directory for licensing information.\endverbatim
12
 **
13
 ** \brief A floating-point value.
14
 **
15
 ** This file contains the data structures used by the constant and parametric
16
 ** types of the floating point theory.
17
 **/
18
#include "cvc4_public.h"
19
20
#ifndef CVC4__FLOATINGPOINT_H
21
#define CVC4__FLOATINGPOINT_H
22
23
#include <memory>
24
25
#include "util/bitvector.h"
26
#include "util/floatingpoint_size.h"
27
#include "util/rational.h"
28
#include "util/roundingmode.h"
29
30
/* -------------------------------------------------------------------------- */
31
32
namespace CVC4 {
33
34
/* -------------------------------------------------------------------------- */
35
36
class FloatingPointLiteral;
37
38
class FloatingPoint
39
{
40
 public:
41
  /**
42
   * Wrappers to represent results of non-total functions (e.g., min/max).
43
   * The Boolean flag is true if the result is defined, and false otherwise.
44
   */
45
  using PartialFloatingPoint = std::pair<FloatingPoint, bool>;
46
  using PartialBitVector = std::pair<BitVector, bool>;
47
  using PartialRational = std::pair<Rational, bool>;
48
49
  /**
50
   * Get the number of exponent bits in the unpacked format corresponding to a
51
   * given packed format.  These is the unpacked counter-parts of
52
   * FloatingPointSize::exponentWidth().
53
   */
54
  static uint32_t getUnpackedExponentWidth(FloatingPointSize& size);
55
  /**
56
   * Get the number of exponent bits in the unpacked format corresponding to a
57
   * given packed format.  These is the unpacked counter-parts of
58
   * FloatingPointSize::significandWidth().
59
   */
60
  static uint32_t getUnpackedSignificandWidth(FloatingPointSize& size);
61
62
  /** Constructors. */
63
  FloatingPoint(uint32_t e, uint32_t s, const BitVector& bv);
64
  FloatingPoint(const FloatingPointSize& size, const BitVector& bv);
65
  FloatingPoint(const FloatingPoint& fp);
66
  FloatingPoint(const FloatingPointSize& size,
67
                const RoundingMode& rm,
68
                const BitVector& bv,
69
                bool signedBV);
70
  FloatingPoint(const FloatingPointSize& size,
71
                const RoundingMode& rm,
72
                const Rational& r);
73
  /** Destructor. */
74
  ~FloatingPoint();
75
76
  /**
77
   * Create a FP NaN value of given size.
78
   * size: The FP size (format).
79
   */
80
  static FloatingPoint makeNaN(const FloatingPointSize& size);
81
  /**
82
   * Create a FP infinity value of given size.
83
   * size: The FP size (format).
84
   * sign: True for -oo, false otherwise.
85
   */
86
  static FloatingPoint makeInf(const FloatingPointSize& size, bool sign);
87
  /**
88
   * Create a FP zero value of given size.
89
   * size: The FP size (format).
90
   * sign: True for -zero, false otherwise.
91
   */
92
  static FloatingPoint makeZero(const FloatingPointSize& size, bool sign);
93
  /**
94
   * Create the smallest subnormal FP value of given size.
95
   * size: The FP size (format).
96
   * sign: True for negative sign, false otherwise.
97
   */
98
  static FloatingPoint makeMinSubnormal(const FloatingPointSize& size,
99
                                        bool sign);
100
  /**
101
   * Create the largest subnormal FP value of given size.
102
   * size: The FP size (format).
103
   * sign: True for negative sign, false otherwise.
104
   */
105
  static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size,
106
                                        bool sign);
107
  /**
108
   * Create the smallest normal FP value of given size.
109
   * size: The FP size (format).
110
   * sign: True for negative sign, false otherwise.
111
   */
112
  static FloatingPoint makeMinNormal(const FloatingPointSize& size, bool sign);
113
  /**
114
   * Create the largest normal FP value of given size.
115
   * size: The FP size (format).
116
   * sign: True for negative sign, false otherwise.
117
   */
118
  static FloatingPoint makeMaxNormal(const FloatingPointSize& size, bool sign);
119
120
  /** Get the wrapped floating-point value. */
121
262
  const FloatingPointLiteral* getLiteral(void) const { return d_fpl.get(); }
122
123
  /**
124
   * Return a string representation of this floating-point.
125
   *
126
   * If printAsIndexed is true then it prints the bit-vector components of the
127
   * FP value as indexed symbols, otherwise in binary notation.
128
   */
129
  std::string toString(bool printAsIndexed = false) const;
130
131
  /** Return the packed (IEEE-754) representation of this floating-point. */
132
  BitVector pack(void) const;
133
134
  /** Floating-point absolute value. */
135
  FloatingPoint absolute(void) const;
136
  /** Floating-point negation. */
137
  FloatingPoint negate(void) const;
138
  /** Floating-point addition. */
139
  FloatingPoint plus(const RoundingMode& rm, const FloatingPoint& arg) const;
140
  /** Floating-point subtraction. */
141
  FloatingPoint sub(const RoundingMode& rm, const FloatingPoint& arg) const;
142
  /** Floating-point multiplication. */
143
  FloatingPoint mult(const RoundingMode& rm, const FloatingPoint& arg) const;
144
  /** Floating-point division. */
145
  FloatingPoint div(const RoundingMode& rm, const FloatingPoint& arg) const;
146
  /** Floating-point fused multiplication and addition. */
147
  FloatingPoint fma(const RoundingMode& rm,
148
                    const FloatingPoint& arg1,
149
                    const FloatingPoint& arg2) const;
150
  /** Floating-point square root. */
151
  FloatingPoint sqrt(const RoundingMode& rm) const;
152
  /** Floating-point round to integral. */
153
  FloatingPoint rti(const RoundingMode& rm) const;
154
  /** Floating-point remainder. */
155
  FloatingPoint rem(const FloatingPoint& arg) const;
156
157
  /**
158
   * Floating-point max (total version).
159
   * zeroCase: true to return the left (rather than the right operand) in case
160
   *           of max(-0,+0) or max(+0,-0).
161
   */
162
  FloatingPoint maxTotal(const FloatingPoint& arg, bool zeroCaseLeft) const;
163
  /**
164
   * Floating-point min (total version).
165
   * zeroCase: true to return the left (rather than the right operand) in case
166
   *           of min(-0,+0) or min(+0,-0).
167
   */
168
  FloatingPoint minTotal(const FloatingPoint& arg, bool zeroCaseLeft) const;
169
170
  /**
171
   * Floating-point max.
172
   *
173
   * Returns a partial floating-point, which is a pair of FloatingPoint and
174
   * bool. The boolean flag is true if the result is defined, and false if it
175
   * is undefined.
176
   */
177
  PartialFloatingPoint max(const FloatingPoint& arg) const;
178
  /** Floating-point min.
179
   *
180
   * Returns a partial floating-point, which is a pair of FloatingPoint and
181
   * bool. The boolean flag is true if the result is defined, and false if it
182
   * is undefined.
183
   */
184
  PartialFloatingPoint min(const FloatingPoint& arg) const;
185
186
  /** Equality (NOT: fp.eq but =) over floating-point values. */
187
  bool operator==(const FloatingPoint& fp) const;
188
  /** Floating-point less or equal than. */
189
  bool operator<=(const FloatingPoint& arg) const;
190
  /** Floating-point less than. */
191
  bool operator<(const FloatingPoint& arg) const;
192
193
  /** Get the exponent of this floating-point value. */
194
  BitVector getExponent() const;
195
  /** Get the significand of this floating-point value. */
196
  BitVector getSignificand() const;
197
  /** True if this value is a negative value. */
198
  bool getSign() const;
199
200
  /** Return true if this floating-point represents a normal value. */
201
  bool isNormal(void) const;
202
  /** Return true if this floating-point represents a subnormal value. */
203
  bool isSubnormal(void) const;
204
  /** Return true if this floating-point represents a zero value. */
205
  bool isZero(void) const;
206
  /** Return true if this floating-point represents an infinite value. */
207
  bool isInfinite(void) const;
208
  /** Return true if this floating-point represents a NaN value. */
209
  bool isNaN(void) const;
210
  /** Return true if this floating-point represents a negative value. */
211
  bool isNegative(void) const;
212
  /** Return true if this floating-point represents a positive value. */
213
  bool isPositive(void) const;
214
215
  /**
216
   * Convert this floating-point to a floating-point of given size, with
217
   * respect to given rounding mode.
218
   */
219
  FloatingPoint convert(const FloatingPointSize& target,
220
                        const RoundingMode& rm) const;
221
222
  /**
223
   * Convert this floating-point to a bit-vector of given size, with
224
   * respect to given rounding mode (total version).
225
   * Returns given bit-vector 'undefinedCase' in the undefined case.
226
   */
227
  BitVector convertToBVTotal(BitVectorSize width,
228
                             const RoundingMode& rm,
229
                             bool signedBV,
230
                             BitVector undefinedCase) const;
231
  /**
232
   * Convert this floating-point to a rational, with respect to given rounding
233
   * mode (total version).
234
   * Returns given rational 'undefinedCase' in the undefined case.
235
   */
236
  Rational convertToRationalTotal(Rational undefinedCase) const;
237
238
  /**
239
   * Convert this floating-point to a bit-vector of given size.
240
   *
241
   * Returns a partial bit-vector, which is a pair of BitVector and bool.
242
   * The boolean flag is true if the result is defined, and false if it
243
   * is undefined.
244
   */
245
  PartialBitVector convertToBV(BitVectorSize width,
246
                               const RoundingMode& rm,
247
                               bool signedBV) const;
248
  /**
249
   * Convert this floating-point to a Rational.
250
   *
251
   * Returns a partial Rational, which is a pair of Rational and bool.
252
   * The boolean flag is true if the result is defined, and false if it
253
   * is undefined.
254
   */
255
  PartialRational convertToRational(void) const;
256
257
  /** The floating-point size of this floating-point value. */
258
  FloatingPointSize d_fp_size;
259
260
 private:
261
  /**
262
   * Constructor.
263
   *
264
   * Note: This constructor takes ownership of 'fpl' and is not intended for
265
   *       public use.
266
   */
267
  FloatingPoint(const FloatingPointSize& fp_size, FloatingPointLiteral* fpl);
268
269
  /** The floating-point literal of this floating-point value. */
270
  std::unique_ptr<FloatingPointLiteral> d_fpl;
271
272
}; /* class FloatingPoint */
273
274
/**
275
 * Hash function for floating-point values.
276
 */
277
struct FloatingPointHashFunction
278
{
279
3381
  size_t operator()(const FloatingPoint& fp) const
280
  {
281
    FloatingPointSizeHashFunction fpshf;
282
    BitVectorHashFunction bvhf;
283
284
3381
    return fpshf(fp.d_fp_size) ^ bvhf(fp.pack());
285
  }
286
}; /* struct FloatingPointHashFunction */
287
288
/* -------------------------------------------------------------------------- */
289
290
/**
291
 * The parameter type for the conversions to floating point.
292
 */
293
3999
class FloatingPointConvertSort
294
{
295
 public:
296
  /** Constructors. */
297
79
  FloatingPointConvertSort(uint32_t _e, uint32_t _s) : d_fp_size(_e, _s) {}
298
3314
  FloatingPointConvertSort(const FloatingPointSize& fps) : d_fp_size(fps) {}
299
300
  /** Operator overload for comparison of conversion sorts. */
301
3504
  bool operator==(const FloatingPointConvertSort& fpcs) const
302
  {
303
3504
    return d_fp_size == fpcs.d_fp_size;
304
  }
305
306
  /** The floating-point size of this sort. */
307
  FloatingPointSize d_fp_size;
308
};
309
310
/** Hash function for conversion sorts. */
311
template <uint32_t key>
312
struct FloatingPointConvertSortHashFunction
313
{
314
3717
  inline size_t operator()(const FloatingPointConvertSort& fpcs) const
315
  {
316
    FloatingPointSizeHashFunction f;
317
3717
    return f(fpcs.d_fp_size) ^ (0x00005300 | (key << 24));
318
  }
319
}; /* struct FloatingPointConvertSortHashFunction */
320
321
/**
322
 * As different conversions are different parameterised kinds, there
323
 * is a need for different (C++) types for each one.
324
 */
325
326
/**
327
 * Conversion from floating-point to IEEE bit-vector (first bit represents the
328
 * sign, the following exponent width bits the exponent, and the remaining bits
329
 * the significand).
330
 */
331
473
class FloatingPointToFPIEEEBitVector : public FloatingPointConvertSort
332
{
333
 public:
334
  /** Constructors. */
335
2
  FloatingPointToFPIEEEBitVector(uint32_t _e, uint32_t _s)
336
2
      : FloatingPointConvertSort(_e, _s)
337
  {
338
2
  }
339
3324
  FloatingPointToFPIEEEBitVector(const FloatingPointConvertSort& old)
340
3324
      : FloatingPointConvertSort(old)
341
  {
342
3324
  }
343
};
344
345
/**
346
 * Conversion from floating-point to another floating-point (of possibly
347
 * different size).
348
 */
349
20
class FloatingPointToFPFloatingPoint : public FloatingPointConvertSort
350
{
351
 public:
352
  /** Constructors. */
353
2
  FloatingPointToFPFloatingPoint(uint32_t _e, uint32_t _s)
354
2
      : FloatingPointConvertSort(_e, _s)
355
  {
356
2
  }
357
6
  FloatingPointToFPFloatingPoint(const FloatingPointConvertSort& old)
358
6
      : FloatingPointConvertSort(old)
359
  {
360
6
  }
361
};
362
363
/**
364
 * Conversion from floating-point to Real.
365
 */
366
31
class FloatingPointToFPReal : public FloatingPointConvertSort
367
{
368
 public:
369
  /** Constructors. */
370
2
  FloatingPointToFPReal(uint32_t _e, uint32_t _s)
371
2
      : FloatingPointConvertSort(_e, _s)
372
  {
373
2
  }
374
20
  FloatingPointToFPReal(const FloatingPointConvertSort& old)
375
20
      : FloatingPointConvertSort(old)
376
  {
377
20
  }
378
};
379
380
/**
381
 * Conversion from floating-point to signed bit-vector value.
382
 */
383
10
class FloatingPointToFPSignedBitVector : public FloatingPointConvertSort
384
{
385
 public:
386
  /** Constructors. */
387
2
  FloatingPointToFPSignedBitVector(uint32_t _e, uint32_t _s)
388
2
      : FloatingPointConvertSort(_e, _s)
389
  {
390
2
  }
391
4
  FloatingPointToFPSignedBitVector(const FloatingPointConvertSort& old)
392
4
      : FloatingPointConvertSort(old)
393
  {
394
4
  }
395
};
396
397
/**
398
 * Conversion from floating-point to unsigned bit-vector value.
399
 */
400
4
class FloatingPointToFPUnsignedBitVector : public FloatingPointConvertSort
401
{
402
 public:
403
  /** Constructors. */
404
2
  FloatingPointToFPUnsignedBitVector(uint32_t _e, uint32_t _s)
405
2
      : FloatingPointConvertSort(_e, _s)
406
  {
407
2
  }
408
  FloatingPointToFPUnsignedBitVector(const FloatingPointConvertSort& old)
409
      : FloatingPointConvertSort(old)
410
  {
411
  }
412
};
413
414
107
class FloatingPointToFPGeneric : public FloatingPointConvertSort
415
{
416
 public:
417
  /** Constructors. */
418
69
  FloatingPointToFPGeneric(uint32_t _e, uint32_t _s)
419
69
      : FloatingPointConvertSort(_e, _s)
420
  {
421
69
  }
422
  FloatingPointToFPGeneric(const FloatingPointConvertSort& old)
423
      : FloatingPointConvertSort(old)
424
  {
425
  }
426
};
427
428
/**
429
 * Base type for floating-point to bit-vector conversion.
430
 */
431
class FloatingPointToBV
432
{
433
 public:
434
  /** Constructors. */
435
4
  FloatingPointToBV(uint32_t s) : d_bv_size(s) {}
436
4
  FloatingPointToBV(const FloatingPointToBV& old) : d_bv_size(old.d_bv_size) {}
437
  FloatingPointToBV(const BitVectorSize& old) : d_bv_size(old) {}
438
439
  /** Return the bit-width of the bit-vector to convert to. */
440
16
  operator uint32_t() const { return d_bv_size; }
441
442
  /** The bit-width of the bit-vector to converto to. */
443
  BitVectorSize d_bv_size;
444
};
445
446
/**
447
 * Conversion from floating-point to unsigned bit-vector value.
448
 */
449
2
class FloatingPointToUBV : public FloatingPointToBV
450
{
451
 public:
452
2
  FloatingPointToUBV(uint32_t _s) : FloatingPointToBV(_s) {}
453
  FloatingPointToUBV(const FloatingPointToBV& old) : FloatingPointToBV(old) {}
454
};
455
456
/**
457
 * Conversion from floating-point to signed bit-vector value.
458
 */
459
2
class FloatingPointToSBV : public FloatingPointToBV
460
{
461
 public:
462
2
  FloatingPointToSBV(uint32_t _s) : FloatingPointToBV(_s) {}
463
  FloatingPointToSBV(const FloatingPointToBV& old) : FloatingPointToBV(old) {}
464
};
465
466
/**
467
 * Conversion from floating-point to unsigned bit-vector value (total version).
468
 */
469
class FloatingPointToUBVTotal : public FloatingPointToBV
470
{
471
 public:
472
  FloatingPointToUBVTotal(uint32_t _s) : FloatingPointToBV(_s) {}
473
  FloatingPointToUBVTotal(const FloatingPointToBV& old) : FloatingPointToBV(old)
474
  {
475
  }
476
};
477
478
/**
479
 * Conversion from floating-point to signed bit-vector value (total version).
480
 */
481
class FloatingPointToSBVTotal : public FloatingPointToBV
482
{
483
 public:
484
  FloatingPointToSBVTotal(uint32_t _s) : FloatingPointToBV(_s) {}
485
  FloatingPointToSBVTotal(const FloatingPointToBV& old) : FloatingPointToBV(old)
486
  {
487
  }
488
};
489
490
/**
491
 * Hash function for floating-point to bit-vector conversions.
492
 */
493
template <uint32_t key>
494
struct FloatingPointToBVHashFunction
495
{
496
20
  inline size_t operator()(const FloatingPointToBV& fptbv) const
497
  {
498
    UnsignedHashFunction< ::CVC4::BitVectorSize> f;
499
20
    return (key ^ 0x46504256) ^ f(fptbv.d_bv_size);
500
  }
501
}; /* struct FloatingPointToBVHashFunction */
502
503
/* Note: It is not possible to pack a number without a size to pack to,
504
 * thus it is difficult to implement output stream operator overloads for
505
 * FloatingPointLiteral in a sensible way. Use FloatingPoint instead. */
506
507
/** Output stream operator overloading for floating-point values. */
508
std::ostream& operator<<(std::ostream& os, const FloatingPoint& fp);
509
510
/** Output stream operator overloading for floating-point formats. */
511
std::ostream& operator<<(std::ostream& os, const FloatingPointSize& fps);
512
513
/** Output stream operator overloading for floating-point conversion sorts. */
514
std::ostream& operator<<(std::ostream& os,
515
                         const FloatingPointConvertSort& fpcs);
516
517
}  // namespace CVC4
518
519
#endif /* CVC4__FLOATINGPOINT_H */