GCC Code Coverage Report
Directory: . Exec Total Coverage
File: src/util/floatingpoint.h Lines: 62 65 95.4 %
Date: 2021-08-17 Branches: 9 18 50.0 %

Line Exec Source
1
/******************************************************************************
2
 * Top contributors (to current version):
3
 *   Aina Niemetz, Martin Brain, Mathias Preiner
4
 * Copyright (c) 2013  University of Oxford
5
 *
6
 * This file is part of the cvc5 project.
7
 *
8
 * Copyright (c) 2009-2021 by the authors listed in the file AUTHORS
9
 * in the top-level source directory and their institutional affiliations.
10
 * All rights reserved.  See the file COPYING in the top-level source
11
 * directory for licensing information.
12
 * ****************************************************************************
13
 *
14
 * A floating-point value.
15
 *
16
 * This file contains the data structures used by the constant and parametric
17
 * types of the floating point theory.
18
 */
19
#include "cvc5_public.h"
20
21
#ifndef CVC5__FLOATINGPOINT_H
22
#define CVC5__FLOATINGPOINT_H
23
24
#include <memory>
25
26
#include "util/bitvector.h"
27
#include "util/floatingpoint_size.h"
28
#include "util/rational.h"
29
#include "util/roundingmode.h"
30
31
/* -------------------------------------------------------------------------- */
32
33
namespace cvc5 {
34
35
/* -------------------------------------------------------------------------- */
36
37
class FloatingPointLiteral;
38
39
class FloatingPoint
40
{
41
 public:
42
  /**
43
   * Wrappers to represent results of non-total functions (e.g., min/max).
44
   * The Boolean flag is true if the result is defined, and false otherwise.
45
   */
46
  using PartialFloatingPoint = std::pair<FloatingPoint, bool>;
47
  using PartialBitVector = std::pair<BitVector, bool>;
48
  using PartialRational = std::pair<Rational, bool>;
49
50
  /**
51
   * Get the number of exponent bits in the unpacked format corresponding to a
52
   * given packed format.  This is the unpacked counter-part of
53
   * FloatingPointSize::exponentWidth().
54
   */
55
  static uint32_t getUnpackedExponentWidth(FloatingPointSize& size);
56
  /**
57
   * Get the number of exponent bits in the unpacked format corresponding to a
58
   * given packed format.  This is the unpacked counter-part of
59
   * FloatingPointSize::significandWidth().
60
   */
61
  static uint32_t getUnpackedSignificandWidth(FloatingPointSize& size);
62
63
  /** Constructors. */
64
65
  /** Create a FP value from its IEEE bit-vector representation. */
66
  FloatingPoint(uint32_t e, uint32_t s, const BitVector& bv);
67
  FloatingPoint(const FloatingPointSize& size, const BitVector& bv);
68
69
  /**
70
   * Create a FP value from a signed or unsigned bit-vector value with
71
   * respect to given rounding mode.
72
   */
73
  FloatingPoint(const FloatingPointSize& size,
74
                const RoundingMode& rm,
75
                const BitVector& bv,
76
                bool signedBV);
77
78
  /**
79
   * Create a FP value from a rational value with respect to given rounding
80
   * mode.
81
   */
82
  FloatingPoint(const FloatingPointSize& size,
83
                const RoundingMode& rm,
84
                const Rational& r);
85
86
  /** Copy constructor. */
87
  FloatingPoint(const FloatingPoint& fp);
88
89
  /** Destructor. */
90
  ~FloatingPoint();
91
92
  /**
93
   * Create a FP NaN value of given size.
94
   * size: The FP size (format).
95
   */
96
  static FloatingPoint makeNaN(const FloatingPointSize& size);
97
  /**
98
   * Create a FP infinity value of given size.
99
   * size: The FP size (format).
100
   * sign: True for -oo, false otherwise.
101
   */
102
  static FloatingPoint makeInf(const FloatingPointSize& size, bool sign);
103
  /**
104
   * Create a FP zero value of given size.
105
   * size: The FP size (format).
106
   * sign: True for -zero, false otherwise.
107
   */
108
  static FloatingPoint makeZero(const FloatingPointSize& size, bool sign);
109
  /**
110
   * Create the smallest subnormal FP value of given size.
111
   * size: The FP size (format).
112
   * sign: True for negative sign, false otherwise.
113
   */
114
  static FloatingPoint makeMinSubnormal(const FloatingPointSize& size,
115
                                        bool sign);
116
  /**
117
   * Create the largest subnormal FP value of given size.
118
   * size: The FP size (format).
119
   * sign: True for negative sign, false otherwise.
120
   */
121
  static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size,
122
                                        bool sign);
123
  /**
124
   * Create the smallest normal FP value of given size.
125
   * size: The FP size (format).
126
   * sign: True for negative sign, false otherwise.
127
   */
128
  static FloatingPoint makeMinNormal(const FloatingPointSize& size, bool sign);
129
  /**
130
   * Create the largest normal FP value of given size.
131
   * size: The FP size (format).
132
   * sign: True for negative sign, false otherwise.
133
   */
134
  static FloatingPoint makeMaxNormal(const FloatingPointSize& size, bool sign);
135
136
  /** Return the size of this FP value. */
137
  const FloatingPointSize& getSize() const;
138
139
  /** Get the wrapped floating-point value. */
140
256
  const FloatingPointLiteral* getLiteral(void) const { return d_fpl.get(); }
141
142
  /**
143
   * Return a string representation of this floating-point.
144
   *
145
   * If printAsIndexed is true then it prints the bit-vector components of the
146
   * FP value as indexed symbols, otherwise in binary notation.
147
   */
148
  std::string toString(bool printAsIndexed = false) const;
149
150
  /** Return the packed (IEEE-754) representation of this floating-point. */
151
  BitVector pack(void) const;
152
153
  /** Floating-point absolute value. */
154
  FloatingPoint absolute(void) const;
155
  /** Floating-point negation. */
156
  FloatingPoint negate(void) const;
157
  /** Floating-point addition. */
158
  FloatingPoint add(const RoundingMode& rm, const FloatingPoint& arg) const;
159
  /** Floating-point subtraction. */
160
  FloatingPoint sub(const RoundingMode& rm, const FloatingPoint& arg) const;
161
  /** Floating-point multiplication. */
162
  FloatingPoint mult(const RoundingMode& rm, const FloatingPoint& arg) const;
163
  /** Floating-point division. */
164
  FloatingPoint div(const RoundingMode& rm, const FloatingPoint& arg) const;
165
  /** Floating-point fused multiplication and addition. */
166
  FloatingPoint fma(const RoundingMode& rm,
167
                    const FloatingPoint& arg1,
168
                    const FloatingPoint& arg2) const;
169
  /** Floating-point square root. */
170
  FloatingPoint sqrt(const RoundingMode& rm) const;
171
  /** Floating-point round to integral. */
172
  FloatingPoint rti(const RoundingMode& rm) const;
173
  /** Floating-point remainder. */
174
  FloatingPoint rem(const FloatingPoint& arg) const;
175
176
  /**
177
   * Floating-point max (total version).
178
   * zeroCase: true to return the left (rather than the right operand) in case
179
   *           of max(-0,+0) or max(+0,-0).
180
   */
181
  FloatingPoint maxTotal(const FloatingPoint& arg, bool zeroCaseLeft) const;
182
  /**
183
   * Floating-point min (total version).
184
   * zeroCase: true to return the left (rather than the right operand) in case
185
   *           of min(-0,+0) or min(+0,-0).
186
   */
187
  FloatingPoint minTotal(const FloatingPoint& arg, bool zeroCaseLeft) const;
188
189
  /**
190
   * Floating-point max.
191
   *
192
   * Returns a partial floating-point, which is a pair of FloatingPoint and
193
   * bool. The boolean flag is true if the result is defined, and false if it
194
   * is undefined.
195
   */
196
  PartialFloatingPoint max(const FloatingPoint& arg) const;
197
  /** Floating-point min.
198
   *
199
   * Returns a partial floating-point, which is a pair of FloatingPoint and
200
   * bool. The boolean flag is true if the result is defined, and false if it
201
   * is undefined.
202
   */
203
  PartialFloatingPoint min(const FloatingPoint& arg) const;
204
205
  /** Equality (NOT: fp.eq but =) over floating-point values. */
206
  bool operator==(const FloatingPoint& fp) const;
207
  /** Floating-point less or equal than. */
208
  bool operator<=(const FloatingPoint& arg) const;
209
  /** Floating-point less than. */
210
  bool operator<(const FloatingPoint& arg) const;
211
212
  /** Get the exponent of this floating-point value. */
213
  BitVector getExponent() const;
214
  /** Get the significand of this floating-point value. */
215
  BitVector getSignificand() const;
216
  /** True if this value is a negative value. */
217
  bool getSign() const;
218
219
  /** Return true if this floating-point represents a normal value. */
220
  bool isNormal(void) const;
221
  /** Return true if this floating-point represents a subnormal value. */
222
  bool isSubnormal(void) const;
223
  /** Return true if this floating-point represents a zero value. */
224
  bool isZero(void) const;
225
  /** Return true if this floating-point represents an infinite value. */
226
  bool isInfinite(void) const;
227
  /** Return true if this floating-point represents a NaN value. */
228
  bool isNaN(void) const;
229
  /** Return true if this floating-point represents a negative value. */
230
  bool isNegative(void) const;
231
  /** Return true if this floating-point represents a positive value. */
232
  bool isPositive(void) const;
233
234
  /**
235
   * Convert this floating-point to a floating-point of given size, with
236
   * respect to given rounding mode.
237
   */
238
  FloatingPoint convert(const FloatingPointSize& target,
239
                        const RoundingMode& rm) const;
240
241
  /**
242
   * Convert this floating-point to a bit-vector of given size, with
243
   * respect to given rounding mode (total version).
244
   * Returns given bit-vector 'undefinedCase' in the undefined case.
245
   */
246
  BitVector convertToBVTotal(BitVectorSize width,
247
                             const RoundingMode& rm,
248
                             bool signedBV,
249
                             BitVector undefinedCase) const;
250
  /**
251
   * Convert this floating-point to a rational, with respect to given rounding
252
   * mode (total version).
253
   * Returns given rational 'undefinedCase' in the undefined case.
254
   */
255
  Rational convertToRationalTotal(Rational undefinedCase) const;
256
257
  /**
258
   * Convert this floating-point to a bit-vector of given size.
259
   *
260
   * Returns a partial bit-vector, which is a pair of BitVector and bool.
261
   * The boolean flag is true if the result is defined, and false if it
262
   * is undefined.
263
   */
264
  PartialBitVector convertToBV(BitVectorSize width,
265
                               const RoundingMode& rm,
266
                               bool signedBV) const;
267
  /**
268
   * Convert this floating-point to a Rational.
269
   *
270
   * Returns a partial Rational, which is a pair of Rational and bool.
271
   * The boolean flag is true if the result is defined, and false if it
272
   * is undefined.
273
   */
274
  PartialRational convertToRational(void) const;
275
276
 private:
277
  /**
278
   * Constructor.
279
   *
280
   * Note: This constructor takes ownership of 'fpl' and is not intended for
281
   *       public use.
282
   */
283
  FloatingPoint(FloatingPointLiteral* fpl);
284
285
  /** The floating-point literal of this floating-point value. */
286
  std::unique_ptr<FloatingPointLiteral> d_fpl;
287
288
}; /* class FloatingPoint */
289
290
/**
291
 * Hash function for floating-point values.
292
 */
293
struct FloatingPointHashFunction
294
{
295
3155
  size_t operator()(const FloatingPoint& fp) const
296
  {
297
    FloatingPointSizeHashFunction fpshf;
298
    BitVectorHashFunction bvhf;
299
300
3155
    return fpshf(fp.getSize()) ^ bvhf(fp.pack());
301
  }
302
}; /* struct FloatingPointHashFunction */
303
304
/* -------------------------------------------------------------------------- */
305
306
/**
307
 * The parameter type for the conversions to floating point.
308
 */
309
3898
class FloatingPointConvertSort
310
{
311
 public:
312
  /** Constructors. */
313
148
  FloatingPointConvertSort(uint32_t _e, uint32_t _s) : d_fp_size(_e, _s) {}
314
2973
  FloatingPointConvertSort(const FloatingPointSize& fps) : d_fp_size(fps) {}
315
316
  /** Operator overload for comparison of conversion sorts. */
317
3340
  bool operator==(const FloatingPointConvertSort& fpcs) const
318
  {
319
3340
    return d_fp_size == fpcs.d_fp_size;
320
  }
321
322
  /** Return the size of this FP convert sort. */
323
5693
  FloatingPointSize getSize() const { return d_fp_size; }
324
325
 private:
326
  /** The floating-point size of this sort. */
327
  FloatingPointSize d_fp_size;
328
};
329
330
/** Hash function for conversion sorts. */
331
template <uint32_t key>
332
struct FloatingPointConvertSortHashFunction
333
{
334
3715
  inline size_t operator()(const FloatingPointConvertSort& fpcs) const
335
  {
336
    FloatingPointSizeHashFunction f;
337
3715
    return f(fpcs.getSize()) ^ (0x00005300 | (key << 24));
338
  }
339
}; /* struct FloatingPointConvertSortHashFunction */
340
341
/**
342
 * As different conversions are different parameterised kinds, there
343
 * is a need for different (C++) types for each one.
344
 */
345
346
/**
347
 * Conversion from floating-point to IEEE bit-vector (first bit represents the
348
 * sign, the following exponent width bits the exponent, and the remaining bits
349
 * the significand).
350
 */
351
447
class FloatingPointToFPIEEEBitVector : public FloatingPointConvertSort
352
{
353
 public:
354
  /** Constructors. */
355
6
  FloatingPointToFPIEEEBitVector(uint32_t _e, uint32_t _s)
356
6
      : FloatingPointConvertSort(_e, _s)
357
  {
358
6
  }
359
3003
  FloatingPointToFPIEEEBitVector(const FloatingPointConvertSort& old)
360
3003
      : FloatingPointConvertSort(old)
361
  {
362
3003
  }
363
};
364
365
/**
366
 * Conversion from floating-point to another floating-point (of possibly
367
 * different size).
368
 */
369
54
class FloatingPointToFPFloatingPoint : public FloatingPointConvertSort
370
{
371
 public:
372
  /** Constructors. */
373
6
  FloatingPointToFPFloatingPoint(uint32_t _e, uint32_t _s)
374
6
      : FloatingPointConvertSort(_e, _s)
375
  {
376
6
  }
377
8
  FloatingPointToFPFloatingPoint(const FloatingPointConvertSort& old)
378
8
      : FloatingPointConvertSort(old)
379
  {
380
8
  }
381
};
382
383
/**
384
 * Conversion from floating-point to Real.
385
 */
386
46
class FloatingPointToFPReal : public FloatingPointConvertSort
387
{
388
 public:
389
  /** Constructors. */
390
6
  FloatingPointToFPReal(uint32_t _e, uint32_t _s)
391
6
      : FloatingPointConvertSort(_e, _s)
392
  {
393
6
  }
394
16
  FloatingPointToFPReal(const FloatingPointConvertSort& old)
395
16
      : FloatingPointConvertSort(old)
396
  {
397
16
  }
398
};
399
400
/**
401
 * Conversion from floating-point to signed bit-vector value.
402
 */
403
30
class FloatingPointToFPSignedBitVector : public FloatingPointConvertSort
404
{
405
 public:
406
  /** Constructors. */
407
6
  FloatingPointToFPSignedBitVector(uint32_t _e, uint32_t _s)
408
6
      : FloatingPointConvertSort(_e, _s)
409
  {
410
6
  }
411
16
  FloatingPointToFPSignedBitVector(const FloatingPointConvertSort& old)
412
16
      : FloatingPointConvertSort(old)
413
  {
414
16
  }
415
};
416
417
/**
418
 * Conversion from floating-point to unsigned bit-vector value.
419
 */
420
79
class FloatingPointToFPUnsignedBitVector : public FloatingPointConvertSort
421
{
422
 public:
423
  /** Constructors. */
424
18
  FloatingPointToFPUnsignedBitVector(uint32_t _e, uint32_t _s)
425
18
      : FloatingPointConvertSort(_e, _s)
426
  {
427
18
  }
428
24
  FloatingPointToFPUnsignedBitVector(const FloatingPointConvertSort& old)
429
24
      : FloatingPointConvertSort(old)
430
  {
431
24
  }
432
};
433
434
175
class FloatingPointToFPGeneric : public FloatingPointConvertSort
435
{
436
 public:
437
  /** Constructors. */
438
106
  FloatingPointToFPGeneric(uint32_t _e, uint32_t _s)
439
106
      : FloatingPointConvertSort(_e, _s)
440
  {
441
106
  }
442
  FloatingPointToFPGeneric(const FloatingPointConvertSort& old)
443
      : FloatingPointConvertSort(old)
444
  {
445
  }
446
};
447
448
/**
449
 * Base type for floating-point to bit-vector conversion.
450
 */
451
class FloatingPointToBV
452
{
453
 public:
454
  /** Constructors. */
455
15
  FloatingPointToBV(uint32_t s) : d_bv_size(s) {}
456
40
  FloatingPointToBV(const FloatingPointToBV& old) : d_bv_size(old.d_bv_size) {}
457
  FloatingPointToBV(const BitVectorSize& old) : d_bv_size(old) {}
458
459
  /** Return the bit-width of the bit-vector to convert to. */
460
78
  operator uint32_t() const { return d_bv_size; }
461
462
  /** The bit-width of the bit-vector to converto to. */
463
  BitVectorSize d_bv_size;
464
};
465
466
/**
467
 * Conversion from floating-point to unsigned bit-vector value.
468
 */
469
6
class FloatingPointToUBV : public FloatingPointToBV
470
{
471
 public:
472
6
  FloatingPointToUBV(uint32_t _s) : FloatingPointToBV(_s) {}
473
  FloatingPointToUBV(const FloatingPointToBV& old) : FloatingPointToBV(old) {}
474
};
475
476
/**
477
 * Conversion from floating-point to signed bit-vector value.
478
 */
479
19
class FloatingPointToSBV : public FloatingPointToBV
480
{
481
 public:
482
9
  FloatingPointToSBV(uint32_t _s) : FloatingPointToBV(_s) {}
483
  FloatingPointToSBV(const FloatingPointToBV& old) : FloatingPointToBV(old) {}
484
};
485
486
/**
487
 * Conversion from floating-point to unsigned bit-vector value (total version).
488
 */
489
class FloatingPointToUBVTotal : public FloatingPointToBV
490
{
491
 public:
492
  FloatingPointToUBVTotal(uint32_t _s) : FloatingPointToBV(_s) {}
493
  FloatingPointToUBVTotal(const FloatingPointToBV& old) : FloatingPointToBV(old)
494
  {
495
  }
496
};
497
498
/**
499
 * Conversion from floating-point to signed bit-vector value (total version).
500
 */
501
11
class FloatingPointToSBVTotal : public FloatingPointToBV
502
{
503
 public:
504
  FloatingPointToSBVTotal(uint32_t _s) : FloatingPointToBV(_s) {}
505
4
  FloatingPointToSBVTotal(const FloatingPointToBV& old) : FloatingPointToBV(old)
506
  {
507
4
  }
508
};
509
510
/**
511
 * Hash function for floating-point to bit-vector conversions.
512
 */
513
template <uint32_t key>
514
struct FloatingPointToBVHashFunction
515
{
516
91
  inline size_t operator()(const FloatingPointToBV& fptbv) const
517
  {
518
    UnsignedHashFunction< ::cvc5::BitVectorSize> f;
519
91
    return (key ^ 0x46504256) ^ f(fptbv.d_bv_size);
520
  }
521
}; /* struct FloatingPointToBVHashFunction */
522
523
/* Note: It is not possible to pack a number without a size to pack to,
524
 * thus it is difficult to implement output stream operator overloads for
525
 * FloatingPointLiteral in a sensible way. Use FloatingPoint instead. */
526
527
/** Output stream operator overloading for floating-point values. */
528
std::ostream& operator<<(std::ostream& os, const FloatingPoint& fp);
529
530
/** Output stream operator overloading for floating-point formats. */
531
std::ostream& operator<<(std::ostream& os, const FloatingPointSize& fps);
532
533
/** Output stream operator overloading for floating-point conversion sorts. */
534
std::ostream& operator<<(std::ostream& os,
535
                         const FloatingPointConvertSort& fpcs);
536
537
}  // namespace cvc5
538
539
#endif /* CVC5__FLOATINGPOINT_H */