C++03 中与浮点值最接近的整数：C++_Floating Point_Type Conversion_C++03_Integral

C++03 中与浮点值最接近的整数

c++ floating-point

C++03 中与浮点值最接近的整数,c++,floating-point,type-conversion,c++03,integral,C++,Floating Point,Type Conversion,C++03,Integral,即使浮点值远远超出整型的可表示范围，如何找到某个整型中最接近给定浮点值的值？或者更准确地说：设 F 为浮点类型（可能是 float、double 或 long double）。设 I 为整数类型。假设 F 和 I 都具有 std::numeric_limits 的有效特化。给定一个 F 的可表示值，并且仅使用 C++03，如何找到 I 的最接近的可表示值？我追求的是一个纯粹的、高效的、线程安全的解决方案，这个解决方案除了 C++03 所保证的以外，对平台没有任何假设。如果不存在这样的解决方案，是否可以使用C99/C++11

即使浮点值远远超出整型的可表示范围，如何找到某个整型中最接近给定浮点值的值？

或者更准确地说：

设 F 为浮点类型（可能是 float、double 或 long double）。设 I 为整数类型。

假设 F 和 I 都具有 std::numeric_limits 的有效特化。

给定一个 F 的可表示值，并且仅使用 C++03，如何找到 I 的最接近的可表示值？

我追求的是一个纯粹的、高效的、线程安全的解决方案，这个解决方案除了C++03所保证的以外，对平台没有任何假设

如果不存在这样的解决方案，是否可以使用C99/C++11的新功能找到一个解决方案

使用 C99 的 lround() 似乎有问题，因为它报告定义域错误（domain error）的方式并不简单。这些定义域错误能否以可移植且线程安全的方式捕获？

注意：我知道 Boost 可能通过其 boost::numeric::converter 模板提供了一个解决方案，但由于其高度复杂性和冗长性，我无法从中提取要点，因此我无法检查他们的解决方案是否做出了超出 C++03 的假设。

当 f 的整数部分不是 I 的可表示值时，由于 C++03 未定义 I(f) 的结果，以下简单方法失败：

// Naive conversion: hands f directly to the built-in floating-point to
// integer conversion, which discards the fractional part. No range check is
// made, so the caller must ensure the integral part of f fits in I.
template<class I, class F> I closest_int(F f)
{
  const I truncated = static_cast<I>(f);
  return truncated;
}

（这里指先与 std::numeric_limits<I>::min() / max() 比较、再进行转换的 closest_int 版本，其代码见下文。）这次 I(f) 将始终有一个定义良好的结果，但是，由于 F(std::numeric_limits<I>::max()) 可能比 std::numeric_limits<I>::max() 小得多，对于比 std::numeric_limits<I>::max() 小多个整数值的浮点值，我们也可能会返回 std::numeric_limits<I>::max()。

请注意，所有这些问题的根源在于：转换 F(i) 是向上舍入还是向下舍入到最接近的可表示浮点值，标准并未规定。

以下是 C++03 标准（4.9 浮点-整数转换，Floating-integral conversions）中的相关部分：

整数类型或枚举类型的右值可以转换为浮点类型的右值。如果可能，结果是精确的。否则，结果由实现定义，在下一个较低或较高的可表示值之间选择。

对于大于等于64位的基数2（二进制）浮点类型和整数类型，我有一个实用的解决方案。见下文。评论应该是明确的。输出如下

// file: f2i.cpp
//
// compiled with MinGW x86 (gcc version 4.6.2) as:
//   g++ -Wall -O2 -std=c++03 f2i.cpp -o f2i.exe
#include <iostream>
#include <iomanip>
#include <limits>

using namespace std;

template<class I, class F> I truncAndCap(F f)
{
/*
  Converts the floating-point value f (of type F) into an integer value
  (of type I) by truncating the fractional part, and avoids undefined
  behavior by saturating:
    - returns std::numeric_limits<I>::max() when f is too big,
    - returns std::numeric_limits<I>::min() when f is too small,
    - returns I(0) when f is NaN.

  2 problems:
  - F may fail to convert to I,
    which is undefined behavior and we want to avoid that.
  - I may not convert exactly into F
    - Direct I & F comparison fails because of I to F promotion,
      which can be inexact.

  This solution is for the most practical case when I and F
  are radix-2 (binary) integer and floating-point types.
*/

/*
  NaN compares false against everything, so it would slip past both range
  checks below and reach I(f), which is undefined behavior. On IEEE 754
  implementations NaN is the only value that is unequal to itself; it has
  no nearest integer, so map it to 0 to keep the result deterministic.
*/
  if (f != f)
    return I(0);

  const int Idigits = std::numeric_limits<I>::digits;
  const bool Isigned = std::numeric_limits<I>::is_signed;

/*
  Calculate cutOffMax = 2 ^ std::numeric_limits<I>::digits
  (where ^ denotes exponentiation) as a value of type F.

  The power is built as a product of two smaller powers of two because
  I(1) << Idigits itself would not fit in I.

  We assume that F is a radix-2 (binary) floating-point type AND
  it has a big enough exponent part to hold the value of
  std::numeric_limits<I>::digits.

  FLT_MAX_10_EXP/DBL_MAX_10_EXP/LDBL_MAX_10_EXP >= 37
  (guaranteed per C++ standard from 2003/C standard from 1999)
  corresponds to log2(1e37) ~= 122, so the type I can contain
  up to 122 bits. In practice, integers longer than 64 bits
  are extremely rare (if existent at all), especially on old systems
  of the 2003 C++ standard's time.
*/
  const F cutOffMax = F(I(1) << Idigits / 2) * F(I(1) << (Idigits / 2 + Idigits % 2));

  if (f >= cutOffMax)
    return std::numeric_limits<I>::max();

/*
  cutOffMin = - 2 ^ std::numeric_limits<I>::digits for signed I's
  (the exact negation of cutOffMax) OR cutOffMin = 0 for unsigned I's.
*/
  const F cutOffMin = Isigned ? -cutOffMax : F(0);

  if (f <= cutOffMin)
    return std::numeric_limits<I>::min();

/*
  Mathematically, we may still have a little problem (2 cases):
    cutOffMin < f < std::numeric_limits<I>::min()
    std::numeric_limits<I>::max() < f < cutOffMax

  These cases are only possible when f isn't a whole number, when
  it's either std::numeric_limits<I>::min() - value in the range (0,1)
  or std::numeric_limits<I>::max() + value in the range (0,1).

  We can ignore this altogether because converting f to type I is
  guaranteed to truncate the fractional part off, and therefore
  I(f) will always be in the range
  [std::numeric_limits<I>::min(), std::numeric_limits<I>::max()].
*/

  return I(f);
}

// Converts f to type I with truncAndCap and writes one report line to
// std::cout in the form "<msg><f> -> <result>".
// f is printed in a 14-character field with a precision of 12; showpos is
// switched on while both numbers are written and switched off again before
// the line is ended. The precision set here stays in effect on std::cout.
template<class I, class F> void test(const char* msg, F f)
{
  const I converted = truncAndCap<I,F>(f);

  std::cout << msg;
  std::cout << std::showpos << std::setw(14) << std::setprecision(12) << f;
  std::cout << " -> " << converted;
  std::cout << std::noshowpos << std::endl;
}

// Calls test<I,F>(..., VAL) with a label of the form "F -> I: " built by
// stringifying the two type arguments.
// The macro intentionally does not end in a semicolon: the call site
// supplies it, so TEST(...) behaves like an ordinary expression statement
// (no stray empty statement, and it is safe in an unbraced if/else).
#define TEST(I,F,VAL) \
  test<I,F>(#F " -> " #I ": ", VAL)

int main()
{
  TEST(short, float,     -1.75f);
  TEST(short, float,     -1.25f);
  TEST(short, float,     +0.00f);
  TEST(short, float,     +1.25f);
  TEST(short, float,     +1.75f);

  TEST(short, float, -32769.00f);
  TEST(short, float, -32768.50f);
  TEST(short, float, -32768.00f);
  TEST(short, float, -32767.75f);
  TEST(short, float, -32767.25f);
  TEST(short, float, -32767.00f);
  TEST(short, float, -32766.00f);
  TEST(short, float, +32766.00f);
  TEST(short, float, +32767.00f);
  TEST(short, float, +32767.25f);
  TEST(short, float, +32767.75f);
  TEST(short, float, +32768.00f);
  TEST(short, float, +32768.50f);
  TEST(short, float, +32769.00f);

  TEST(int, float, -2147483904.00f);
  TEST(int, float, -2147483648.00f);
  TEST(int, float, -16777218.00f);
  TEST(int, float, -16777216.00f);
  TEST(int, float, -16777215.00f);
  TEST(int, float, +16777215.00f);
  TEST(int, float, +16777216.00f);
  TEST(int, float, +16777218.00f);
  TEST(int, float, +2147483648.00f);
  TEST(int, float, +2147483904.00f);

  TEST(int, double, -2147483649.00);
  TEST(int, double, -2147483648.00);
  TEST(int, double, -2147483647.75);
  TEST(int, double, -2147483647.25);
  TEST(int, double, -2147483647.00);
  TEST(int, double, +2147483647.00);
  TEST(int, double, +2147483647.25);
  TEST(int, double, +2147483647.75);
  TEST(int, double, +2147483648.00);
  TEST(int, double, +2147483649.00);

  TEST(unsigned, double,          -1.00);
  TEST(unsigned, double,          +1.00);
  TEST(unsigned, double, +4294967295.00);
  TEST(unsigned, double, +4294967295.25);
  TEST(unsigned, double, +4294967295.75);
  TEST(unsigned, double, +4294967296.00);
  TEST(unsigned, double, +4294967297.00);

  return 0;
}

写得很好的问题。我希望它们都是这样。@AlexeyFrunze我想要'float->int'，然而，在我微弱的尝试中，我将最大和最小整数转换成了float，引用的意思是说明后一种转换的相反方向。我将尝试进行编辑，使其更加清晰。@AlexeyFrunze您是否再次删除了您的问题？或者我搞砸了什么？抱歉，我在理解问题后删除了评论。我知道这不是你的问题的核心，但如果你想要最接近的整数而不是截断的整数部分，你不应该使用 I(f + 0.5) 吗？我收回那条[评论]。汇编输出看起来合理。起初，编译器把对 truncAndCap() 的调用内联了，这就是为什么我在转换代码附近看到了很多不相关的东西（与 std::cout 相关）。添加 -fno-inline 后可以看出 truncAndCap() 很短。

// Clamps f to the range of I before converting: values at or below
// std::numeric_limits<I>::min() yield min(), values at or above
// std::numeric_limits<I>::max() yield max(), everything else is converted
// with the built-in (truncating) conversion.
// NOTE: the bounds are compared against f after conversion to F, and that
// conversion may be inexact for wide integer types.
template<class I, class F> I closest_int(F f)
{
  const I lowest = std::numeric_limits<I>::min();
  const I highest = std::numeric_limits<I>::max();

  if (f <= lowest)
    return lowest;
  if (f >= highest)
    return highest;

  return static_cast<I>(f);
}

// file: f2i.cpp
//
// compiled with MinGW x86 (gcc version 4.6.2) as:
//   g++ -Wall -O2 -std=c++03 f2i.cpp -o f2i.exe
#include <iostream>
#include <iomanip>
#include <limits>

using namespace std;

template<class I, class F> I truncAndCap(F f)
{
/*
  Converts the floating-point value f (of type F) into an integer value
  (of type I) by truncating the fractional part, and avoids undefined
  behavior by saturating:
    - returns std::numeric_limits<I>::max() when f is too big,
    - returns std::numeric_limits<I>::min() when f is too small,
    - returns I(0) when f is NaN.

  2 problems:
  - F may fail to convert to I,
    which is undefined behavior and we want to avoid that.
  - I may not convert exactly into F
    - Direct I & F comparison fails because of I to F promotion,
      which can be inexact.

  This solution is for the most practical case when I and F
  are radix-2 (binary) integer and floating-point types.
*/

/*
  NaN compares false against everything, so it would slip past both range
  checks below and reach I(f), which is undefined behavior. On IEEE 754
  implementations NaN is the only value that is unequal to itself; it has
  no nearest integer, so map it to 0 to keep the result deterministic.
*/
  if (f != f)
    return I(0);

  const int Idigits = std::numeric_limits<I>::digits;
  const bool Isigned = std::numeric_limits<I>::is_signed;

/*
  Calculate cutOffMax = 2 ^ std::numeric_limits<I>::digits
  (where ^ denotes exponentiation) as a value of type F.

  The power is built as a product of two smaller powers of two because
  I(1) << Idigits itself would not fit in I.

  We assume that F is a radix-2 (binary) floating-point type AND
  it has a big enough exponent part to hold the value of
  std::numeric_limits<I>::digits.

  FLT_MAX_10_EXP/DBL_MAX_10_EXP/LDBL_MAX_10_EXP >= 37
  (guaranteed per C++ standard from 2003/C standard from 1999)
  corresponds to log2(1e37) ~= 122, so the type I can contain
  up to 122 bits. In practice, integers longer than 64 bits
  are extremely rare (if existent at all), especially on old systems
  of the 2003 C++ standard's time.
*/
  const F cutOffMax = F(I(1) << Idigits / 2) * F(I(1) << (Idigits / 2 + Idigits % 2));

  if (f >= cutOffMax)
    return std::numeric_limits<I>::max();

/*
  cutOffMin = - 2 ^ std::numeric_limits<I>::digits for signed I's
  (the exact negation of cutOffMax) OR cutOffMin = 0 for unsigned I's.
*/
  const F cutOffMin = Isigned ? -cutOffMax : F(0);

  if (f <= cutOffMin)
    return std::numeric_limits<I>::min();

/*
  Mathematically, we may still have a little problem (2 cases):
    cutOffMin < f < std::numeric_limits<I>::min()
    std::numeric_limits<I>::max() < f < cutOffMax

  These cases are only possible when f isn't a whole number, when
  it's either std::numeric_limits<I>::min() - value in the range (0,1)
  or std::numeric_limits<I>::max() + value in the range (0,1).

  We can ignore this altogether because converting f to type I is
  guaranteed to truncate the fractional part off, and therefore
  I(f) will always be in the range
  [std::numeric_limits<I>::min(), std::numeric_limits<I>::max()].
*/

  return I(f);
}

// Converts f to type I with truncAndCap and writes one report line to
// std::cout in the form "<msg><f> -> <result>".
// f is printed in a 14-character field with a precision of 12; showpos is
// switched on while both numbers are written and switched off again before
// the line is ended. The precision set here stays in effect on std::cout.
template<class I, class F> void test(const char* msg, F f)
{
  const I converted = truncAndCap<I,F>(f);

  std::cout << msg;
  std::cout << std::showpos << std::setw(14) << std::setprecision(12) << f;
  std::cout << " -> " << converted;
  std::cout << std::noshowpos << std::endl;
}

// Calls test<I,F>(..., VAL) with a label of the form "F -> I: " built by
// stringifying the two type arguments.
// The macro intentionally does not end in a semicolon: the call site
// supplies it, so TEST(...) behaves like an ordinary expression statement
// (no stray empty statement, and it is safe in an unbraced if/else).
#define TEST(I,F,VAL) \
  test<I,F>(#F " -> " #I ": ", VAL)

int main()
{
  TEST(short, float,     -1.75f);
  TEST(short, float,     -1.25f);
  TEST(short, float,     +0.00f);
  TEST(short, float,     +1.25f);
  TEST(short, float,     +1.75f);

  TEST(short, float, -32769.00f);
  TEST(short, float, -32768.50f);
  TEST(short, float, -32768.00f);
  TEST(short, float, -32767.75f);
  TEST(short, float, -32767.25f);
  TEST(short, float, -32767.00f);
  TEST(short, float, -32766.00f);
  TEST(short, float, +32766.00f);
  TEST(short, float, +32767.00f);
  TEST(short, float, +32767.25f);
  TEST(short, float, +32767.75f);
  TEST(short, float, +32768.00f);
  TEST(short, float, +32768.50f);
  TEST(short, float, +32769.00f);

  TEST(int, float, -2147483904.00f);
  TEST(int, float, -2147483648.00f);
  TEST(int, float, -16777218.00f);
  TEST(int, float, -16777216.00f);
  TEST(int, float, -16777215.00f);
  TEST(int, float, +16777215.00f);
  TEST(int, float, +16777216.00f);
  TEST(int, float, +16777218.00f);
  TEST(int, float, +2147483648.00f);
  TEST(int, float, +2147483904.00f);

  TEST(int, double, -2147483649.00);
  TEST(int, double, -2147483648.00);
  TEST(int, double, -2147483647.75);
  TEST(int, double, -2147483647.25);
  TEST(int, double, -2147483647.00);
  TEST(int, double, +2147483647.00);
  TEST(int, double, +2147483647.25);
  TEST(int, double, +2147483647.75);
  TEST(int, double, +2147483648.00);
  TEST(int, double, +2147483649.00);

  TEST(unsigned, double,          -1.00);
  TEST(unsigned, double,          +1.00);
  TEST(unsigned, double, +4294967295.00);
  TEST(unsigned, double, +4294967295.25);
  TEST(unsigned, double, +4294967295.75);
  TEST(unsigned, double, +4294967296.00);
  TEST(unsigned, double, +4294967297.00);

  return 0;
}

float -> short:          -1.75 -> -1
float -> short:          -1.25 -> -1
float -> short:             +0 -> +0
float -> short:          +1.25 -> +1
float -> short:          +1.75 -> +1
float -> short:         -32769 -> -32768
float -> short:       -32768.5 -> -32768
float -> short:         -32768 -> -32768
float -> short:      -32767.75 -> -32767
float -> short:      -32767.25 -> -32767
float -> short:         -32767 -> -32767
float -> short:         -32766 -> -32766
float -> short:         +32766 -> +32766
float -> short:         +32767 -> +32767
float -> short:      +32767.25 -> +32767
float -> short:      +32767.75 -> +32767
float -> short:         +32768 -> +32767
float -> short:       +32768.5 -> +32767
float -> short:         +32769 -> +32767
float -> int:    -2147483904 -> -2147483648
float -> int:    -2147483648 -> -2147483648
float -> int:      -16777218 -> -16777218
float -> int:      -16777216 -> -16777216
float -> int:      -16777215 -> -16777215
float -> int:      +16777215 -> +16777215
float -> int:      +16777216 -> +16777216
float -> int:      +16777218 -> +16777218
float -> int:    +2147483648 -> +2147483647
float -> int:    +2147483904 -> +2147483647
double -> int:    -2147483649 -> -2147483648
double -> int:    -2147483648 -> -2147483648
double -> int: -2147483647.75 -> -2147483647
double -> int: -2147483647.25 -> -2147483647
double -> int:    -2147483647 -> -2147483647
double -> int:    +2147483647 -> +2147483647
double -> int: +2147483647.25 -> +2147483647
double -> int: +2147483647.75 -> +2147483647
double -> int:    +2147483648 -> +2147483647
double -> int:    +2147483649 -> +2147483647
double -> unsigned:             -1 -> 0
double -> unsigned:             +1 -> 1
double -> unsigned:    +4294967295 -> 4294967295
double -> unsigned: +4294967295.25 -> 4294967295
double -> unsigned: +4294967295.75 -> 4294967295
double -> unsigned:    +4294967296 -> 4294967295
double -> unsigned:    +4294967297 -> 4294967295