C++03 中最接近浮点值的整数
对于某个整数类型,即使浮点值远远超出该整数类型的可表示范围,如何找到与某个浮点值最接近的整数值? 或者更准确地说: C++03 中最接近浮点值的整数,c++,floating-point,type-conversion,c++03,integral,C++,Floating Point,Type Conversion,C++03,Integral,对于某个整数类型,即使浮点值远远超出该整数类型的可表示范围,如何找到与某个浮点值最接近的整数值? 或者更准确地说: 设F为浮点类型(可能是float、double或long double)。 设I为整数类型。 假设F和I都具有std::numeric_limits的有效特化。 给定一个F的可表示值,并且仅使用C++03,如何找到I的最接近的可表示值? 我追求的是一个纯粹的、高效的、线程安全的解决方案,这个解决方案除了C++03所保证的以外,对平台没有任何假设。 如果不存在这样的解决方案,是否可以使用C99/C++11
F
为浮点类型(可能是float
、double
或long double
)。
设I
为整数类型
假设F
和I
都具有std::numeric_limits
的有效特化
给定一个F
的可表示值,并且仅使用C++03,如何找到I
的最接近的可表示值
我追求的是一个纯粹的、高效的、线程安全的解决方案,这个解决方案除了C++03所保证的以外,对平台没有任何假设
如果不存在这样的解决方案,是否可以使用C99/C++11的新功能找到一个解决方案
使用C99的lround()
似乎有问题,因为它报告定义域错误的方式并不简单。这些定义域错误能否以可移植且线程安全的方式捕获?
注意:我知道Boost可能通过其boost::numeric::converter
模板提供了一个解决方案,但由于其高度复杂性和冗长性,我无法从中提取要点,因此我无法检查他们的解决方案是否做出了超出C++03的假设
当f
的整数部分不是I
的可表示值时,由于C++03未定义I(f)
的结果,以下简单方法失败
template<class I, class F> I closest_int(F f)
{
    // Plain truncating conversion with no range check at all.
    // WARNING: C++03 leaves the result undefined when the integral part of f
    // is not representable in I, so this is only safe for in-range inputs.
    return static_cast<I>(f);
}
这次I(f)
将始终有一个定义良好的结果,但是,由于F(std::numeric_limits<I>::max())
可能比std::numeric_limits<I>::max()
小得多,我们可能会对一个比std::numeric_limits<I>::max()
小好几个整数值的浮点值返回std::numeric_limits<I>::max()。
请注意,所有这些问题之所以出现,是因为转换F(i)
究竟是向上舍入还是向下舍入到最接近的可表示浮点值,标准并未规定
以下是C++03(4.9 浮点-整数转换,Floating-integral conversions)中的相关部分:
整数类型或枚举类型的右值可以转换为浮点类型的右值。
如果可能,结果是精确的。否则,由实现定义选择
下一个较低或较高的可表示值。
对于基数为2(二进制)的浮点类型和整数类型(整数类型可达64位乃至更长),我有一个实用的解决方案。见下文。代码中的注释应该足够清楚。程序输出附在后面。
// file: f2i.cpp
//
// compiled with MinGW x86 (gcc version 4.6.2) as:
// g++ -Wall -O2 -std=c++03 f2i.cpp -o f2i.exe
#include <iostream>
#include <iomanip>
#include <limits>
using namespace std;
template<class I, class F> I truncAndCap(F f)
{
    /*
      Converts the floating-point value f (of type F) into an integer value
      (of type I) by truncating the fractional part, while avoiding the
      undefined behavior of an out-of-range conversion:
        - f too big for I   -> std::numeric_limits<I>::max()
        - f too small for I -> std::numeric_limits<I>::min()
        - f is NaN          -> I(0)

      3 problems are being worked around:
        - F may fail to convert to I, which is undefined behavior.
        - I may not convert exactly into F.
        - Direct I & F comparison fails because of the I to F promotion,
          which can be inexact.

      This solution is for the most practical case when I and F
      are radix-2 (binary) integer and floating-point types.
    */
    const int  Idigits = std::numeric_limits<I>::digits;
    const bool Isigned = std::numeric_limits<I>::is_signed;

    /*
      NaN compares false against everything, so it would slip past both
      range checks below and reach the undefined conversion I(f).
      A NaN is the only value that is not equal to itself.
    */
    if (f != f)
        return I(0);

    /*
      Calculate cutOffMax = 2 ^ std::numeric_limits<I>::digits
      (where ^ denotes exponentiation) as a value of type F.
      The power is built from two halves because I(1) << Idigits itself
      would not fit into I; each half is a power of two and is therefore
      assumed to convert to F exactly.

      We assume that F is a radix-2 (binary) floating-point type AND
      it has a big enough exponent part to hold the value of
      std::numeric_limits<I>::digits.
      FLT_MAX_10_EXP/DBL_MAX_10_EXP/LDBL_MAX_10_EXP >= 37
      (guaranteed per C++ standard from 2003/C standard from 1999)
      corresponds to log2(1e37) ~= 122, so the type I can contain
      up to 122 bits.
    */
    const F lowHalf   = static_cast<F>(I(1) << (Idigits / 2));
    const F highHalf  = static_cast<F>(I(1) << (Idigits / 2 + Idigits % 2));
    const F cutOffMax = lowHalf * highHalf;
    if (f >= cutOffMax)
        return std::numeric_limits<I>::max();

    /*
      cutOffMin = - 2 ^ std::numeric_limits<I>::digits for signed I's,
      or 0 for unsigned I's.
    */
    const F cutOffMin = Isigned ? -cutOffMax : F(0);
    if (f <= cutOffMin)
        return std::numeric_limits<I>::min();

    /*
      Mathematically, two cases are still left over:
        cutOffMin < f < std::numeric_limits<I>::min()
        std::numeric_limits<I>::max() < f < cutOffMax
      They are only possible when f isn't a whole number, i.e. it is
      std::numeric_limits<I>::min() - value in the range (0,1)
      or std::numeric_limits<I>::max() + value in the range (0,1).
      Converting f to type I truncates the fractional part off, so
      I(f) always lands in
      [std::numeric_limits<I>::min(), std::numeric_limits<I>::max()].
    */
    return I(f);
}
template<class I, class F> void test(const char* msg, F f)
{
    // Run the conversion under test.
    const I converted = truncAndCap<I,F>(f);

    // One report line: label, the input (explicit sign, field width 14,
    // precision 12), an arrow, then the converted value (still with
    // explicit sign). The sign flag is cleared again before the newline.
    std::cout << msg;
    std::cout << std::showpos << std::setw(14) << std::setprecision(12) << f;
    std::cout << " -> " << converted;
    std::cout << std::noshowpos << std::endl;
}
// Convenience wrapper around test<I,F>(): stringizes the two type names to
// build the "F -> I: " label passed as msg, and forwards VAL as the input.
// NOTE: the expansion already ends with ';', so writing `TEST(...);` at a
// call site produces an additional empty statement.
#define TEST(I,F,VAL) \
test<I,F>(#F " -> " #I ": ", VAL);
int main()
{
TEST(short, float, -1.75f);
TEST(short, float, -1.25f);
TEST(short, float, +0.00f);
TEST(short, float, +1.25f);
TEST(short, float, +1.75f);
TEST(short, float, -32769.00f);
TEST(short, float, -32768.50f);
TEST(short, float, -32768.00f);
TEST(short, float, -32767.75f);
TEST(short, float, -32767.25f);
TEST(short, float, -32767.00f);
TEST(short, float, -32766.00f);
TEST(short, float, +32766.00f);
TEST(short, float, +32767.00f);
TEST(short, float, +32767.25f);
TEST(short, float, +32767.75f);
TEST(short, float, +32768.00f);
TEST(short, float, +32768.50f);
TEST(short, float, +32769.00f);
TEST(int, float, -2147483904.00f);
TEST(int, float, -2147483648.00f);
TEST(int, float, -16777218.00f);
TEST(int, float, -16777216.00f);
TEST(int, float, -16777215.00f);
TEST(int, float, +16777215.00f);
TEST(int, float, +16777216.00f);
TEST(int, float, +16777218.00f);
TEST(int, float, +2147483648.00f);
TEST(int, float, +2147483904.00f);
TEST(int, double, -2147483649.00);
TEST(int, double, -2147483648.00);
TEST(int, double, -2147483647.75);
TEST(int, double, -2147483647.25);
TEST(int, double, -2147483647.00);
TEST(int, double, +2147483647.00);
TEST(int, double, +2147483647.25);
TEST(int, double, +2147483647.75);
TEST(int, double, +2147483648.00);
TEST(int, double, +2147483649.00);
TEST(unsigned, double, -1.00);
TEST(unsigned, double, +1.00);
TEST(unsigned, double, +4294967295.00);
TEST(unsigned, double, +4294967295.25);
TEST(unsigned, double, +4294967295.75);
TEST(unsigned, double, +4294967296.00);
TEST(unsigned, double, +4294967297.00);
return 0;
}
写得很好的问题。我希望它们都是这样。@AlexeyFrunze我想要'float->int',然而,在我微弱的尝试中,我将最大和最小整数转换成了float,引用的意思是说明后一种转换的相反方向。我将尝试进行编辑,使其更加清晰。@AlexeyFrunze您是否再次删除了您的问题?或者我搞砸了什么?抱歉,我在理解问题后删除了评论。我知道这不是你的问题的核心,但如果你想要最接近的整数而不是截断的整数部分,你不应该使用I(f+0.5)吗?我收回那条[评论]。汇编输出看起来合理。起初,编译器将调用内联到
truncAndCap()
,这就是为什么我在转换代码附近看到了很多不相关的东西(与std::cout
相关)。添加-fno-inline
表明truncAndCap()
很短。
template<class I, class F> I closest_int(F f)
{
    // Clamp against the integer limits before converting. Each comparison
    // mixes F with I, so the limit is first converted to floating point.
    // NOTE(review): that conversion may be inexact for wide integer types,
    // in which case the clamp boundary is not exactly min()/max().
    const I lowest  = std::numeric_limits<I>::min();
    const I highest = std::numeric_limits<I>::max();

    if (f <= lowest)
        return lowest;
    if (highest <= f)
        return highest;

    // f lies strictly between the two (converted) limits: truncate.
    return static_cast<I>(f);
}
// file: f2i.cpp
//
// compiled with MinGW x86 (gcc version 4.6.2) as:
// g++ -Wall -O2 -std=c++03 f2i.cpp -o f2i.exe
#include <iostream>
#include <iomanip>
#include <limits>
using namespace std;
template<class I, class F> I truncAndCap(F f)
{
    /*
      Converts the floating-point value f (of type F) into an integer value
      (of type I) by truncating the fractional part, while avoiding the
      undefined behavior of an out-of-range conversion:
        - f too big for I   -> std::numeric_limits<I>::max()
        - f too small for I -> std::numeric_limits<I>::min()
        - f is NaN          -> I(0)

      3 problems are being worked around:
        - F may fail to convert to I, which is undefined behavior.
        - I may not convert exactly into F.
        - Direct I & F comparison fails because of the I to F promotion,
          which can be inexact.

      This solution is for the most practical case when I and F
      are radix-2 (binary) integer and floating-point types.
    */
    const int  Idigits = std::numeric_limits<I>::digits;
    const bool Isigned = std::numeric_limits<I>::is_signed;

    /*
      NaN compares false against everything, so it would slip past both
      range checks below and reach the undefined conversion I(f).
      A NaN is the only value that is not equal to itself.
    */
    if (f != f)
        return I(0);

    /*
      Calculate cutOffMax = 2 ^ std::numeric_limits<I>::digits
      (where ^ denotes exponentiation) as a value of type F.
      The power is built from two halves because I(1) << Idigits itself
      would not fit into I; each half is a power of two and is therefore
      assumed to convert to F exactly.

      We assume that F is a radix-2 (binary) floating-point type AND
      it has a big enough exponent part to hold the value of
      std::numeric_limits<I>::digits.
      FLT_MAX_10_EXP/DBL_MAX_10_EXP/LDBL_MAX_10_EXP >= 37
      (guaranteed per C++ standard from 2003/C standard from 1999)
      corresponds to log2(1e37) ~= 122, so the type I can contain
      up to 122 bits.
    */
    const F lowHalf   = static_cast<F>(I(1) << (Idigits / 2));
    const F highHalf  = static_cast<F>(I(1) << (Idigits / 2 + Idigits % 2));
    const F cutOffMax = lowHalf * highHalf;
    if (f >= cutOffMax)
        return std::numeric_limits<I>::max();

    /*
      cutOffMin = - 2 ^ std::numeric_limits<I>::digits for signed I's,
      or 0 for unsigned I's.
    */
    const F cutOffMin = Isigned ? -cutOffMax : F(0);
    if (f <= cutOffMin)
        return std::numeric_limits<I>::min();

    /*
      Mathematically, two cases are still left over:
        cutOffMin < f < std::numeric_limits<I>::min()
        std::numeric_limits<I>::max() < f < cutOffMax
      They are only possible when f isn't a whole number, i.e. it is
      std::numeric_limits<I>::min() - value in the range (0,1)
      or std::numeric_limits<I>::max() + value in the range (0,1).
      Converting f to type I truncates the fractional part off, so
      I(f) always lands in
      [std::numeric_limits<I>::min(), std::numeric_limits<I>::max()].
    */
    return I(f);
}
template<class I, class F> void test(const char* msg, F f)
{
    // Run the conversion under test.
    const I converted = truncAndCap<I,F>(f);

    // One report line: label, the input (explicit sign, field width 14,
    // precision 12), an arrow, then the converted value (still with
    // explicit sign). The sign flag is cleared again before the newline.
    std::cout << msg;
    std::cout << std::showpos << std::setw(14) << std::setprecision(12) << f;
    std::cout << " -> " << converted;
    std::cout << std::noshowpos << std::endl;
}
// Convenience wrapper around test<I,F>(): stringizes the two type names to
// build the "F -> I: " label passed as msg, and forwards VAL as the input.
// NOTE: the expansion already ends with ';', so writing `TEST(...);` at a
// call site produces an additional empty statement.
#define TEST(I,F,VAL) \
test<I,F>(#F " -> " #I ": ", VAL);
int main()
{
TEST(short, float, -1.75f);
TEST(short, float, -1.25f);
TEST(short, float, +0.00f);
TEST(short, float, +1.25f);
TEST(short, float, +1.75f);
TEST(short, float, -32769.00f);
TEST(short, float, -32768.50f);
TEST(short, float, -32768.00f);
TEST(short, float, -32767.75f);
TEST(short, float, -32767.25f);
TEST(short, float, -32767.00f);
TEST(short, float, -32766.00f);
TEST(short, float, +32766.00f);
TEST(short, float, +32767.00f);
TEST(short, float, +32767.25f);
TEST(short, float, +32767.75f);
TEST(short, float, +32768.00f);
TEST(short, float, +32768.50f);
TEST(short, float, +32769.00f);
TEST(int, float, -2147483904.00f);
TEST(int, float, -2147483648.00f);
TEST(int, float, -16777218.00f);
TEST(int, float, -16777216.00f);
TEST(int, float, -16777215.00f);
TEST(int, float, +16777215.00f);
TEST(int, float, +16777216.00f);
TEST(int, float, +16777218.00f);
TEST(int, float, +2147483648.00f);
TEST(int, float, +2147483904.00f);
TEST(int, double, -2147483649.00);
TEST(int, double, -2147483648.00);
TEST(int, double, -2147483647.75);
TEST(int, double, -2147483647.25);
TEST(int, double, -2147483647.00);
TEST(int, double, +2147483647.00);
TEST(int, double, +2147483647.25);
TEST(int, double, +2147483647.75);
TEST(int, double, +2147483648.00);
TEST(int, double, +2147483649.00);
TEST(unsigned, double, -1.00);
TEST(unsigned, double, +1.00);
TEST(unsigned, double, +4294967295.00);
TEST(unsigned, double, +4294967295.25);
TEST(unsigned, double, +4294967295.75);
TEST(unsigned, double, +4294967296.00);
TEST(unsigned, double, +4294967297.00);
return 0;
}
float -> short: -1.75 -> -1
float -> short: -1.25 -> -1
float -> short: +0 -> +0
float -> short: +1.25 -> +1
float -> short: +1.75 -> +1
float -> short: -32769 -> -32768
float -> short: -32768.5 -> -32768
float -> short: -32768 -> -32768
float -> short: -32767.75 -> -32767
float -> short: -32767.25 -> -32767
float -> short: -32767 -> -32767
float -> short: -32766 -> -32766
float -> short: +32766 -> +32766
float -> short: +32767 -> +32767
float -> short: +32767.25 -> +32767
float -> short: +32767.75 -> +32767
float -> short: +32768 -> +32767
float -> short: +32768.5 -> +32767
float -> short: +32769 -> +32767
float -> int: -2147483904 -> -2147483648
float -> int: -2147483648 -> -2147483648
float -> int: -16777218 -> -16777218
float -> int: -16777216 -> -16777216
float -> int: -16777215 -> -16777215
float -> int: +16777215 -> +16777215
float -> int: +16777216 -> +16777216
float -> int: +16777218 -> +16777218
float -> int: +2147483648 -> +2147483647
float -> int: +2147483904 -> +2147483647
double -> int: -2147483649 -> -2147483648
double -> int: -2147483648 -> -2147483648
double -> int: -2147483647.75 -> -2147483647
double -> int: -2147483647.25 -> -2147483647
double -> int: -2147483647 -> -2147483647
double -> int: +2147483647 -> +2147483647
double -> int: +2147483647.25 -> +2147483647
double -> int: +2147483647.75 -> +2147483647
double -> int: +2147483648 -> +2147483647
double -> int: +2147483649 -> +2147483647
double -> unsigned: -1 -> 0
double -> unsigned: +1 -> 1
double -> unsigned: +4294967295 -> 4294967295
double -> unsigned: +4294967295.25 -> 4294967295
double -> unsigned: +4294967295.75 -> 4294967295
double -> unsigned: +4294967296 -> 4294967295
double -> unsigned: +4294967297 -> 4294967295