Floating point 如何将压缩整数（16.16）定点转换为浮点？_Floating Point_Fixed Point

Floating point 如何将压缩整数（16.16）定点转换为浮点？

floating-point

Floating point 如何将压缩整数（16.16）定点转换为浮点？,floating-point,fixed-point,Floating Point,Fixed Point,如何将“32位有符号定点数字（16.16）”转换为浮点（fixed>>16）+（fixed&0xffff）/65536.0是否正常？那么-2.5呢？和-0.5 还是fixed/65536.0方法正确（PS：有符号定点“-0.5”在内存中看起来怎么样？我假设2的补码32位整数和运算符与C#一样工作如何进行转换？正确且易于理解与上面的正整数等效，但速度较慢，更难读取。你基本上是用分配定律把一个除法分成两个除法，然后用一个位移位写第一个除法对于负整数，fixed&0xffff不提供小数位

如何将“32位有符号定点数字（16.16）”转换为浮点

（fixed>>16）+（fixed&0xffff）/65536.0 这样做正确吗？对于 -2.5 和 -0.5 呢？
还是 fixed/65536.0 才是正确的方法？
（PS：有符号定点数“-0.5”在内存中是什么样子的？）
我假设2的补码32位整数和运算符与C#一样工作
如何进行转换？
正确且易于理解

与上面的正整数等效，但速度较慢，更难读取。你基本上是用分配定律把一个除法分成两个除法，然后用一个位移位写第一个除法
对于负整数，fixed&0xffff 不提供小数位，因此对于负数是不正确的。
查看原始整数 -1，它应该映射到 -1/65536，而此代码返回 65535/65536。


根据编译器的不同，执行以下操作可能会更快：
fixed * (1/65536.0)

但我认为大多数现代编译器已经进行了这种优化
带符号的定点“-0.5”在内存中是什么样子的？
反转转换为：
RoundToInt(float*65536)

设置float=-0.5
为我们提供了：-32768
类固定点utils{
/**
 * Static helpers for converting between {@code float} values and signed
 * 16.16 fixed-point values stored in an {@code int} (upper 16 bits: integer
 * part, lower 16 bits: fraction; negative values are plain two's complement).
 */
class FixedPointUtils {
  /** The 16.16 fixed-point representation of 1.0 (2^16). */
  public static final int ONE = 0x10000;

  /**
   * Convert an array of floats to 16.16 fixed-point.
   * @param arr The array of floats to convert
   * @return A newly allocated array of fixed-point values, same length as {@code arr}.
   */
  public static int[] toFixed(float[] arr) {
    int[] res = new int[arr.length];
    toFixed(arr, res);
    return res;
  }

  /**
   * Convert a float to 16.16 fixed-point representation.
   * The scaled value is narrowed with a plain {@code (int)} cast, so any
   * fraction finer than 1/65536 is truncated toward zero (not rounded).
   * @param val The value to convert
   * @return The resulting fixed-point representation
   */
  public static int toFixed(float val) {
    // ONE is promoted to float (exactly 65536.0f) before the multiplication.
    return (int)(val * ONE);
  }

  /**
   * Convert an array of floats to 16.16 fixed-point, writing into a
   * caller-supplied array. Only as many elements as fit in BOTH arrays are
   * converted; any remaining slots of {@code storage} are left untouched.
   * @param arr The array of floats
   * @param storage The location to store the fixed-point values.
   */
  public static void toFixed(float[] arr, int[] storage)
  {
    // Bound by the shorter array: indexing arr with storage.length alone
    // would run past the end of arr when storage is the longer one.
    final int count = Math.min(arr.length, storage.length);
    for (int i = 0; i < count; i++) {
      storage[i] = toFixed(arr[i]);
    }
  }

  /**
   * Convert a 16.16 fixed-point value to floating point.
   * @param val The fixed-point value
   * @return The equivalent floating-point value.
   */
  public static float toFloat(int val) {
    // A single signed division handles negative values correctly; no
    // separate treatment of the integer and fractional halves is needed.
    return ((float)val) / ONE;
  }

  /**
   * Convert an array of 16.16 fixed-point values to floating point.
   * @param arr The array to convert
   * @return A newly allocated array of floats, same length as {@code arr}.
   */
  public static float[] toFloat(int[] arr) {
    float[] res = new float[arr.length];
    toFloat(arr, res);
    return res;
  }

  /**
   * Convert an array of 16.16 fixed-point values to floating point, writing
   * into a caller-supplied array. Only as many elements as fit in BOTH arrays
   * are converted; any remaining slots of {@code storage} are left untouched.
   * @param arr The array to convert
   * @param storage Pre-allocated storage for the result.
   */
  public static void toFloat(int[] arr, float[] storage)
  {
    // Bound by the shorter array so a longer storage cannot over-index arr.
    final int count = Math.min(arr.length, storage.length);
    for (int i = 0; i < count; i++) {
      storage[i] = toFloat(arr[i]);
    }
  }

}

公共静态最终整数=0x10000；
/**
*将浮点数组转换为16.16定点
*@param arr数组
*@返回新分配的定点值数组。
*/
公共静态int[]toFixed（float[]arr）{
int[]res=新的int[arr.length]；
toFixed（arr，res）；
返回res；
}
/**
*将浮点转换为16.16定点表示法
*@param val要转换的值
*@返回结果的定点表示
*/
公共静态整数固定（浮动值）{
返回（内部）（val*65536F）；
}
/**
*将浮点数组转换为16.16定点
*@param arr浮点数数组
*@param storage存储定点值的位置。
*/
固定的公共静态无效（浮点[]arr，整数[]存储）
{
对于（int i＝0；i＜
在阅读了上面的一个回答之后，我写了一个非常方便的 C++ 函数模板。你可以传入小数部分的位数（例如，BMP 文件格式使用 2.30 定点数）。如果省略小数部分的位数，函数会假定小数部分和整数部分的位数相同。
#include <math.h> // for NaN
#include <limits.h> // for CHAR_BIT = 8

/// @brief Convert a fixed-point number held in integer type T to double.
/// @tparam T Integer type that stores the raw fixed-point bits.
/// @param x Raw fixed-point value.
/// @param frac_digits Number of fractional bits; defaults to half the width
///        of T (e.g. 16 for a 32-bit T, i.e. 16.16).
/// @return x / 2^frac_digits, or NaN when frac_digits is negative or not
///         smaller than the bit width of T.
template<class T> inline double fixed_point2double(const T& x, int frac_digits = (CHAR_BIT * sizeof(T)) / 2 )
{
  // Compare in int so a negative frac_digits is rejected explicitly rather
  // than through an implicit signed-to-unsigned conversion.
  const int total_bits = static_cast<int>(CHAR_BIT * sizeof(T));
  if (frac_digits < 0 || frac_digits >= total_bits) return NAN;

  // ldexp scales by 2^-frac_digits exactly and avoids building the divisor
  // as T(1) << frac_digits, which overflows a signed T when frac_digits
  // addresses the sign bit.
  return ldexp(static_cast<double>(x), -frac_digits);
}

#包含//用于NaN
#包含//字符位=8
模板内联双定点双（常量T&x，整数分形位数=（字符位*大小（T））/2）
{
如果（frac_digits>=CHAR_BIT*sizeof（T））返回NAN；
返回double（x）/double（T（1）=字符位*sizeof（T））返回NAN；
T res=0；
for（int i=0，shift=0；iclass FixedPointUtils {
  public static final int ONE = 0x10000;

  /**
   * Convert an array of floats to 16.16 fixed-point
   * @param arr The array
   * @return A newly allocated array of fixed-point values.
   */
  public static int[] toFixed(float[] arr) {
    // One output slot per input element; the in-place overload fills it.
    final int[] fixedValues = new int[arr.length];
    toFixed(arr, fixedValues);
    return fixedValues;
  }

  /**
   * Convert a float to  16.16 fixed-point representation
   * @param val The value to convert
   * @return The resulting fixed-point representation
   */
  public static int toFixed(float val) {
    // Scale by 2^16, then narrow with a plain cast (truncates toward zero).
    final float scaled = val * 65536F;
    return (int) scaled;
  }

  /**
   * Convert an array of floats to 16.16 fixed-point
   * @param arr The array of floats
   * @param storage The location to store the fixed-point values.
   */
  public static void toFixed(float[] arr, int[] storage)
  {
    // Convert only as many elements as fit in BOTH arrays. Looping up to
    // storage.length alone over-indexes arr when storage is the longer one.
    final int count = Math.min(arr.length, storage.length);
    for (int i = 0; i < count; i++) {
      storage[i] = toFixed(arr[i]);
    }
  }

  /**
   * Convert a 16.16 fixed-point value to floating point
   * @param val The fixed-point value
   * @return The equivalent floating-point value.
   */
  public static float toFloat(int val) {
    // Widen to float first, then divide by 2^16 to drop the 16 fraction bits.
    final float widened = val;
    return widened / 65536.0f;
  }

  /**
   * Convert an array of 16.16 fixed-point values to floating point
   * @param arr The array to convert
   * @return A newly allocated array of floats.
   */
  public static float[] toFloat(int[] arr) {
    // One output slot per input element; the in-place overload fills it.
    final float[] floatValues = new float[arr.length];
    toFloat(arr, floatValues);
    return floatValues;
  }

  /**
   * Convert an array of 16.16 fixed-point values to floating point
   * @param arr The array to convert
   * @param storage Pre-allocated storage for the result.
   */
  public static void toFloat(int[] arr, float[] storage)
  {
    // Convert only as many elements as fit in BOTH arrays. Looping up to
    // storage.length alone over-indexes arr when storage is the longer one.
    final int count = Math.min(arr.length, storage.length);
    for (int i = 0; i < count; i++) {
      storage[i] = toFloat(arr[i]);
    }
  }

}

#include <math.h> // for NaN
#include <limits.h> // for CHAR_BIT = 8

/// @brief Convert a fixed-point number held in integer type T to double.
/// @tparam T Integer type that stores the raw fixed-point bits.
/// @param x Raw fixed-point value.
/// @param frac_digits Number of fractional bits; defaults to half the width
///        of T (e.g. 16 for a 32-bit T, i.e. 16.16).
/// @return x / 2^frac_digits, or NaN when frac_digits is negative or not
///         smaller than the bit width of T.
template<class T> inline double fixed_point2double(const T& x, int frac_digits = (CHAR_BIT * sizeof(T)) / 2 )
{
  // Compare in int so a negative frac_digits is rejected explicitly rather
  // than through an implicit signed-to-unsigned conversion.
  const int total_bits = static_cast<int>(CHAR_BIT * sizeof(T));
  if (frac_digits < 0 || frac_digits >= total_bits) return NAN;

  // ldexp scales by 2^-frac_digits exactly and avoids building the divisor
  // as T(1) << frac_digits, which overflows a signed T when frac_digits
  // addresses the sign bit.
  return ldexp(static_cast<double>(x), -frac_digits);
}

#include <math.h> // for NaN
#include <limits.h> // for CHAR_BIT = 8

/// @brief Read a little-endian fixed-point number from a byte buffer and
///        convert it to double.
/// @tparam T Integer type whose size and signedness describe the stored
///         value; it cannot be deduced and must be given explicitly.
/// @param x Pointer to at least sizeof(T) bytes, least significant byte first.
/// @param frac_digits Number of fractional bits; defaults to half the width
///        of T.
/// @return The decoded value divided by 2^frac_digits, or NaN when
///         frac_digits is negative or not smaller than the bit width of T.
/// @note The original author warns against using single-byte types for T;
///       multi-byte integer types are the intended use.
template<class T> inline double read_little_endian_fixed_point(const unsigned char *x, int frac_digits = (CHAR_BIT * sizeof(T)) / 2)
{
  static_assert(sizeof(T) <= sizeof(unsigned long long),
                "T must fit in unsigned long long");

  // Compare in int so a negative frac_digits is rejected explicitly rather
  // than through an implicit signed-to-unsigned conversion.
  const int total_bits = static_cast<int>(CHAR_BIT * sizeof(T));
  if (frac_digits < 0 || frac_digits >= total_bits) return NAN;

  // Assemble the bytes in an unsigned accumulator: left-shifting a byte into
  // the sign bit of a signed T is not portable, whereas unsigned shifts are
  // always well defined.
  unsigned long long raw = 0;
  for (int shift = 0; shift < total_bits; shift += CHAR_BIT)
    raw |= static_cast<unsigned long long>(x[shift / CHAR_BIT]) << shift;

  // Converting to T reinterprets the low bits with T's signedness; ldexp then
  // scales by 2^-frac_digits without building an overflowing divisor.
  return ldexp(static_cast<double>(static_cast<T>(raw)), -frac_digits);
}