流式细胞术FCS文件数据段，线性数据似乎有偏差_C_Bioinformatics

流式细胞术FCS文件数据段，线性数据似乎有偏差

流式细胞术FCS文件数据段，线性数据似乎有偏差,c,bioinformatics,C,Bioinformatics,最终和最后更新（我保证）正如Jonathan Leffler所暗示的，问题的核心是遍历数据。二进制数据在矩阵中“排列”。例如，如果我有3个事件和4个位宽为8的参数，则二进制数据 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 看起来像 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 我有两个for循环I&j，我需要用它来计算偏移量我最初有 (i * PAR * 2) + (j * PnB/8)

最终和最后更新（我保证）

正如Jonathan Leffler所暗示的，问题的核心是遍历数据。二进制数据在矩阵中“排列”。例如，如果我有3个事件和4个位宽为8的参数，则二进制数据

00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

看起来像

00 00 00 00
00 00 00 00
00 00 00 00
00 00 00 00

我有两个for循环I&j，我需要用它来计算偏移量

我最初有

(i * PAR * 2) + (j * PnB/8)

其中PAR是参数个数，PnB是位宽，i从0遍历到事件总数，j从0遍历到PAR。这个公式是不正确的，我也不知道当初是怎么得出这个公式的

我正在开发一个内部使用的流式细胞分析软件，遇到了一些问题。我用来测试软件的FCS样本数据文件是在MacOS 9 CellQuest上使用FACSCalibur生成的。当我提取FSC-H和SSC-H的数据点时，得到的结果与其他流式分析软件（例如FlowJo）不一致。我了解MacOS 9 CellQuest上生成的数据是按大端顺序存储的，并且相信我已经正确地转换了数据：

for (int i = 0; i < params[j-1].PnB/8; ++i)
{
    lebyte[i] = (bytes[(params[j-1].PnB/8)-1-i] & 0xff) << i*8u;
    cx |= lebyte[i];
}

但我还是得到了一个扭曲的数据。这可能与数据的转换方式有关。我只是在网上找不到任何信息，我相信FlowJo的制造商不会愿意分享这个秘密；）。我会继续找，看看能找到什么

更新3

抱歉，将其延长，但其他信息：

/*
 * Description of one parameter (channel) of an FCS data set.
 * fcs_events is declared elsewhere and is not visible here.
 * NOTE(review): f1/f2 presumably hold the two amplification values of the
 * FCS $PnE keyword -- confirm against the header parser.
 */
typedef struct _fcs_parameter {
    double f1;          // logarithmic decade
    double f2;          // minimum value on log scale
    unsigned int PnB;   // bit width of one stored value; PnB/8 is used as its byte count
    unsigned int PnR;   // range
    fcs_events *events; // event data
    char *sname;        // short name
    char *lname;        // filter name
} fcs_parameter;

仔细查看您显示的 memset() 和 memcpy() 行。因为您没有显示如何设置 i，也没有显示 params 结构数组中的内容，所以解释起来有点困难。但是，如果各个 PnB 成员的大小不相同，那么我认为您的 memcpy() 偏移量计算是有问题的

下面是一些代码和它的输出；您必须根据您的场景对其进行一些调整。最后一节尝试模拟 memset/memcpy 代码，因为您所显示的许多变量都没有给出解释。它还包括一种似乎更合理的替代解释

假设您有C99，您可以将一些函数声明为 static inline。还有其他一些代码也假定使用C99。把它改成C89并不难，但我不会替您做这件事

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Decode two bytes stored most-significant-first into a 16-bit value. */
static uint16_t convert_uint16be(uint8_t const *bytes)
{
    const unsigned int high = bytes[0];
    const unsigned int low = bytes[1];

    return (uint16_t)((high << 8) | low);
}

/*
 * Decode four bytes stored most-significant-first into a 32-bit value.
 * Each byte is widened to uint32_t before shifting: a uint8_t would otherwise
 * promote to (signed) int, and shifting a value >= 0x80 into bit 31 is
 * undefined behaviour.
 */
static uint32_t convert_uint32be(uint8_t const *bytes)
{
    return ((uint32_t)bytes[0] << 24) |
           ((uint32_t)bytes[1] << 16) |
           ((uint32_t)bytes[2] << 8)  |
            (uint32_t)bytes[3];
}

/*
 * Print two bytes and the big-endian 16-bit value they encode, shown in hex
 * and as a signed decimal.
 */
static void print16(uint8_t const *bytes)
{
  const uint16_t as_unsigned = convert_uint16be(bytes);
  const int16_t  as_signed   = as_unsigned;

  printf("0x%.2X 0x%.2X = 0x%.4" PRIX16 " = %6" PRId16 "\n",
         bytes[0], bytes[1], as_unsigned, as_signed);
}

/*
 * Print four bytes and the big-endian 32-bit value they encode, shown in hex
 * and as a signed decimal.
 */
static void print32(uint8_t const *bytes)
{
  const uint32_t as_unsigned = convert_uint32be(bytes);
  const int32_t  as_signed   = as_unsigned;

  printf("0x%.2X 0x%.2X 0x%.2X 0x%.2X = 0x%.8" PRIX32 " = %11" PRId32 "\n",
         bytes[0], bytes[1], bytes[2], bytes[3], as_unsigned, as_signed);
}

/*
 * Demonstration driver:
 *   1. reads the byte pair FF 03 as a big-endian and as a little-endian value;
 *   2. dumps a sample buffer as big-endian 16-bit and 32-bit values;
 *   3. walks a record whose parameters have mixed widths, comparing the
 *      formula-based offset with a running offset.
 */
int main(void)
{
    enum { PnB = 16 };  // bit width of the data stored for one channel value,
                        // e.g. the value for sample A is stored in 16 bits
    uint8_t bytes[PnB/8];   // raw bytes; unsigned so 0xff is stored as 255
    unsigned int lebyte[PnB/8];
    unsigned int cx = 0;
    unsigned int b0, b1;

    /*  |  [0] |  [1] |
     *  | 0xff | 0x03 |
     */
    bytes[0] = 0xff;
    bytes[1] = 0x03;

    // big-endian reading: bytes[0] is the most significant byte
    b0 = (unsigned int)bytes[0] << 8u;
    b1 = bytes[1];
    cx = b0 | b1;

    printf("%5u = 0x%.4X\n", cx, cx);

    // little-endian reading: bytes[0] is the least significant byte
    cx = 0;
    for (int i = 0; i < PnB/8; ++i)
    {
        lebyte[i] = (unsigned int)bytes[i] << (i * 8u);
        cx |= lebyte[i];
    }
    printf("%5u = 0x%.4X\n", cx, cx);

    print16(bytes);

    uint8_t data[] =
    {
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x03, 0xFF,
      0x00, 0x00, 0xFF, 0xFF,
      0x08, 0x08, 0x09, 0xC0,
      0x80, 0x80, 0x90, 0x0C,
      0xFF, 0xFF, 0xED, 0xBC,
    };
    int data_size = sizeof(data) / sizeof(data[0]);

    for (int i = 0; i < data_size; i += 2)
    {
      print16(&data[i]);
    }
    for (int i = 0; i < data_size; i += 4)
    {
      print32(&data[i]);
    }

    {
      struct { int PnB; } params[] = { { 16 }, { 16 }, { 32 }, { 16 }, { 16 }, };
      int num_params = sizeof(params) / sizeof(params[0]);
      uint8_t value[4];
      int i = 0;
      int offset = 0;   // running byte offset of the current parameter
      for (int j = 1; j <= num_params; j++)
      {
        // j is 1-based, so params[j-1] describes the current parameter.
        const int width = params[j-1].PnB;
        const int size  = width / 8;
        const int calc  = (i*7*2) + (j*size);

        printf("i = %2d; j = %2d; offset = %2d; calc = %2d; size = %2d\n",
               i, j, offset, calc, size);

        /* The calculation works plausibly when all params[n].PnB are the same
         * size, but not otherwise
         */
        // Clear the whole buffer so no stale or uninitialised byte is printed.
        memset(value, 0, sizeof value);
        memcpy(value, data + calc, (size_t)size);
        if (width == 16)
        {
          print16(value);
        }
        else
        {
          print32(value);
        }

        // Same parameter again, this time located with the running offset.
        memset(value, 0, sizeof value);
        memcpy(value, data + offset, (size_t)size);
        if (width == 16)
        {
          print16(value);
        }
        else
        {
          print32(value);
        }
        offset += size;
      }
    }

    return 0;
}

问题是我用来计算偏移量的公式

我应该使用以下方法：

/*
 * Visit every value of the data segment: data->TOT events, each holding
 * data->PAR parameters stored back to back. A running byte offset is used
 * instead of (i*PAR*PnB/8)+(j*PnB/8), because that formula is only right
 * when every parameter has the same PnB.
 */
size_t offset = 0;  /* byte offset of the current value inside databuf */
for (int i = 0; i < data->TOT; ++i)
{
    /* The offset is advanced in the loop header so that it stays correct
     * even if the elided body skips an iteration with continue. */
    for (int j = 0; j < data->PAR; offset += params[j].PnB / 8, ++j)
    {
        const size_t nbytes = params[j].PnB / 8;

        // code removed for brevity

        memset(bytes, '\0', nbytes);
        memcpy(bytes, databuf + offset, nbytes);

        // more code here
    }
}

for (int i = 0; i < data->TOT; ++i)
{
    for (int j = 0; j < data->PAR; ++j)
    {
        // 为简洁起见，删除了代码
        memset(bytes, '\0', sizeof(char)*params[j].PnB/8);
        memcpy(bytes, databuf+((i*data->PAR*params[j].PnB/8)+(j*params[j].PnB/8)), params[j].PnB/8);
        // 这里有更多代码
    }
}

谢谢你的帮助！如果您没有提到PnB问题，我不会意识到问题是我计算偏移量的方式。

我想我们需要看到更多的代码。您的转换功能不明显。以大端顺序处理数据的常规方法是通过适当数量的字节进行单循环，根据需要生成32位或16位值，必要时担心符号等。你能给出一个十六进制转储，比如说32字节的数据吗？并展示如何设置PnB（以及PnR在哪里发挥作用），等等。因此我对原始源代码进行了更正（在原始帖子中进行了更改以节省空间）。十六进制输出（例如）字节[0]=0x04和字节[1]=FFFFFF04，在另一种情况下分别为0x00和0xCD。我想知道我是否读错了数据。我需要花点时间来检查一下。最好是对数据的前32个字节进行十六进制转储，并指示哪些字节来自16位整数，哪些字节来自32位整数。例如：

0x0000:7F 45 4C 46 02 01 00 00 00

和

0x0010:02 00 3E 00 01 00 00 00 E4 03 40 00 00

是两行16字节（通常来自ELF二进制）。我没有在那里标记边界…可能：

0x0000: [7F 45 4C 46][02 01][01 00][00 00 00 00][00 00 00 00] 可以作为一个还算合理的例子。这里是文件中的前7组2字节（14字节）。位宽为16，格式应为big-endian（至少文件头指示为big-endian）：02 88 01 81 00 00 00 00

。我（根据标准）标记了边界，如so

[02 88][01 81][00 00][00 00][00 00][00 00][00 00]。如果我错了，请纠正我，但如果它是大端，0288 就应该是 8802，对吗？由于PnR=1024，这超出了此文件的范围。如果数据是大端，则（十六进制）02 88 为 0x0288；如果数据是little endian，则为 0x8802
。读取大端的数值比读取小端的容易得多。方括号表示法是我临时想出来的，并不是标准写法。当16位和32位值混合出现时，它只是便于进行明确的分组。对于统一的宽度（全部16位），不需要太多的标记。谢谢，我会研究一下，然后再给你回复。在我的示例中，PnB都是相同的（16），但很高兴知道这一点，因为标准确实提到PnB可以在不同的通道之间变化。通过查看您的示例，我还意识到 (i*7*2)+(j*params[j-1].PnB/8) 应该是 (i*data->PAR*2)+(j*params[j-1].PnB/8)，其中data->PAR包含流式数据文件中的参数数量。我使用的示例文件有7个参数，所以我用了一个固定的数字。流式数据是一个数字矩阵（假设为列表模式存储），按参数排列各个事件。我使用的示例文件包含7个参数的40000个事件。我得到的结果肯定有些奇怪。尽管在阴性和阳性样本之间
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Decode two bytes stored most-significant-first into a 16-bit value. */
static uint16_t convert_uint16be(uint8_t const *bytes)
{
    const unsigned int high = bytes[0];
    const unsigned int low = bytes[1];

    return (uint16_t)((high << 8) | low);
}

/*
 * Decode four bytes stored most-significant-first into a 32-bit value.
 * Each byte is widened to uint32_t before shifting: a uint8_t would otherwise
 * promote to (signed) int, and shifting a value >= 0x80 into bit 31 is
 * undefined behaviour.
 */
static uint32_t convert_uint32be(uint8_t const *bytes)
{
    return ((uint32_t)bytes[0] << 24) |
           ((uint32_t)bytes[1] << 16) |
           ((uint32_t)bytes[2] << 8)  |
            (uint32_t)bytes[3];
}

/*
 * Print two bytes and the big-endian 16-bit value they encode, shown in hex
 * and as a signed decimal.
 */
static void print16(uint8_t const *bytes)
{
  const uint16_t as_unsigned = convert_uint16be(bytes);
  const int16_t  as_signed   = as_unsigned;

  printf("0x%.2X 0x%.2X = 0x%.4" PRIX16 " = %6" PRId16 "\n",
         bytes[0], bytes[1], as_unsigned, as_signed);
}

/*
 * Print four bytes and the big-endian 32-bit value they encode, shown in hex
 * and as a signed decimal.
 */
static void print32(uint8_t const *bytes)
{
  const uint32_t as_unsigned = convert_uint32be(bytes);
  const int32_t  as_signed   = as_unsigned;

  printf("0x%.2X 0x%.2X 0x%.2X 0x%.2X = 0x%.8" PRIX32 " = %11" PRId32 "\n",
         bytes[0], bytes[1], bytes[2], bytes[3], as_unsigned, as_signed);
}

/*
 * Demonstration driver:
 *   1. reads the byte pair FF 03 as a big-endian and as a little-endian value;
 *   2. dumps a sample buffer as big-endian 16-bit and 32-bit values;
 *   3. walks a record whose parameters have mixed widths, comparing the
 *      formula-based offset with a running offset.
 */
int main(void)
{
    enum { PnB = 16 };  // bit width of the data stored for one channel value,
                        // e.g. the value for sample A is stored in 16 bits
    uint8_t bytes[PnB/8];   // raw bytes; unsigned so 0xff is stored as 255
    unsigned int lebyte[PnB/8];
    unsigned int cx = 0;
    unsigned int b0, b1;

    /*  |  [0] |  [1] |
     *  | 0xff | 0x03 |
     */
    bytes[0] = 0xff;
    bytes[1] = 0x03;

    // big-endian reading: bytes[0] is the most significant byte
    b0 = (unsigned int)bytes[0] << 8u;
    b1 = bytes[1];
    cx = b0 | b1;

    printf("%5u = 0x%.4X\n", cx, cx);

    // little-endian reading: bytes[0] is the least significant byte
    cx = 0;
    for (int i = 0; i < PnB/8; ++i)
    {
        lebyte[i] = (unsigned int)bytes[i] << (i * 8u);
        cx |= lebyte[i];
    }
    printf("%5u = 0x%.4X\n", cx, cx);

    print16(bytes);

    uint8_t data[] =
    {
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x03, 0xFF,
      0x00, 0x00, 0xFF, 0xFF,
      0x08, 0x08, 0x09, 0xC0,
      0x80, 0x80, 0x90, 0x0C,
      0xFF, 0xFF, 0xED, 0xBC,
    };
    int data_size = sizeof(data) / sizeof(data[0]);

    for (int i = 0; i < data_size; i += 2)
    {
      print16(&data[i]);
    }
    for (int i = 0; i < data_size; i += 4)
    {
      print32(&data[i]);
    }

    {
      struct { int PnB; } params[] = { { 16 }, { 16 }, { 32 }, { 16 }, { 16 }, };
      int num_params = sizeof(params) / sizeof(params[0]);
      uint8_t value[4];
      int i = 0;
      int offset = 0;   // running byte offset of the current parameter
      for (int j = 1; j <= num_params; j++)
      {
        // j is 1-based, so params[j-1] describes the current parameter.
        const int width = params[j-1].PnB;
        const int size  = width / 8;
        const int calc  = (i*7*2) + (j*size);

        printf("i = %2d; j = %2d; offset = %2d; calc = %2d; size = %2d\n",
               i, j, offset, calc, size);

        /* The calculation works plausibly when all params[n].PnB are the same
         * size, but not otherwise
         */
        // Clear the whole buffer so no stale or uninitialised byte is printed.
        memset(value, 0, sizeof value);
        memcpy(value, data + calc, (size_t)size);
        if (width == 16)
        {
          print16(value);
        }
        else
        {
          print32(value);
        }

        // Same parameter again, this time located with the running offset.
        memset(value, 0, sizeof value);
        memcpy(value, data + offset, (size_t)size);
        if (width == 16)
        {
          print16(value);
        }
        else
        {
          print32(value);
        }
        offset += size;
      }
    }

    return 0;
}

65283 = 0xFF03
 1023 = 0x03FF
0xFF 0x03 = 0xFF03 =   -253
0x00 0x00 = 0x0000 =      0
0x00 0x00 = 0x0000 =      0
0x00 0x00 = 0x0000 =      0
0x03 0xFF = 0x03FF =   1023
0x00 0x00 = 0x0000 =      0
0xFF 0xFF = 0xFFFF =     -1
0x08 0x08 = 0x0808 =   2056
0x09 0xC0 = 0x09C0 =   2496
0x80 0x80 = 0x8080 = -32640
0x90 0x0C = 0x900C = -28660
0xFF 0xFF = 0xFFFF =     -1
0xED 0xBC = 0xEDBC =  -4676
0x00 0x00 0x00 0x00 = 0x00000000 =           0
0x00 0x00 0x03 0xFF = 0x000003FF =        1023
0x00 0x00 0xFF 0xFF = 0x0000FFFF =       65535
0x08 0x08 0x09 0xC0 = 0x080809C0 =   134744512
0x80 0x80 0x90 0x0C = 0x8080900C = -2139058164
0xFF 0xFF 0xED 0xBC = 0xFFFFEDBC =       -4676
i =  0; j =  1; offset =  0; calc =  2; size =  2
0x00 0x00 = 0x0000 =      0
0x00 0x00 = 0x0000 =      0
i =  0; j =  2; offset =  2; calc =  4; size =  2
0x00 0x00 0x00 0x00 = 0x00000000 =           0
0x00 0x00 0x00 0x00 = 0x00000000 =           0
i =  0; j =  3; offset =  4; calc = 12; size =  4
0x08 0x08 = 0x0808 =   2056
0x00 0x00 = 0x0000 =      0
i =  0; j =  4; offset =  8; calc =  8; size =  2
0x00 0x00 = 0x0000 =      0
0x00 0x00 = 0x0000 =      0
i =  0; j =  5; offset = 10; calc = 10; size =  2
0xFF 0xFF 0x03 0xFF = 0xFFFF03FF =      -64513
0xFF 0xFF 0x03 0xFF = 0xFFFF03FF =      -64513

/*
 * Visit every value of the data segment: data->TOT events, each holding
 * data->PAR parameters stored back to back. A running byte offset is used
 * instead of (i*PAR*PnB/8)+(j*PnB/8), because that formula is only right
 * when every parameter has the same PnB.
 */
size_t offset = 0;  /* byte offset of the current value inside databuf */
for (int i = 0; i < data->TOT; ++i)
{
    /* The offset is advanced in the loop header so that it stays correct
     * even if the elided body skips an iteration with continue. */
    for (int j = 0; j < data->PAR; offset += params[j].PnB / 8, ++j)
    {
        const size_t nbytes = params[j].PnB / 8;

        // code removed for brevity

        memset(bytes, '\0', nbytes);
        memcpy(bytes, databuf + offset, nbytes);

        // more code here
    }
}