Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/c/57.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C 使用数据包MMAP和数据包TX环发送数据比;“正常”;(无)_C_Performance_Sockets_Network Programming_Circular Buffer - Fatal编程技术网

C 使用数据包MMAP和数据包TX环发送数据比;“正常”;(无)

C 使用数据包MMAP和数据包TX环发送数据比;“正常”;(无),c,performance,sockets,network-programming,circular-buffer,C,Performance,Sockets,Network Programming,Circular Buffer,我正在用C编写一个流量生成器,使用PACKET_MMAP socket选项创建一个环形缓冲区,通过原始套接字发送数据。环形缓冲区充满要发送的以太网帧,并调用sendto。环形缓冲区的全部内容通过套接字发送,这应该比在内存中使用缓冲区并对缓冲区中需要发送的每一帧重复调用sendto提供更高的性能 当不使用数据包映射时,在调用sendto时,单个帧从用户地内存中的缓冲区复制到内核内存中的SK buf,然后内核必须将数据包复制到NIC访问的内存中进行DMA,并向NIC发送信号,将该帧DMA到其自己的硬

我正在用C编写一个流量生成器,使用PACKET_MMAP socket选项创建一个环形缓冲区,通过原始套接字发送数据。环形缓冲区充满要发送的以太网帧,并调用
sendto
。环形缓冲区的全部内容通过套接字发送,这应该比在内存中使用缓冲区并对缓冲区中需要发送的每一帧重复调用
sendto
提供更高的性能

当不使用数据包映射时,在调用
sendto
时,单个帧从用户地内存中的缓冲区复制到内核内存中的SK buf,然后内核必须将数据包复制到NIC访问的内存中进行DMA,并向NIC发送信号,将该帧DMA到其自己的硬件缓冲区中,并将其排队等待传输。当使用PACKET_MMAP socket选项时,mmapped内存由应用程序分配并链接到原始套接字。应用程序将数据包放入mmapped缓冲区,调用
sendto
,内核不必将数据包复制到SK buf中,它可以直接从mmapped缓冲区读取数据包。此外,可以从环形缓冲区读取数据包的“块”,而不是单个数据包/帧。因此,性能的提高是一次sys调用来复制多个帧,而对每个帧执行一次较少的复制操作来将其放入NIC硬件缓冲区

当我将使用数据包映射的套接字的性能与“正常”套接字(包含单个数据包的字符缓冲区)进行比较时,根本没有性能优势。这是为什么?当在Tx模式下使用PACKET_MMAP时,每个环块只能放入一个帧(而不是像Rx模式那样每个环块有多个帧),但是我正在创建256个块,所以我们应该在单个
发送到
呼叫中发送256个帧,对吗

使用数据包MMAP时的性能,
main()
调用
PACKET\u tx\u MMAP()

没有数据包映射的性能,
main()
调用
PACKET\u tx()

packet\u tx()
函数似乎比
packet\u tx\u mmap()
函数略快,但它也略短,因此我认为性能的最小提高只是
packet\u tx
中存在的代码行数略少。在我看来,这两个函数的性能几乎相同,为什么?为什么PACKET_MMAP没有更快,据我所知,应该有更少的sys调用和拷贝

void *packet_tx_mmap(void* thd_opt_p) {

    struct thd_opt *thd_opt = thd_opt_p;
    int32_t sock_fd = setup_socket_mmap(thd_opt_p);
    if (sock_fd == EXIT_FAILURE) exit(EXIT_FAILURE);

    struct tpacket2_hdr *hdr;
    uint8_t *data;
    int32_t send_ret = 0;
    uint16_t i;

    while(1) {

        for (i = 0; i < thd_opt->tpacket_req.tp_frame_nr; i += 1) {

            hdr = (void*)(thd_opt->mmap_buf + (thd_opt->tpacket_req.tp_frame_size * i));
            data = (uint8_t*)(hdr + TPACKET_ALIGN(TPACKET2_HDRLEN));

            memcpy(data, thd_opt->tx_buffer, thd_opt->frame_size);
            hdr->tp_len = thd_opt->frame_size;
            hdr->tp_status = TP_STATUS_SEND_REQUEST;

        }

        send_ret = sendto(sock_fd, NULL, 0, 0, NULL, 0);
        if (send_ret == -1) {
            perror("sendto error");
            exit(EXIT_FAILURE);
        }

        thd_opt->tx_pkts  += thd_opt->tpacket_req.tp_frame_nr;
        thd_opt->tx_bytes += send_ret;

    }

    return NULL;

}
插座设置功能的唯一区别如下所示,但本质上是设置插座RX_环或插座TX_环的要求:

// Set the TPACKET version, v2 for Tx and v3 for Rx
// (v2 supports packet level send(), v3 supports block level read())
int32_t sock_pkt_ver = -1;

if(thd_opt->sk_mode == SKT_TX) {
    static const int32_t sock_ver = TPACKET_V2;
    sock_pkt_ver = setsockopt(sock_fd, SOL_PACKET, PACKET_VERSION, &sock_ver, sizeof(sock_ver));
} else {
    static const int32_t sock_ver = TPACKET_V3;
    sock_pkt_ver = setsockopt(sock_fd, SOL_PACKET, PACKET_VERSION, &sock_ver, sizeof(sock_ver));
}

if (sock_pkt_ver < 0) {
    perror("Can't set socket packet version");
    return EXIT_FAILURE;
}


memset(&thd_opt->tpacket_req, 0, sizeof(struct tpacket_req));
memset(&thd_opt->tpacket_req3, 0, sizeof(struct tpacket_req3));

//thd_opt->block_sz = 4096; // These are set else where
//thd_opt->block_nr = 256;
//thd_opt->block_frame_sz = 4096;

int32_t sock_mmap_ring = -1;
if (thd_opt->sk_mode == SKT_TX) {

    thd_opt->tpacket_req.tp_block_size = thd_opt->block_sz;
    thd_opt->tpacket_req.tp_frame_size = thd_opt->block_sz;
    thd_opt->tpacket_req.tp_block_nr = thd_opt->block_nr;
    // Allocate per-frame blocks in Tx mode (TPACKET_V2)
    thd_opt->tpacket_req.tp_frame_nr = thd_opt->block_nr;

    sock_mmap_ring = setsockopt(sock_fd, SOL_PACKET , PACKET_TX_RING , (void*)&thd_opt->tpacket_req , sizeof(struct tpacket_req));

} else {

    thd_opt->tpacket_req3.tp_block_size = thd_opt->block_sz;
    thd_opt->tpacket_req3.tp_frame_size = thd_opt->block_frame_sz;
    thd_opt->tpacket_req3.tp_block_nr = thd_opt->block_nr;
    thd_opt->tpacket_req3.tp_frame_nr = (thd_opt->block_sz * thd_opt->block_nr) / thd_opt->block_frame_sz;
    thd_opt->tpacket_req3.tp_retire_blk_tov   = 1;
    thd_opt->tpacket_req3.tp_feature_req_word = 0;

    sock_mmap_ring = setsockopt(sock_fd, SOL_PACKET , PACKET_RX_RING , (void*)&thd_opt->tpacket_req3 , sizeof(thd_opt->tpacket_req3));
}

if (sock_mmap_ring == -1) {
    perror("Can't enable Tx/Rx ring for socket");
    return EXIT_FAILURE;
}


thd_opt->mmap_buf = NULL;
thd_opt->rd = NULL;

if (thd_opt->sk_mode == SKT_TX) {

    thd_opt->mmap_buf = mmap(NULL, (thd_opt->block_sz * thd_opt->block_nr), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock_fd, 0);

    if (thd_opt->mmap_buf == MAP_FAILED) {
        perror("mmap failed");
        return EXIT_FAILURE;
    }


} else {

    thd_opt->mmap_buf = mmap(NULL, (thd_opt->block_sz * thd_opt->block_nr), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock_fd, 0);

    if (thd_opt->mmap_buf == MAP_FAILED) {
        perror("mmap failed");
        return EXIT_FAILURE;
    }

    // Per bock rings in Rx mode (TPACKET_V3)
    thd_opt->rd = (struct iovec*)calloc(thd_opt->tpacket_req3.tp_block_nr * sizeof(struct iovec), 1);

    for (uint16_t i = 0; i < thd_opt->tpacket_req3.tp_block_nr; ++i) {
        thd_opt->rd[i].iov_base = thd_opt->mmap_buf + (i * thd_opt->tpacket_req3.tp_block_size);
        thd_opt->rd[i].iov_len  = thd_opt->tpacket_req3.tp_block_size;
    }


}
//设置TPACKET版本,v2用于发送,v3用于接收
//(v2支持数据包级发送(),v3支持数据块级读取())
int32_t sock_pkt_ver=-1;
如果(thd_opt->sk_mode==SKT_TX){
静态常数int32\u t sock\u ver=TPACKET\u V2;
sock_pkt_ver=setsockopt(sock_fd、SOL_PACKET、PACKET_VERSION和sock_ver、sizeof(sock_ver));
}否则{
静态常数int32\u t sock\u ver=TPACKET\u V3;
sock_pkt_ver=setsockopt(sock_fd、SOL_PACKET、PACKET_VERSION和sock_ver、sizeof(sock_ver));
}
如果(sock_pkt_ver<0){
perror(“无法设置套接字数据包版本”);
返回退出失败;
}
memset(&thd_opt->tpacket_req,0,sizeof(struct tpacket_req));
memset(&thd_opt->tPackage_req3,0,sizeof(struct tPackage_req3));
//thd_opt->block_sz=4096;//这些设置在其他地方
//thd_opt->block_nr=256;
//thd_opt->block_frame_sz=4096;
int32_t sock_mmap_环=-1;
如果(thd_opt->sk_mode==SKT_TX){
thd_opt->tPackage_req.tp_block_size=thd_opt->block_sz;
thd_opt->tPackage_req.tp_frame_size=thd_opt->block_sz;
thd_opt->tPackage_req.tp_block_nr=thd_opt->block_nr;
//在发送模式下分配每帧块(TPACKET_V2)
thd_opt->tPackage_req.tp_frame_nr=thd_opt->block_nr;
sock_mmap_ring=setsockopt(sock_fd,SOL_PACKET,PACKET_TX_ring,(void*)&thd_opt->tpacket_req,sizeof(struct tpacket_req));
}否则{
thd_opt->tPackage_req3.tp_block_size=thd_opt->block_sz;
thd_opt->tPackage_req3.tp_frame_size=thd_opt->block_frame_sz;
thd_opt->tPackage_req3.tp_block_nr=thd_opt->block_nr;
thd_opt->tPackage_req3.tp_frame_nr=(thd_opt->block_sz*thd_opt->block_nr)/thd_opt->block_frame_sz;
thd_opt->tPackage_req3.tp_retire_blk_tov=1;
thd_opt->tPackage_req3.tp_feature_req_word=0;
sock_mmap_ring=setsockopt(sock_fd,SOL_PACKET,PACKET_RX_ring,(void*)和thd_opt->tpacket_req3,大小(thd_opt->tpacket_req3));
}
如果(承插环==-1){
perror(“无法为插座启用Tx/Rx环”);
返回退出失败;
}
thd_opt->mmap_buf=NULL;
thd_opt->rd=NULL;
如果(thd_opt->sk_mode==SKT_TX){
thd_opt->mmap_buf=mmap(NULL,(thd_opt->block_sz*thd_opt->block_nr),保护读取,保护写入,映射共享,映射锁定,映射填充,sock_fd,0);
如果(thd_opt->mmap_buf==映射失败){
perror(“mmap失败”);
返回退出失败;
}
}否则{
thd_opt->mmap_buf=mmap(NULL,(thd_opt->block_sz*thd_opt->block_nr),保护读取,保护写入,映射共享,映射锁定,映射填充,sock_fd,0);
如果(thd_opt->mmap_buf==映射失败){
perror(“mmap失败”);
返回退出失败;
}
//Rx模式下的每个bock环(TPACKET_V3)
thd_opt->rd=(struct iovec*)calloc(thd_opt->tpacket_req3.tp_block_nr*sizeof(struct iovec),1);
对于(uint16\u t i=0;itPackage\u req3.tp\u block\n r;++i){
thd_opt->rd[i]。iov_base=thd_opt->mmap_buf+(i*thd_opt->tPackage_req3.tp_块大小);
thd_opt->rd[i]。iov_len=thd_opt->tpacket_req3.tp_block_size;
}
}
更新1:针对物理接口的结果 有人提到,在使用PACKET_MMAP时,我可能看不到性能差异的一个原因是,我向环回接口发送流量(首先,环回接口没有QDISC)。自从运行ei以来
void *packet_tx_mmap(void* thd_opt_p) {

    struct thd_opt *thd_opt = thd_opt_p;
    int32_t sock_fd = setup_socket_mmap(thd_opt_p);
    if (sock_fd == EXIT_FAILURE) exit(EXIT_FAILURE);

    struct tpacket2_hdr *hdr;
    uint8_t *data;
    int32_t send_ret = 0;
    uint16_t i;

    while(1) {

        for (i = 0; i < thd_opt->tpacket_req.tp_frame_nr; i += 1) {

            hdr = (void*)(thd_opt->mmap_buf + (thd_opt->tpacket_req.tp_frame_size * i));
            data = (uint8_t*)(hdr + TPACKET_ALIGN(TPACKET2_HDRLEN));

            memcpy(data, thd_opt->tx_buffer, thd_opt->frame_size);
            hdr->tp_len = thd_opt->frame_size;
            hdr->tp_status = TP_STATUS_SEND_REQUEST;

        }

        send_ret = sendto(sock_fd, NULL, 0, 0, NULL, 0);
        if (send_ret == -1) {
            perror("sendto error");
            exit(EXIT_FAILURE);
        }

        thd_opt->tx_pkts  += thd_opt->tpacket_req.tp_frame_nr;
        thd_opt->tx_bytes += send_ret;

    }

    return NULL;

}
void *packet_tx(void* thd_opt_p) {

    struct thd_opt *thd_opt = thd_opt_p;

    int32_t sock_fd = setup_socket(thd_opt_p); 

    if (sock_fd == EXIT_FAILURE) {
        printf("Can't create socket!\n");
        exit(EXIT_FAILURE);
    }

    while(1) {

        thd_opt->tx_bytes += sendto(sock_fd, thd_opt->tx_buffer,
                                    thd_opt->frame_size, 0,
                                    (struct sockaddr*)&thd_opt->bind_addr,
                                    sizeof(thd_opt->bind_addr));
        thd_opt->tx_pkts += 1;

    }

}
// Set the TPACKET version, v2 for Tx and v3 for Rx
// (v2 supports packet level send(), v3 supports block level read())
int32_t sock_pkt_ver = -1;

if(thd_opt->sk_mode == SKT_TX) {
    static const int32_t sock_ver = TPACKET_V2;
    sock_pkt_ver = setsockopt(sock_fd, SOL_PACKET, PACKET_VERSION, &sock_ver, sizeof(sock_ver));
} else {
    static const int32_t sock_ver = TPACKET_V3;
    sock_pkt_ver = setsockopt(sock_fd, SOL_PACKET, PACKET_VERSION, &sock_ver, sizeof(sock_ver));
}

if (sock_pkt_ver < 0) {
    perror("Can't set socket packet version");
    return EXIT_FAILURE;
}


memset(&thd_opt->tpacket_req, 0, sizeof(struct tpacket_req));
memset(&thd_opt->tpacket_req3, 0, sizeof(struct tpacket_req3));

//thd_opt->block_sz = 4096; // These are set else where
//thd_opt->block_nr = 256;
//thd_opt->block_frame_sz = 4096;

int32_t sock_mmap_ring = -1;
if (thd_opt->sk_mode == SKT_TX) {

    thd_opt->tpacket_req.tp_block_size = thd_opt->block_sz;
    thd_opt->tpacket_req.tp_frame_size = thd_opt->block_sz;
    thd_opt->tpacket_req.tp_block_nr = thd_opt->block_nr;
    // Allocate per-frame blocks in Tx mode (TPACKET_V2)
    thd_opt->tpacket_req.tp_frame_nr = thd_opt->block_nr;

    sock_mmap_ring = setsockopt(sock_fd, SOL_PACKET , PACKET_TX_RING , (void*)&thd_opt->tpacket_req , sizeof(struct tpacket_req));

} else {

    thd_opt->tpacket_req3.tp_block_size = thd_opt->block_sz;
    thd_opt->tpacket_req3.tp_frame_size = thd_opt->block_frame_sz;
    thd_opt->tpacket_req3.tp_block_nr = thd_opt->block_nr;
    thd_opt->tpacket_req3.tp_frame_nr = (thd_opt->block_sz * thd_opt->block_nr) / thd_opt->block_frame_sz;
    thd_opt->tpacket_req3.tp_retire_blk_tov   = 1;
    thd_opt->tpacket_req3.tp_feature_req_word = 0;

    sock_mmap_ring = setsockopt(sock_fd, SOL_PACKET , PACKET_RX_RING , (void*)&thd_opt->tpacket_req3 , sizeof(thd_opt->tpacket_req3));
}

if (sock_mmap_ring == -1) {
    perror("Can't enable Tx/Rx ring for socket");
    return EXIT_FAILURE;
}


thd_opt->mmap_buf = NULL;
thd_opt->rd = NULL;

if (thd_opt->sk_mode == SKT_TX) {

    thd_opt->mmap_buf = mmap(NULL, (thd_opt->block_sz * thd_opt->block_nr), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock_fd, 0);

    if (thd_opt->mmap_buf == MAP_FAILED) {
        perror("mmap failed");
        return EXIT_FAILURE;
    }


} else {

    thd_opt->mmap_buf = mmap(NULL, (thd_opt->block_sz * thd_opt->block_nr), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock_fd, 0);

    if (thd_opt->mmap_buf == MAP_FAILED) {
        perror("mmap failed");
        return EXIT_FAILURE;
    }

    // Per bock rings in Rx mode (TPACKET_V3)
    thd_opt->rd = (struct iovec*)calloc(thd_opt->tpacket_req3.tp_block_nr * sizeof(struct iovec), 1);

    for (uint16_t i = 0; i < thd_opt->tpacket_req3.tp_block_nr; ++i) {
        thd_opt->rd[i].iov_base = thd_opt->mmap_buf + (i * thd_opt->tpacket_req3.tp_block_size);
        thd_opt->rd[i].iov_len  = thd_opt->tpacket_req3.tp_block_size;
    }


}
$ sudo stap -e 'probe kernel.function("tpacket_snd") { printf("W00T!\n"); }' &
[1] 19961
$ sudo ./packet_mmap -c 1 eth0
[...]
STARTING TEST:
data offset = 32 bytes
start fill() thread
send 1 packets (+150 bytes)
end of task fill()
Loop until queue empty (0)
END (number of error:0)
W00T!
W00T!
# This is specific to net/packet/af_packet.c 3.13.0-116

function print_ts() {
  ts = gettimeofday_us();
  printf("[%10d.%06d] ", ts/1000000, ts%1000000);
}

#  325 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
#  326 {
#  327  union tpacket_uhdr h;
#  328 
#  329  h.raw = frame;
#  330  switch (po->tp_version) {
#  331  case TPACKET_V1:
#  332      h.h1->tp_status = status;
#  333      flush_dcache_page(pgv_to_page(&h.h1->tp_status));
#  334      break;
#  335  case TPACKET_V2:
#  336      h.h2->tp_status = status;
#  337      flush_dcache_page(pgv_to_page(&h.h2->tp_status));
#  338      break;
#  339  case TPACKET_V3:
#  340  default:
#  341      WARN(1, "TPACKET version not supported.\n");
#  342      BUG();
#  343  }
#  344 
#  345  smp_wmb();
#  346 }

probe kernel.statement("__packet_set_status@net/packet/af_packet.c:334") {
  print_ts();
  printf("SET(V1): %d (0x%.16x)\n", $status, $frame);
}

probe kernel.statement("__packet_set_status@net/packet/af_packet.c:338") {
  print_ts();
  printf("SET(V2): %d\n", $status);
}

#  348 static int __packet_get_status(struct packet_sock *po, void *frame)
#  349 {
#  350  union tpacket_uhdr h;
#  351 
#  352  smp_rmb();
#  353 
#  354  h.raw = frame;
#  355  switch (po->tp_version) {
#  356  case TPACKET_V1:
#  357      flush_dcache_page(pgv_to_page(&h.h1->tp_status));
#  358      return h.h1->tp_status;
#  359  case TPACKET_V2:
#  360      flush_dcache_page(pgv_to_page(&h.h2->tp_status));
#  361      return h.h2->tp_status;
#  362  case TPACKET_V3:
#  363  default:
#  364      WARN(1, "TPACKET version not supported.\n");
#  365      BUG();
#  366      return 0;
#  367  }
#  368 }

probe kernel.statement("__packet_get_status@net/packet/af_packet.c:358") { 
  print_ts();
  printf("GET(V1): %d (0x%.16x)\n", $h->h1->tp_status, $frame); 
}

probe kernel.statement("__packet_get_status@net/packet/af_packet.c:361") { 
  print_ts();
  printf("GET(V2): %d\n", $h->h2->tp_status); 
}

# 2088 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
# 2089 {
# [...]
# 2136  do {
# 2137      ph = packet_current_frame(po, &po->tx_ring,
# 2138              TP_STATUS_SEND_REQUEST);
# 2139 
# 2140      if (unlikely(ph == NULL)) {
# 2141          schedule();
# 2142          continue;
# 2143      }
# 2144 
# 2145      status = TP_STATUS_SEND_REQUEST;
# 2146      hlen = LL_RESERVED_SPACE(dev);
# 2147      tlen = dev->needed_tailroom;
# 2148      skb = sock_alloc_send_skb(&po->sk,
# 2149              hlen + tlen + sizeof(struct sockaddr_ll),
# 2150              0, &err);
# 2151 
# 2152      if (unlikely(skb == NULL))
# 2153          goto out_status;
# 2154 
# 2155      tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
# 2156                    addr, hlen);
# [...]
# 2176      skb->destructor = tpacket_destruct_skb;
# 2177      __packet_set_status(po, ph, TP_STATUS_SENDING);
# 2178      atomic_inc(&po->tx_ring.pending);
# 2179 
# 2180      status = TP_STATUS_SEND_REQUEST;
# 2181      err = dev_queue_xmit(skb);
# 2182      if (unlikely(err > 0)) {
# [...]
# 2195      }
# 2196      packet_increment_head(&po->tx_ring);
# 2197      len_sum += tp_len;
# 2198  } while (likely((ph != NULL) ||
# 2199          ((!(msg->msg_flags & MSG_DONTWAIT)) &&
# 2200           (atomic_read(&po->tx_ring.pending))))
# 2201      );
# 2202 
# [...]
# 2213  return err;
# 2214 }

probe kernel.function("tpacket_snd") {
  print_ts();
  printf("tpacket_snd: args(%s)\n", $$parms);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2140") {
  print_ts();
  printf("tpacket_snd:2140: current frame ph = 0x%.16x\n", $ph);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2141") {
  print_ts();
  printf("tpacket_snd:2141: (ph==NULL) --> schedule()\n");
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2142") {
  print_ts();
  printf("tpacket_snd:2142: flags 0x%x, pending %d\n", 
     $msg->msg_flags, $po->tx_ring->pending->counter);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2197") {
  print_ts();
  printf("tpacket_snd:2197: flags 0x%x, pending %d\n", 
     $msg->msg_flags, $po->tx_ring->pending->counter);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2213") {
  print_ts();
  printf("tpacket_snd: return(%d)\n", $err);
}

# 1946 static void tpacket_destruct_skb(struct sk_buff *skb)
# 1947 {
# 1948  struct packet_sock *po = pkt_sk(skb->sk);
# 1949  void *ph;
# 1950 
# 1951  if (likely(po->tx_ring.pg_vec)) {
# 1952      __u32 ts;
# 1953 
# 1954      ph = skb_shinfo(skb)->destructor_arg;
# 1955      BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
# 1956      atomic_dec(&po->tx_ring.pending);
# 1957 
# 1958      ts = __packet_set_timestamp(po, ph, skb);
# 1959      __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
# 1960  }
# 1961 
# 1962  sock_wfree(skb);
# 1963 }

probe kernel.statement("tpacket_destruct_skb@net/packet/af_packet.c:1959") {
  print_ts();
  printf("tpacket_destruct_skb:1959: ph = 0x%.16x, ts = 0x%x, pending %d\n",
     $ph, $ts, $po->tx_ring->pending->counter);
}
[1492581245.839850] tpacket_snd: args(po=0xffff88016720ee38 msg=0x14)
[1492581245.839865] GET(V1): 1 (0xffff880241202000)
[1492581245.839873] tpacket_snd:2140: current frame ph = 0xffff880241202000
[1492581245.839887] SET(V1): 2 (0xffff880241202000)
[1492581245.839918] tpacket_snd:2197: flags 0x40, pending 1
[1492581245.839923] GET(V1): 1 (0xffff88013499c000)
[1492581245.839929] tpacket_snd:2140: current frame ph = 0xffff88013499c000
[1492581245.839935] SET(V1): 2 (0xffff88013499c000)
[1492581245.839946] tpacket_snd:2197: flags 0x40, pending 2
[1492581245.839951] GET(V1): 0 (0xffff88013499e000)
[1492581245.839957] tpacket_snd:2140: current frame ph = 0x0000000000000000
[1492581245.839961] tpacket_snd:2141: (ph==NULL) --> schedule()
[1492581245.839977] tpacket_snd:2142: flags 0x40, pending 2
[1492581245.839984] tpacket_snd: return(300)
[1492581245.840077] tpacket_snd: args(po=0x0 msg=0x14)
[1492581245.840089] GET(V1): 0 (0xffff88013499e000)
[1492581245.840098] tpacket_snd:2140: current frame ph = 0x0000000000000000
[1492581245.840093] tpacket_destruct_skb:1959: ph = 0xffff880241202000, ts = 0x0, pending 1
[1492581245.840102] tpacket_snd:2141: (ph==NULL) --> schedule()
[1492581245.840104] SET(V1): 0 (0xffff880241202000)
[1492581245.840112] tpacket_snd:2142: flags 0x40, pending 1
[1492581245.840116] tpacket_destruct_skb:1959: ph = 0xffff88013499c000, ts = 0x0, pending 0
[1492581245.840119] tpacket_snd: return(0)
[1492581245.840123] SET(V1): 0 (0xffff88013499c000)
$ sudo tc qdisc add dev eth0 root pfifo limit 50
$ tc -s -d qdisc show dev eth0
qdisc pfifo 8004: root refcnt 2 limit 50p
 Sent 42 bytes 1 pkt (dropped 0, overlimits 0 requeues 0) 
 backlog 0b 0p requeues 0 
# This is specific to net/packet/af_packet.c 3.13.0-116

function print_ts() {
  ts = gettimeofday_us();
  printf("[%10d.%06d] ", ts/1000000, ts%1000000);
}

# 2088 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
# 2089 {
# [...]
# 2213  return err;
# 2214 }

probe kernel.function("tpacket_snd") {
  print_ts();
  printf("tpacket_snd: args(%s)\n", $$parms);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2213") {
  print_ts();
  printf("tpacket_snd: return(%d)\n", $err);
}
$ sudo ./packet_mmap -c 8096 -s 1500 eth0
[...]
STARTING TEST:
data offset = 32 bytes
start fill() thread
send 8096 packets (+12144000 bytes)
end of task fill()
Loop until queue empty (0)
END (number of error:0)
$ tc -s -d qdisc show dev eth0
qdisc pfifo 8004: root refcnt 2 limit 50p
 Sent 25755333 bytes 8606 pkt (dropped 1, overlimits 0 requeues 265) 
 backlog 0b 0p requeues 265 
[1492603552.938414] tpacket_snd: args(po=0xffff8801673ba338 msg=0x14)
[1492603553.036601] tpacket_snd: return(12144000)
[1492603553.036706] tpacket_snd: args(po=0x0 msg=0x14)
[1492603553.036716] tpacket_snd: return(0)
skb = sock_alloc_send_skb(&po->sk,
                 hlen + tlen + sizeof(struct sockaddr_ll),
                 0, &err);
# This is specific to net/packet/af_packet.c 3.13.0-116

function print_ts() {
  ts = gettimeofday_us();
  printf("[%10d.%06d] ", ts/1000000, ts%1000000);
}

# 2088 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
# 2089 {
# [...]
# 2133  if (size_max > dev->mtu + reserve + VLAN_HLEN)
# 2134      size_max = dev->mtu + reserve + VLAN_HLEN;
# 2135 
# 2136  do {
# [...]
# 2148      skb = sock_alloc_send_skb(&po->sk,
# 2149              hlen + tlen + sizeof(struct sockaddr_ll),
# 2150              msg->msg_flags & MSG_DONTWAIT, &err);
# 2151 
# 2152      if (unlikely(skb == NULL))
# 2153          goto out_status;
# [...]
# 2181      err = dev_queue_xmit(skb);
# 2182      if (unlikely(err > 0)) {
# 2183          err = net_xmit_errno(err);
# 2184          if (err && __packet_get_status(po, ph) ==
# 2185                 TP_STATUS_AVAILABLE) {
# 2186              /* skb was destructed already */
# 2187              skb = NULL;
# 2188              goto out_status;
# 2189          }
# 2190          /*
# 2191           * skb was dropped but not destructed yet;
# 2192           * let's treat it like congestion or err < 0
# 2193           */
# 2194          err = 0;
# 2195      }
# 2196      packet_increment_head(&po->tx_ring);
# 2197      len_sum += tp_len;
# 2198  } while (likely((ph != NULL) ||
# 2199          ((!(msg->msg_flags & MSG_DONTWAIT)) &&
# 2200           (atomic_read(&po->tx_ring.pending))))
# 2201      );
# [...]
# 2213  return err;
# 2214 }

probe kernel.function("tpacket_snd") {
  print_ts();
  printf("tpacket_snd: args(%s)\n", $$parms);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2133") {
  print_ts();
  printf("tpacket_snd:2133: sk_sndbuf =  %d sk_wmem_alloc = %d\n", 
     $po->sk->sk_sndbuf, $po->sk->sk_wmem_alloc->counter);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2153") {
  print_ts();
  printf("tpacket_snd:2153: sock_alloc_send_skb err = %d, sk_sndbuf =  %d sk_wmem_alloc = %d\n", 
     $err, $po->sk->sk_sndbuf, $po->sk->sk_wmem_alloc->counter);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2182") {
  if ($err != 0) {
    print_ts();
    printf("tpacket_snd:2182: dev_queue_xmit err = %d\n", $err);
  }
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2187") {
  print_ts();
  printf("tpacket_snd:2187: destructed: net_xmit_errno = %d\n", $err);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2194") {
  print_ts();
  printf("tpacket_snd:2194: *NOT* destructed: net_xmit_errno = %d\n", $err);
}

probe kernel.statement("tpacket_snd@net/packet/af_packet.c:2213") {
  print_ts();
  printf("tpacket_snd: return(%d) sk_sndbuf =  %d sk_wmem_alloc = %d\n", 
     $err, $po->sk->sk_sndbuf, $po->sk->sk_wmem_alloc->counter);
}
$ sudo tc qdisc add dev eth0 root pfifo limit 50
$ tc -s -d qdisc show dev eth0
qdisc pfifo 8001: root refcnt 2 limit 50p
 Sent 2154 bytes 21 pkt (dropped 0, overlimits 0 requeues 0) 
 backlog 0b 0p requeues 0 
$ sudo ./packet_mmap -c 200 -s 1500 eth0
[...]
c_sndbuf_sz:       1228800
[...]
STARTING TEST:
data offset = 32 bytes
send buff size = 1228800
got buff size = 425984
buff size smaller than desired, trying to force...
got buff size = 2457600
start fill() thread
send: No buffer space available
end of task fill()
send: No buffer space available
Loop until queue empty (-1)
[repeated another 17 times]
send 3 packets (+4500 bytes)
Loop until queue empty (4500)
Loop until queue empty (0)
END (number of error:0)
$  tc -s -d qdisc show dev eth0
qdisc pfifo 8001: root refcnt 2 limit 50p
 Sent 452850 bytes 335 pkt (dropped 19, overlimits 0 requeues 3) 
 backlog 0b 0p requeues 3 
[1492759330.907151] tpacket_snd: args(po=0xffff880393246c38 msg=0x14)
[1492759330.907162] tpacket_snd:2133: sk_sndbuf =  2457600 sk_wmem_alloc = 1
[1492759330.907491] tpacket_snd:2182: dev_queue_xmit err = 1
[1492759330.907494] tpacket_snd:2187: destructed: net_xmit_errno = -105
[1492759330.907500] tpacket_snd: return(-105) sk_sndbuf =  2457600 sk_wmem_alloc = 218639
[1492759330.907646] tpacket_snd: args(po=0x0 msg=0x14)
[1492759330.907653] tpacket_snd:2133: sk_sndbuf =  2457600 sk_wmem_alloc = 189337
[1492759330.907688] tpacket_snd:2182: dev_queue_xmit err = 1
[1492759330.907691] tpacket_snd:2187: destructed: net_xmit_errno = -105
[1492759330.907694] tpacket_snd: return(-105) sk_sndbuf =  2457600 sk_wmem_alloc = 189337
[repeated 17 times]
[1492759330.908541] tpacket_snd: args(po=0x0 msg=0x14)
[1492759330.908543] tpacket_snd:2133: sk_sndbuf =  2457600 sk_wmem_alloc = 189337
[1492759330.908554] tpacket_snd: return(4500) sk_sndbuf =  2457600 sk_wmem_alloc = 196099
[1492759330.908570] tpacket_snd: args(po=0x0 msg=0x14)
[1492759330.908572] tpacket_snd:2133: sk_sndbuf =  2457600 sk_wmem_alloc = 196099
[1492759330.908576] tpacket_snd: return(0) sk_sndbuf =  2457600 sk_wmem_alloc = 196099