C++ 大规模套接字操作的分段故障
这个问题困扰了我好几个星期,我在网上找不到任何解决办法。所以我必须向各位大师提出一个新问题 我试图读/写大量的套接字,请参阅下面的测试代码。当插座数量低于1500时,其表现正常。当套接字数超过1500时,程序将意外崩溃。我知道我应该使用命令C++ 大规模套接字操作的分段故障,c++,sockets,C++,Sockets,这个问题困扰了我好几个星期,我在网上找不到任何解决办法。所以我必须向各位大师提出一个新问题 我试图读/写大量的套接字,请参阅下面的测试代码。当插座数量低于1500时,其表现正常。当套接字数超过1500时,程序将意外崩溃。我知道我应该使用命令ulimit-n32768来增加打开文件的数量限制。但是程序仍然不能正确运行 #include <unistd.h> #include <stdio.h> #include <stdlib.h> #include <s
ulimit-n32768
来增加打开文件的数量限制。但是程序仍然不能正确运行
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <netdb.h>
#include <errno.h>
#include <malloc.h>
#include <string.h>
int main(int argc, char* argv[])
{
if (argc!=2)
{
printf("usage: test <number of sockets>\n");
return -1;
}
int socketsNum=atoi(argv[1]);
if (socketsNum<=0)
{
printf("error: invalid sockets number\n");
return -1;
}
int *socketHandles=(int*)malloc(sizeof(int)*socketsNum);
if (socketHandles==NULL)
{
printf("error: failed to alloc socket handle memory\n");
return -1;
}
for (int i=0;i<socketsNum;i++)
{
socketHandles[i]=-1;
}
printf("creating %d sockets ...\n",socketsNum);
int createdSocketsNum=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP);
if (socketHandle==-1)
{
int lastError=errno;
printf("warning: socket() failed: index: %d, error: %d\n",i+1,lastError);
continue;
}
sockaddr_in sockAddr; // 0.0.0.0:0
memset(&sockAddr,0,sizeof(sockAddr));
sockAddr.sin_family = AF_INET;
sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
sockAddr.sin_port = htons(0);
if (bind( socketHandle, (sockaddr*) &sockAddr, sizeof(sockAddr)) == -1)
{
int lastError=errno;
printf("warning: bind() failed: index: %d, error: %d\n",i+1,lastError);
close(socketHandle);
continue;
}
socketHandles[i]=socketHandle;
createdSocketsNum++;
}
printf("created %d sockets.\n",createdSocketsNum);
//test reading;
printf("testing reading ...\n");
int readableNumber=0;
int unreadableNumber=0;
int readingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
readingSkippedNumber++;
continue;
}
fd_set rset;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, &rset, NULL, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %d\n",i+1,lastError);
}
else if (retCode==0)
{
unreadableNumber++;
}
else
{
readableNumber++;
}
}
printf("readable: %d, unreadable: %d, skipped: %d, total: %d\n",readableNumber,unreadableNumber,readingSkippedNumber,socketsNum);
//test writing
printf("testing writing ...\n");
int writableNumber=0;
int unwritableNumber=0;
int writingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
writingSkippedNumber++;
continue;
}
fd_set wset;
FD_ZERO(&wset);
FD_SET(socketHandle, &wset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, NULL, &wset, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %d\n",i+1,lastError);
}
else if (retCode==0)
{
unwritableNumber++;
}
else
{
writableNumber++;
}
}
printf("writable: %d, unwritable: %d, skipped: %d, total: %d\n",writableNumber,unwritableNumber,writingSkippedNumber,socketsNum);
printf("closing ...\n");
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
continue;
}
close(socketHandle);
}
free(socketHandles);
printf("completed!\n");
return 0;
}
配置:
ulimit -n 32768
一些典型结果:
/TestSockets 1500
的良好结果:
creating 1500 sockets ...
created 1500 sockets.
testing reading ...
readable: 0, unreadable: 1500, skipped: 0, total: 1500
testing writing ...
writable: 1372, unwritable: 128, skipped: 0, total: 1500
closing ...
completed!
creating 1900 sockets ...
created 1900 sockets.
testing reading ...
warning: select() failed: index: 1797, error: 9
...(more lines trimmed)
warning: select() failed: index: 1820, error: 9
warning: select() failed: index: 1821, error: 22
readable: 0, unreadable: 1878, skipped: 0, total: 1900
testing writing ...
warning: select() failed: index: 1641, error: 9
...(more lines trimmed)
warning: select() failed: index: 1660, error: 9
warning: select() failed: index: 1661, error: 22
writable: 1751, unwritable: 128, skipped: 0, total: 1900
closing ...
completed!
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Segmentation fault
/TestSockets 1900
的错误结果:
creating 1500 sockets ...
created 1500 sockets.
testing reading ...
readable: 0, unreadable: 1500, skipped: 0, total: 1500
testing writing ...
writable: 1372, unwritable: 128, skipped: 0, total: 1500
closing ...
completed!
creating 1900 sockets ...
created 1900 sockets.
testing reading ...
warning: select() failed: index: 1797, error: 9
...(more lines trimmed)
warning: select() failed: index: 1820, error: 9
warning: select() failed: index: 1821, error: 22
readable: 0, unreadable: 1878, skipped: 0, total: 1900
testing writing ...
warning: select() failed: index: 1641, error: 9
...(more lines trimmed)
warning: select() failed: index: 1660, error: 9
warning: select() failed: index: 1661, error: 22
writable: 1751, unwritable: 128, skipped: 0, total: 1900
closing ...
completed!
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Segmentation fault
注释:由于1900>1751+128,堆栈似乎已损坏/TestSockets 2000
的错误结果:
creating 1500 sockets ...
created 1500 sockets.
testing reading ...
readable: 0, unreadable: 1500, skipped: 0, total: 1500
testing writing ...
writable: 1372, unwritable: 128, skipped: 0, total: 1500
closing ...
completed!
creating 1900 sockets ...
created 1900 sockets.
testing reading ...
warning: select() failed: index: 1797, error: 9
...(more lines trimmed)
warning: select() failed: index: 1820, error: 9
warning: select() failed: index: 1821, error: 22
readable: 0, unreadable: 1878, skipped: 0, total: 1900
testing writing ...
warning: select() failed: index: 1641, error: 9
...(more lines trimmed)
warning: select() failed: index: 1660, error: 9
warning: select() failed: index: 1661, error: 22
writable: 1751, unwritable: 128, skipped: 0, total: 1900
closing ...
completed!
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Segmentation fault
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Program received signal SIGSEGV, Segmentation fault.
0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78
78 int socketHandle=socketHandles[i];
(gdb) print socketHandles
$1 = (int *) 0x0
(gdb) info local
socketHandle = 0
rset = {fds_bits = {0 <repeats 32 times>}}
timeout = {tv_sec = 0, tv_usec = 0}
retCode = 0
i = 1601
socketsNum = 2000
unreadableNumber = 1601
unwritableNumber = 134514249
socketHandles = 0x0
createdSocketsNum = 2000
readableNumber = 0
readingSkippedNumber = 0
writableNumber = -136436764
writingSkippedNumber = 0
(gdb) info stack
#0 0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78
正在创建2000个套接字。。。
创建了2000个套接字。
测试阅读。。。
程序接收信号SIGSEGV,分段故障。
TestSockets处的main中的0x08048b79(argc=2,argv=0xffffd3b4)。cpp:78
78 int socketHandle=socketHandles[i];
(gdb)打印插座
$1=(整数*)0x0
(gdb)本地信息
socketHandle=0
rset={fds_位={0}
超时={tv_sec=0,tv_usec=0}
retCode=0
i=1601
socketsNum=2000
无法读取编号=1601
未包装编号=134514249
socketHandles=0x0
createdSocketsNum=2000
readableNumber=0
ReadingSkppedNumber=0
writableNumber=-136436764
WritingSkppedNumber=0
(gdb)信息堆栈
#TestSockets处的main中0 0x08048b79(argc=2,argv=0xffffd3b4)。cpp:78
一个fd_集
受文件描述符的最大值(而不是同时设置的文件描述符数量)的限制。通常是1024
因此,如果套接字值大于1023,则根本无法对其使用select
我知道的操作系统不支持重新定义
FD_SETSIZE
。您可能能够在程序中成功地重新定义fd_集
,但是select
将只适用于fd_集
,我已经解决了这个令人头痛的问题。windows和Linux上的fd_设置完全不同。在Linux上,如果套接字句柄值大于FD_SETSIZE,则Linux版本FD_SET宏将出现溢出问题。我做了一个变通办法,为Linux上的fd_设置分配足够的缓冲区。比如,
char rsetBuffer[10240];
memset(rsetBuffer,0,10240);
fd_set& rset=(fd_set&)rsetBuffer;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
p、 s.windows和Linux上fd_集结构和fd_集宏的定义:
在windows上:
typedef struct fd_set {
u_int fd_count; /* how many are SET? */
SOCKET fd_array[FD_SETSIZE]; /* an array of SOCKETs */
} fd_set;
#define FD_SET(fd, set) do { \
u_int __i; \
for (__i = 0; __i < ((fd_set FAR *)(set))->fd_count; __i++) { \
if (((fd_set FAR *)(set))->fd_array[__i] == (fd)) { \
break; \
} \
} \
if (__i == ((fd_set FAR *)(set))->fd_count) { \
if (((fd_set FAR *)(set))->fd_count < FD_SETSIZE) { \
((fd_set FAR *)(set))->fd_array[__i] = (fd); \
((fd_set FAR *)(set))->fd_count++; \
} \
} \
} while(0)
您是否尝试在调试器中运行以捕获运行中的崩溃?这将帮助您定位它发生的位置(在代码中),并让您检查相关变量的值以确保它们正常。此外,请尝试使用内存解压缩工具,例如,它将帮助您查找缓冲区溢出等。关于调试器的使用,请尝试使用调试信息构建,在构建时添加
-g
标志。然后在崩溃发生时获取堆栈跟踪(使用bt
命令),查看崩溃发生在代码中的何处(如果调试器尚未停止)。拥有调试信息对于Valgrind能够报告所发现问题的确切位置至关重要。请编辑您的问题以添加关键信息,如堆栈跟踪或崩溃位置。我在浏览其他类似问题时,遇到了一条我认为与您直接相关的评论:“如果您获得的文件描述符的值高达FD_SETSIZE,则无法将该描述符放入FD_集中。”有点令人沮丧的是,它在select()上崩溃,内存损坏,没有任何适当的错误状态。我可以确认!在我的FreeBSD盒上,我在1500时遇到seg故障,但在2048年定义FS_SETSIZE后,我可以在2000年成功运行。对于需要使用更多套接字的应用程序,有什么解决方法?@M.M,请参阅我的答复。这是一个变通的例子。@Tony我认为你的答案不一定有效。(可能它在您的系统上对您有效,但可能对其他人或其他系统无效)这不是一个正确的解决方案。它不会崩溃,但也不会工作。我在回答中写下了原因。我是通过艰苦的方式学会的。如果你也想通过艰苦的方式学习,欢迎你。