C++ C/C++;argv内存管理

C++ C/C++;argv内存管理,c++,c,memory-management,C++,C,Memory Management,一种标准的C/C++程序格式 int main(int argc, char *argv[]){} 我想知道调用main时,argv数据在内存中是如何排列的。我从Node.js repo获得了这个函数。它的工作原理就像内存是这样排列的: argv_area|NULL|argv_data_area 操作系统真的以这种方式处理argv的内存吗 就操作系统依赖性而言,只需讨论Linux 64位您在Node.js中链接的代码实际上并不假设任何关于argv及其引用的布局。相反,您提到的布局是函数的输出

一种标准的C/C++程序格式

int main(int argc, char *argv[]){}
我想知道调用
main
时,
argv
数据在内存中是如何排列的。我从Node.js repo获得了这个函数。它的工作原理就像内存是这样排列的:

argv_area|NULL|argv_data_area
操作系统真的以这种方式处理argv的内存吗?


就操作系统依赖性而言,只需讨论Linux 64位

您在Node.js中链接的代码实际上并不假设任何关于argv及其引用的布局。相反,您提到的布局是该函数的输出格式。事实上,它确实像您所说的那样创建了一个数组,但它并不要求输入也按这种方式排列(如果输入真是这样排列的,它只需执行一次
memcpy()
即可)。

换句话说,您描述的特殊布局对于任何程序的argv都是足够的,但不是必需的

Node为什么会这样做?就在评论中:

  // Logic to duplicate argv as Init() modifies arguments
  // that are passed into it.
  char **argv_copy = copy_argv(argc, argv);

  // This needs to run *before* V8::Initialize()
  // Use copy here as to not modify the original argv:
  Init(argc, argv_copy);

为什么会这样?因为
Init()
会修改传给它的参数(见上面代码中的注释)。对我来说,这似乎有点迂回,但您发现的argv复制例程的全部目的只是生成一份程序可以当作argv使用的副本,而不是对其确切布局有任何特定的要求。不过这是一个简单的布局,以后只需要调用一次
free()
即可释放。

原始的
argv
通常是一个连续的
char *
值块,紧接着是环境的另一个
char *
值块(即
int main(int argc, char **argv, char **envp)
这种
main()
变体中的
envp
,也是
environ
所指向的内容)。然后才是参数字符串和环境字符串本身。

参数列表和环境可能不是由
malloc()
本身创建的-参数和环境是通过
execve()
系统调用设置的

大约三年前,我曾尝试“从main以外的函数中查找
argv[0]
”,并编写了如下代码。它在Mac OS X Mavericks(10.9.4;最初测试的版本是Snow Leopard 10.6)和Ubuntu 14.04上仍然有效。(要在函数中获取
argv[0]
,有更好的、但特定于平台的方法;这里的做法只是一种取巧手段,因此我不会在实际代码中使用它,但它在一些常见平台上确实有效。)

#包括“posixver.h”
#包括
#包括
#包括
#include/*putenv(),setenv()*/
外部字符**environ;/*应在中声明*/
/*
**练习的目的是:只考虑环境(因为这就是全部)
**可用于库函数)尝试查找argv[0](和
**因此,argc)。
**
**在某些平台上,内存的布局使得
**参数(argc)可用,后跟参数向量,
**其次是环境向量。
**
**argv环境
**            |                                |
**v v
**| argc | argv0 | argv1 | | | argvN | 0 | env0 | env1 | | envN | 0|
**
**这适用于:
**--Solaris 10(32位、64位SPARC)
**--MacOS X 10.6(雪豹、32位和64位)
**--Linux(x86/64、32位和64位上的RHEL 5)
**
**遗憾的是,在另外两个Unix系统上,情况并非如此
**平台。argv0前面的值似乎是0。
**--AIX 6.1(32位、64位)
**--HP-UX 11.23 IA64(32位,64位)
**次标准POSIX支持(无setenv())和C99支持(无%zd)。
**
**注意:如果调用putenv()或setenv()来添加环境变量,
**然后environ的基址发生了根本性的变化,从
**堆叠到堆上,所有下注均已取消。修改现有
**变量不是问题。
**
**通过观察是否
**environ指向的地址是大小的128k倍以上
**来自局部变量地址的指针。
**
**这段代码名义上是难以置信的特定于机器的,但实际上是
**非常便于携带。
*/
typedef结构参数
{
字符**argv;
尺寸参数;
}争论;
静态无效打印\u cpp(常量字符*标记,整数i,字符**ptr)
{
uintptr_t p=(uintptr_t)ptr;
printf(“%s[%d]=0x%PRIXPTR”(0x%PRIXPTR)(%s)\n”,
标签,i,p,(uintptr_t)(*ptr),(*ptr==0?”:*ptr));
}
枚举{MAX_DELTA=sizeof(void*)*128*1024};
静态参数find_argv0(void)
{
静态字符*dummy[]={',0};
参数args;
uintptruêt i;
字符**基=环境-1;
uintpttr_t delta=((uintpttr_t)和base>(uintpttr_t)environment)?(uintpttr_t)和base-(uintpttr_t)environment:(uintpttr_t)environment-(uintpttr_t)和base;
如果(增量<最大值_增量)
{
对于(i=2;(uintptr\u t)(*(环境-i)+2)!=i&(uintptr\u t)(*(环境-i))!=0;i++)
打印cpp(“测试”,i,环境-i);
args.argc=i-2;
args.argv=environ-i+1;
}
其他的
{
args.argc=1;
args.argv=虚拟;
}
printf(“argc=%zd\n”,args.argc);
对于(i=0;i环境)?argv-environ:environ-argv;
printf(“环境=0x%lX;argv=0x%lX(增量:0x%lX)\n”,(无符号长)环境,(无符号长)argv,增量);
对于(i=0;i(无符号长)argc+1)
返回0;
对于(i=1;i
#include "posixver.h"
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>     /* putenv(), setenv() */

extern char **environ;  /* Should be declared in <unistd.h> */

/*
** The object of the exercise is: given just environ (since that is all
** that is available to a library function) attempt to find argv[0] (and
** hence argc).
**
** On some platforms, the layout of memory is such that the number of
** arguments (argc) is available, followed by the argument vector,
** followed by the environment vector.
**
**          argv                            environ
**            |                                |
**            v                                v
** | argc | argv0 | argv1 | ... | argvN | 0 | env0 | env1 | ... | envN | 0 |
**
** This applies to:
** -- Solaris 10 (32-bit, 64-bit SPARC)
** -- MacOS X 10.6 (Snow Leopard, 32-bit and 64-bit)
** -- Linux (RHEL 5 on x86/64, 32-bit and 64-bit)
**
** Sadly, this is not quite what happens on the other two Unix
** platforms.  The value preceding argv0 seems to be a 0.
** -- AIX 6.1          (32-bit, 64-bit)
** -- HP-UX 11.23 IA64 (32-bit, 64-bit)
**       Sub-standard POSIX support (no setenv()) and C99 support (no %zd).
**
** NB: If putenv() or setenv() is called to add an environment variable,
** then the base address of environ changes radically, moving off the
** stack onto heap, and all bets are off.  Modifying an existing
** variable is not a problem.
**
** Spotting the change from stack to heap is done by observing whether
** the address pointed to by environ is more than 128 K times the size
** of a pointer from the address of a local variable.
**
** This code is nominally incredibly machine-specific - but actually
** works remarkably portably.
*/

/*
** Result of find_argv0(): an argument vector together with its length.
*/
typedef struct Arguments
{
    char   **argv;  /* First slot of the argument vector */
    size_t   argc;  /* Number of arguments; callers also read argv[argc] */
} Arguments;

/*
** Print one slot of a char * vector on standard output: the tag and
** index, the address of the slot, the pointer value stored in it, and the
** string that value points to ("<null>" when the slot holds a null
** pointer).
*/
static void print_cpp(const char *tag, int i, char **ptr)
{
    const uintptr_t slot_addr = (uintptr_t)ptr;
    char *entry = *ptr;
    const char *text = "<null>";

    if (entry != 0)
        text = entry;

    printf("%s[%d] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
           tag, i, slot_addr, (uintptr_t)entry, text);
}

/*
** Largest distance, in bytes, between environ and a local variable for
** which find_argv0() still attempts to scan backwards from environ:
** 128 K pointer sizes.
*/
enum { MAX_DELTA = sizeof(void *) * 128 * 1024 };

/*
** Attempt to recover argc and argv using nothing but environ.
**
** If environ lies within MAX_DELTA bytes of a local variable of this
** function, the slots below environ are scanned downwards.  Slot
** environ[-1] is skipped (the scan starts at environ[-2]); the scan stops
** at the first slot whose value, read as an integer, equals the number of
** slots scanned so far (taken to be argc), or whose value is 0.
**
** If environ is further away than MAX_DELTA, the scan is not attempted
** and a one-element dummy vector { "<unknown>", NULL } is returned.
**
** The scanned slots, the deduced argc and every argv slot (including
** argv[argc]) are printed on standard output as a diagnostic trace.
**
** NOTE(review): the downward scan has no upper bound; on a platform whose
** layout matches neither stop condition it keeps reading lower addresses.
*/
static Arguments find_argv0(void)
{
    static char *dummy[] = { "<unknown>", NULL };
    Arguments args;
    uintptr_t i;
    char **base = environ;      /* Only the address of this local is used */
    uintptr_t local_addr = (uintptr_t)&base;
    uintptr_t env_addr = (uintptr_t)environ;
    uintptr_t delta = (local_addr > env_addr) ? local_addr - env_addr
                                              : env_addr - local_addr;

    if (delta < MAX_DELTA)
    {
        /*
        ** The slot value is converted to an integer before adding 2: the
        ** slot may hold argc rather than a valid pointer, so arithmetic on
        ** it as a char * would be undefined.
        */
        for (i = 2;
             (uintptr_t)(*(environ - i)) + 2 != i && *(environ - i) != NULL;
             i++)
        {
            print_cpp("test", (int)i, environ - i);
        }
        args.argc = i - 2;
        args.argv = environ - i + 1;
    }
    else
    {
        args.argc = 1;
        args.argv = dummy;
    }

    /* %zu, not %zd: argc is a size_t */
    printf("argc    = %zu\n", args.argc);
    for (i = 0; i <= args.argc; i++)
        print_cpp("argv", (int)i, &args.argv[i]);

    return args;
}

/*
** Print the argument vector deduced by find_argv0(): a heading, argc,
** and then every slot from argv[0] to argv[argc] inclusive, showing
** "<null>" for a null slot.
*/
static void print_arguments(void)
{
    Arguments args = find_argv0();

    printf("Command name and arguments\n");
    /* %zu, not %zd: argc and the loop index are size_t */
    printf("argc    = %zu\n", args.argc);
    for (size_t i = 0; i <= args.argc; i++)
        printf("argv[%zu] = %s\n", i, (args.argv[i] != NULL ? args.argv[i] : "<null>"));
}

/*
** Report where argv sits relative to environ and dump the slots involved.
**
** Prints the addresses of environ and argv and the distance between them
** measured in pointer-sized slots, then each of argv[0]..argv[argc].
**
** Returns 0 if the distance exceeds argc + 1 slots.  Otherwise dumps the
** argc + 1 slots below environ (address, value and string), then the next
** two slots below those (address and value only), and returns 1.
*/
static int check_environ(int argc, char **argv)
{
    size_t n = argc;
    size_t i;
    /*
    ** Compare and subtract as integers: argv and environ need not point
    ** into the same array, so pointer comparison/subtraction would be
    ** undefined.
    */
    uintptr_t argv_addr = (uintptr_t)argv;
    uintptr_t env_addr = (uintptr_t)environ;
    uintptr_t delta = ((argv_addr > env_addr) ? argv_addr - env_addr
                                              : env_addr - argv_addr) / sizeof(char *);

    printf("environ = 0x%" PRIXPTR "; argv = 0x%" PRIXPTR " (delta: 0x%" PRIXPTR ")\n",
           env_addr, argv_addr, delta);
    for (i = 0; i <= n; i++)
        print_cpp("chkv", (int)i, &argv[i]);
    if (delta > (uintptr_t)argc + 1)
        return 0;

    for (i = 1; i < n + 2; i++)
    {
        char **slot = environ - i;

        printf("chkr[%zu] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
               i, (uintptr_t)slot, (uintptr_t)*slot,
               (*slot != NULL ? *slot : "<null>"));
        fflush(NULL);   /* Flush every stream after each line */
    }

    /* The two slots below argv[0]: values printed as numbers only */
    for (i = n + 2; i <= n + 3; i++)
        printf("chkF[%zu] = 0x%" PRIXPTR " (0x%" PRIXPTR ")\n",
               i, (uintptr_t)(environ - i), (uintptr_t)(*(environ - i)));
    return 1;
}

/*
** Demonstration driver.  Runs check_environ() and print_arguments() at
** three points: before touching the environment, after calling setenv()
** for SHELL, and after calling putenv() for CODSWALLOP.
**
** Returns 0 on success, or EXIT_FAILURE if setenv() or putenv() fails
** (the later stages would otherwise report on a change that never
** happened).
*/
int main(int argc, char **argv)
{
    /*
    ** putenv() places the caller's string itself into the environment, so
    ** hand it writable storage with static lifetime rather than a literal.
    */
    static char new_variable[] = "CODSWALLOP=nonsense";

    printf("Before setting environment\n");
    if (check_environ(argc, argv))
        print_arguments();

    if (setenv("SHELL", "/bin/csh", 1) != 0)
    {
        perror("setenv");
        return EXIT_FAILURE;
    }

    printf("After modifying environment\n");
    if (check_environ(argc, argv) == 0)
        printf("Modifying environment messed everything up\n");
    print_arguments();

    if (putenv(new_variable) != 0)
    {
        perror("putenv");
        return EXIT_FAILURE;
    }

    printf("After adding to environment\n");
    if (check_environ(argc, argv) == 0)
        printf("Adding environment messed everything up\n");
    print_arguments();

    return 0;
}
Before setting environment
environ = 0x7FFF584D04C8; argv = 0x7FFF584D0498 (delta: 0x6)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[1] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkr[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkr[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkr[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkr[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkF[7] = 0x7FFF584D0490 (0x5)
chkF[8] = 0x7FFF584D0488 (0x0)
test[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
test[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
test[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
test[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
test[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argc    = 5
argv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
argv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
argv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
argv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
argv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Command name and arguments
argc    = 5
argv[0] = ./find_argv0
argv[1] = macedonian
argv[2] = obelisk
argv[3] = mental breakdown
argv[4] = testing: 1, 2, 3
argv[5] = <null>
After modifying environment
environ = 0x7FFF584D04C8; argv = 0x7FFF584D0498 (delta: 0x6)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[1] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkr[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkr[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkr[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkr[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkF[7] = 0x7FFF584D0490 (0x5)
chkF[8] = 0x7FFF584D0488 (0x0)
test[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
test[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
test[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
test[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
test[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argc    = 5
argv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
argv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
argv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
argv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
argv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Command name and arguments
argc    = 5
argv[0] = ./find_argv0
argv[1] = macedonian
argv[2] = obelisk
argv[3] = mental breakdown
argv[4] = testing: 1, 2, 3
argv[5] = <null>
After adding to environment
environ = 0x7FB1EA403B60; argv = 0x7FFF584D0498 (delta: 0x9ADC19927)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Adding environment messed everything up
argc    = 1
argv[0] = 0x107730040 (0x10772FEC0) (<unknown>)
argv[1] = 0x107730048 (0x0) (<null>)
Command name and arguments
argc    = 1
argv[0] = <unknown>
argv[1] = <null>