Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/77.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/http/4.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 将NULL分配给数据帧的列而不是子集是一个好主意吗?_R_Dataframe_Null_Subset - Fatal编程技术网

R 将NULL分配给数据帧的列而不是子集是一个好主意吗?

R 将NULL分配给数据帧的列而不是子集是一个好主意吗?,r,dataframe,null,subset,R,Dataframe,Null,Subset,让我们假设一个havedf: df <- data.frame(A = 1 : 3, B = 2 : 4, C = 3 : 5, D = 4 : 6) 但是,我今天了解到以下代码也可以工作: df$A = NULL 这让我想问一个问题: 将NULL分配给数据帧的列而不是子集是一个好主意吗 除了子集返回一个新对象之外,这两者之间的隐含差异(例如语义、性能)是什么?我试图用traceem、address和mem\u change来探索它 不同的方法: #subset my_df <-

假设我有一个数据框 `df`:

df <- data.frame(A = 1 : 3, B = 2 : 4, C = 3 : 5, D = 4 : 6)
但是,我今天了解到以下代码也可以工作:

df$A = NULL
这让我想问一个问题:

将 `NULL` 赋给数据框的列,而不是使用 `subset`,是一个好主意吗?


除了 `subset` 会返回一个新对象之外,这两者之间还有哪些隐含的差异(例如语义、性能)?

我试图用 `tracemem`、`address` 和 `mem_change` 来探索它。

不同的方法:

#subset
my_df <- subset(my_df, select = -A)
使用 `[]` 取子集同样会得到不同的最终地址

完整代码:

method_name
<memory address from tracemem >
<address of df>
(Possibly tracemem results if object is copied)
memory change when column is deleted
<address of df after column deleted>
#' Build the data frame used by the column-deletion experiments.
#'
#' Returns a numeric data frame with one column per element of `LETTERS`
#' (named "A" to "Z"), filled with standard-normal draws. The number of rows
#' is `n_values / 26` rounded up, i.e. 38462 rows for the default, which is
#' the same shape the previous implementation produced.
#'
#' Exactly `rows * columns` values are drawn, so `matrix()` never has to
#' recycle data to fill the last column and no warning has to be suppressed.
#'
#' @param n_values Approximate number of cells wanted (a single number >= 1);
#'   rounded up to a whole number of rows.
#' @return A data.frame with 26 numeric columns named "A" to "Z".
.create_data <- function(n_values = 1000000) {
  stopifnot(
    is.numeric(n_values),
    length(n_values) == 1L,
    !is.na(n_values),
    n_values >= 1
  )

  # paste0() allocates a new character vector, so the frame's names are not
  # the LETTERS constant itself. Presumably this matters because the
  # experiments modify the frame by reference with data.table::set()
  # -- TODO confirm.
  col_names <- paste0(LETTERS)
  n_cols <- length(col_names)
  n_rows <- ceiling(n_values / n_cols)

  # Draw one value per cell instead of relying on silent recycling.
  my_df <- data.frame(matrix(rnorm(n_rows * n_cols), ncol = n_cols))
  colnames(my_df) <- col_names
  my_df
}

library(pryr)
library(data.table)

  # Benchmark script: tries four ways of removing column A from a data frame.
  # Each section follows the same steps:
  #   1. build a fresh data frame with .create_data()
  #   2. start tracing it with tracemem() and query its address
  #   3. perform the deletion inside mem_change()
  #   4. query the address again, stop tracing, remove the object and run gc()
  # The sections are deliberately independent so one method cannot influence
  # the measurements of the next.

  ##### subset
  message("subset")
  my_df  <- .create_data()

  # Address before the deletion, for comparison with the one queried after it.
  tracemem(my_df)
  address(my_df)

  # Drop column A via subset(); the result is assigned back to my_df.
  mem_change(my_df <- subset(my_df, select = -A))

  address(my_df)
  untracemem(my_df)
  # Remove the object and collect garbage before the next method;
  # invisible() keeps gc()'s return value from being printed.
  rm(my_df)
  invisible(gc())

  ##### <- NULL
  message("<- NULL")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # Drop column A by assigning NULL to it with `$<-`.
  mem_change(my_df$A <-  NULL)

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

  ##### set from data.table
  message("set from data.table")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # Drop column A with set(); the call is made only for its effect on my_df,
  # its return value is not assigned back.
  mem_change(set(my_df, j = "A", value = NULL))

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

  ##### subset with []
  message("subset with []")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # colnames(my_df)[-1] keeps every column name except the first, so column A
  # is dropped by position rather than by name.
  mem_change(my_df <- my_df[, colnames(my_df)[-1]])

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

我试图用 `tracemem`、`address` 和 `mem_change` 来探索它。

不同的方法:

#subset
my_df <- subset(my_df, select = -A)
使用 `[]` 取子集同样会得到不同的最终地址

完整代码:

method_name
<memory address from tracemem >
<address of df>
(Possibly tracemem results if object is copied)
memory change when column is deleted
<address of df after column deleted>
#' Build the data frame used by the column-deletion experiments.
#'
#' Returns a numeric data frame with one column per element of `LETTERS`
#' (named "A" to "Z"), filled with standard-normal draws. The number of rows
#' is `n_values / 26` rounded up, i.e. 38462 rows for the default, which is
#' the same shape the previous implementation produced.
#'
#' Exactly `rows * columns` values are drawn, so `matrix()` never has to
#' recycle data to fill the last column and no warning has to be suppressed.
#'
#' @param n_values Approximate number of cells wanted (a single number >= 1);
#'   rounded up to a whole number of rows.
#' @return A data.frame with 26 numeric columns named "A" to "Z".
.create_data <- function(n_values = 1000000) {
  stopifnot(
    is.numeric(n_values),
    length(n_values) == 1L,
    !is.na(n_values),
    n_values >= 1
  )

  # paste0() allocates a new character vector, so the frame's names are not
  # the LETTERS constant itself. Presumably this matters because the
  # experiments modify the frame by reference with data.table::set()
  # -- TODO confirm.
  col_names <- paste0(LETTERS)
  n_cols <- length(col_names)
  n_rows <- ceiling(n_values / n_cols)

  # Draw one value per cell instead of relying on silent recycling.
  my_df <- data.frame(matrix(rnorm(n_rows * n_cols), ncol = n_cols))
  colnames(my_df) <- col_names
  my_df
}

library(pryr)
library(data.table)

  # Benchmark script: tries four ways of removing column A from a data frame.
  # Each section follows the same steps:
  #   1. build a fresh data frame with .create_data()
  #   2. start tracing it with tracemem() and query its address
  #   3. perform the deletion inside mem_change()
  #   4. query the address again, stop tracing, remove the object and run gc()
  # The sections are deliberately independent so one method cannot influence
  # the measurements of the next.

  ##### subset
  message("subset")
  my_df  <- .create_data()

  # Address before the deletion, for comparison with the one queried after it.
  tracemem(my_df)
  address(my_df)

  # Drop column A via subset(); the result is assigned back to my_df.
  mem_change(my_df <- subset(my_df, select = -A))

  address(my_df)
  untracemem(my_df)
  # Remove the object and collect garbage before the next method;
  # invisible() keeps gc()'s return value from being printed.
  rm(my_df)
  invisible(gc())

  ##### <- NULL
  message("<- NULL")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # Drop column A by assigning NULL to it with `$<-`.
  mem_change(my_df$A <-  NULL)

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

  ##### set from data.table
  message("set from data.table")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # Drop column A with set(); the call is made only for its effect on my_df,
  # its return value is not assigned back.
  mem_change(set(my_df, j = "A", value = NULL))

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

  ##### subset with []
  message("subset with []")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # colnames(my_df)[-1] keeps every column name except the first, so column A
  # is dropped by position rather than by name.
  mem_change(my_df <- my_df[, colnames(my_df)[-1]])

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

通过取子集(subsetting),您会得到另一个 data.frame,而原始数据框保持不变(当然,在您的例子中,您把新结果赋给了同一个名称,因此原始数据框会丢失)。通过赋值,您修改了 data.frame(从内部实现来看这种说法并不十分准确,但从用户的角度来看是如此),该列随之被删除。因此,如果您想得到另一个 data.frame 并同时保留原始数据,可以使用 `subset`;如果要修改 data.frame 本身,请直接删除该列。
subset
[1] "<0x7f92c1504610>"
[1] "0x7f92c1504610"
-178 kB
[1] "0x7f92c1503a10"
<- NULL
[1] "<0x7f92c17b80e0>"
[1] "0x7f92c17b80e0"
tracemem[0x7f92c17b80e0 -> 0x7f92c1719a90]: eval eval mem_change 
tracemem[0x7f92c1719a90 -> 0x7f92c1746400]: $<-.data.frame $<- eval eval mem_change 
tracemem[0x7f92c1746400 -> 0x7f92c17006c0]: $<-.data.frame $<- eval eval mem_change 
-290 kB
[1] "0x7f92c17312e0"
set from data.table
[1] "<0x7f92c16227c0>"
[1] "0x7f92c16227c0"
-303 kB
[1] "0x7f92c16227c0"
subset with []
[1] "<0x7f92c165cfa0>"
[1] "0x7f92c165cfa0"
-300 kB
[1] "0x7f92c161e950"
#' Build the data frame used by the column-deletion experiments.
#'
#' Returns a numeric data frame with one column per element of `LETTERS`
#' (named "A" to "Z"), filled with standard-normal draws. The number of rows
#' is `n_values / 26` rounded up, i.e. 38462 rows for the default, which is
#' the same shape the previous implementation produced.
#'
#' Exactly `rows * columns` values are drawn, so `matrix()` never has to
#' recycle data to fill the last column and no warning has to be suppressed.
#'
#' @param n_values Approximate number of cells wanted (a single number >= 1);
#'   rounded up to a whole number of rows.
#' @return A data.frame with 26 numeric columns named "A" to "Z".
.create_data <- function(n_values = 1000000) {
  stopifnot(
    is.numeric(n_values),
    length(n_values) == 1L,
    !is.na(n_values),
    n_values >= 1
  )

  # paste0() allocates a new character vector, so the frame's names are not
  # the LETTERS constant itself. Presumably this matters because the
  # experiments modify the frame by reference with data.table::set()
  # -- TODO confirm.
  col_names <- paste0(LETTERS)
  n_cols <- length(col_names)
  n_rows <- ceiling(n_values / n_cols)

  # Draw one value per cell instead of relying on silent recycling.
  my_df <- data.frame(matrix(rnorm(n_rows * n_cols), ncol = n_cols))
  colnames(my_df) <- col_names
  my_df
}

library(pryr)
library(data.table)

  # Benchmark script: tries four ways of removing column A from a data frame.
  # Each section follows the same steps:
  #   1. build a fresh data frame with .create_data()
  #   2. start tracing it with tracemem() and query its address
  #   3. perform the deletion inside mem_change()
  #   4. query the address again, stop tracing, remove the object and run gc()
  # The sections are deliberately independent so one method cannot influence
  # the measurements of the next.

  ##### subset
  message("subset")
  my_df  <- .create_data()

  # Address before the deletion, for comparison with the one queried after it.
  tracemem(my_df)
  address(my_df)

  # Drop column A via subset(); the result is assigned back to my_df.
  mem_change(my_df <- subset(my_df, select = -A))

  address(my_df)
  untracemem(my_df)
  # Remove the object and collect garbage before the next method;
  # invisible() keeps gc()'s return value from being printed.
  rm(my_df)
  invisible(gc())

  ##### <- NULL
  message("<- NULL")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # Drop column A by assigning NULL to it with `$<-`.
  mem_change(my_df$A <-  NULL)

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

  ##### set from data.table
  message("set from data.table")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # Drop column A with set(); the call is made only for its effect on my_df,
  # its return value is not assigned back.
  mem_change(set(my_df, j = "A", value = NULL))

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())

  ##### subset with []
  message("subset with []")
  my_df <- .create_data()

  tracemem(my_df)
  address(my_df)

  # colnames(my_df)[-1] keeps every column name except the first, so column A
  # is dropped by position rather than by name.
  mem_change(my_df <- my_df[, colnames(my_df)[-1]])

  address(my_df)
  untracemem(my_df)
  rm(my_df)
  invisible(gc())