Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/67.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
为r中的新列从另一个数据框查询数据_R_Database_Merge - Fatal编程技术网

为r中的新列从另一个数据框查询数据

为r中的新列从另一个数据框查询数据,r,database,merge,R,Database,Merge,好的,我对R中的数据帧管理和操作是新手,在查询和向数据帧添加信息以进行分析时遇到困难。我有3个数据帧 df1 df2 df3 我想向df1添加如下列:使用County与df2匹配,并添加适当的区域。然后使用ResZip与df3(zip)匹配,以获得适当的纬度和经度。我知道我需要在需要的地方更改列名,但不确定如何执行这些查询。我习惯于访问,但无法转换为R。我尝试过合并,但获得的附加信息太多。非常感谢您的帮助。除了您提供的数据之外,这将起作用。df2和df3之间没有匹配的FIP。如果您了解SQL,那

好的,我刚接触 R 中数据框的管理和操作,在查询数据并向数据框添加信息以便进行分析时遇到了困难。我有 3 个数据框:df1

df2

df3


我想向 df1 添加如下几列:先用 County 与 df2 匹配,添加相应的面积(AREA);再用 ResZIP 与 df3 的 zip 匹配,获得相应的纬度和经度。我知道需要在必要的地方更改列名,但不确定如何执行这些查询。我习惯使用 Access,但不知道如何把同样的做法转换到 R 中。我尝试过 merge,但得到了太多多余的信息。非常感谢您的帮助。

这种方法是可行的,只是在您提供的数据中,df2 和 df3 之间没有匹配的 FIPS。如果您了解 SQL,可以进入 Access 的 SQL 视图查看对应的查询语句,稍作转换后在 R 中用 sqldf 执行。另外,如果您用 dput 函数输出数据并把结果粘贴到问题中,别人就能更容易地帮助您。

library(sqldf)        
# Sample data: 12 records for 2005, two for MIDDLESEX county (ZIP 6037)
# followed by ten for NEWLONDON county (ZIP 6355).
df1 <- data.frame(
  Year = rep(2005L, times = 12L),
  HNo = rep(c(218050003L, 218050008L), times = c(2L, 10L)),
  Month = c(10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 11L),
  Day = c(8L, 10L, 1L, 10L, 12L, 16L, 23L, 28L, 12L, 18L, 22L, 11L),
  County = factor(
    rep(c("MIDDLESEX", "NEWLONDON"), times = c(2L, 10L)),
    levels = c("MIDDLESEX", "NEWLONDON")
  ),
  ST = factor(rep("CT", times = 12L), levels = "CT"),
  ResState = factor(rep("CT", times = 12L), levels = "CT"),
  ResZIP = rep(c(6037L, 6355L), times = c(2L, 10L))
)

# Lookup table of the eight Connecticut counties with their FIPS codes and
# AREA values; County is spelled in mixed case here.
df2 <- data.frame(
  FID = c(590L, 591L, 593L, 594L, 642L, 647L, 651L, 652L),
  County = factor(
    c("Litchfield", "Hartford", "Tolland", "Windham",
      "NewLondon", "Fairfield", "Middlesex", "NewHaven"),
    levels = c("Fairfield", "Hartford", "Litchfield", "Middlesex",
               "NewHaven", "NewLondon", "Tolland", "Windham")
  ),
  STATE_NAME = factor(rep("Connecticut", times = 8L), levels = "Connecticut"),
  STATE_FIPS = rep(9L, times = 8L),
  CNTY_FIPS = c(5L, 3L, 13L, 15L, 11L, 1L, 7L, 9L),
  FIPS = c(9005L, 9003L, 9013L, 9015L, 9011L, 9001L, 9007L, 9009L),
  AREA = c(929.3449, 742.8998, 411.904, 522.058,
           706.352, 661.2935, 379.362, 623.3514)
)


# ZIP-code lookup table: nine ZIP codes (all with fips 9003) and their
# latitude / longitude.
df3 <- data.frame(
  zip = c(6001L, 6002L, 6006L, 6010L, 6011L, 6030L, 6034L, 6045L, 6049L),
  city = factor(
    c("Avon", "Bloomfield", "Windsor", "Bristol", "Bristol",
      "Farmington", "Farmington", "Manchester", "Melrose"),
    levels = c("Avon", "Bloomfield", "Bristol", "Farmington",
               "Manchester", "Melrose", "Windsor")
  ),
  state = factor(rep("CT", times = 9L), levels = "CT"),
  latitude = c(41.7897, 41.8328, 41.87964, 41.68225,
               rep(41.79178, times = 5L)),
  longitude = c(-72.86431, -72.72642, -72.73427, -72.93365,
                rep(-72.71883, times = 5L)),
  fips = rep(9003L, times = 9L)
)


# Attach the county AREA (from df2) and the ZIP coordinates (from df3) to
# every row of df1.
#   * County is compared case-insensitively because df1 stores "MIDDLESEX"
#     while df2 stores "Middlesex".
#   * df3 is matched on the ZIP code (ResZIP = zip), as the question asks,
#     rather than on the county FIPS code, which would pair each record with
#     every ZIP of its county.
#   * LEFT JOINs keep the df1 rows that have no match; their added columns
#     are NA (with the sample data no ResZIP occurs in df3, so latitude and
#     longitude come back as NA).
sqldf("SELECT tbl1.*, tbl2.AREA, tbl3.latitude, tbl3.longitude
         FROM df1 tbl1
         LEFT JOIN df2 tbl2 ON upper(tbl1.County) = upper(tbl2.County)
         LEFT JOIN df3 tbl3 ON tbl1.ResZIP = tbl3.zip")

这种方法是可行的,只是在您提供的数据中,df2 和 df3 之间没有匹配的 FIPS。如果您了解 SQL,可以进入 Access 的 SQL 视图查看对应的查询语句,稍作转换后在 R 中用 sqldf 执行。另外,如果您用 dput 函数输出数据并把结果粘贴到问题中,别人就能更容易地帮助您。

library(sqldf)        
# Sample data: 12 records for 2005, two for MIDDLESEX county (ZIP 6037)
# followed by ten for NEWLONDON county (ZIP 6355).
df1 <- data.frame(
  Year = rep(2005L, times = 12L),
  HNo = rep(c(218050003L, 218050008L), times = c(2L, 10L)),
  Month = c(10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 11L),
  Day = c(8L, 10L, 1L, 10L, 12L, 16L, 23L, 28L, 12L, 18L, 22L, 11L),
  County = factor(
    rep(c("MIDDLESEX", "NEWLONDON"), times = c(2L, 10L)),
    levels = c("MIDDLESEX", "NEWLONDON")
  ),
  ST = factor(rep("CT", times = 12L), levels = "CT"),
  ResState = factor(rep("CT", times = 12L), levels = "CT"),
  ResZIP = rep(c(6037L, 6355L), times = c(2L, 10L))
)

# Lookup table of the eight Connecticut counties with their FIPS codes and
# AREA values; County is spelled in mixed case here.
df2 <- data.frame(
  FID = c(590L, 591L, 593L, 594L, 642L, 647L, 651L, 652L),
  County = factor(
    c("Litchfield", "Hartford", "Tolland", "Windham",
      "NewLondon", "Fairfield", "Middlesex", "NewHaven"),
    levels = c("Fairfield", "Hartford", "Litchfield", "Middlesex",
               "NewHaven", "NewLondon", "Tolland", "Windham")
  ),
  STATE_NAME = factor(rep("Connecticut", times = 8L), levels = "Connecticut"),
  STATE_FIPS = rep(9L, times = 8L),
  CNTY_FIPS = c(5L, 3L, 13L, 15L, 11L, 1L, 7L, 9L),
  FIPS = c(9005L, 9003L, 9013L, 9015L, 9011L, 9001L, 9007L, 9009L),
  AREA = c(929.3449, 742.8998, 411.904, 522.058,
           706.352, 661.2935, 379.362, 623.3514)
)


# ZIP-code lookup table: nine ZIP codes (all with fips 9003) and their
# latitude / longitude.
df3 <- data.frame(
  zip = c(6001L, 6002L, 6006L, 6010L, 6011L, 6030L, 6034L, 6045L, 6049L),
  city = factor(
    c("Avon", "Bloomfield", "Windsor", "Bristol", "Bristol",
      "Farmington", "Farmington", "Manchester", "Melrose"),
    levels = c("Avon", "Bloomfield", "Bristol", "Farmington",
               "Manchester", "Melrose", "Windsor")
  ),
  state = factor(rep("CT", times = 9L), levels = "CT"),
  latitude = c(41.7897, 41.8328, 41.87964, 41.68225,
               rep(41.79178, times = 5L)),
  longitude = c(-72.86431, -72.72642, -72.73427, -72.93365,
                rep(-72.71883, times = 5L)),
  fips = rep(9003L, times = 9L)
)


# Attach the county AREA (from df2) and the ZIP coordinates (from df3) to
# every row of df1.
#   * County is compared case-insensitively because df1 stores "MIDDLESEX"
#     while df2 stores "Middlesex".
#   * df3 is matched on the ZIP code (ResZIP = zip), as the question asks,
#     rather than on the county FIPS code, which would pair each record with
#     every ZIP of its county.
#   * LEFT JOINs keep the df1 rows that have no match; their added columns
#     are NA (with the sample data no ResZIP occurs in df3, so latitude and
#     longitude come back as NA).
sqldf("SELECT tbl1.*, tbl2.AREA, tbl3.latitude, tbl3.longitude
         FROM df1 tbl1
         LEFT JOIN df2 tbl2 ON upper(tbl1.County) = upper(tbl2.County)
         LEFT JOIN df3 tbl3 ON tbl1.ResZIP = tbl3.zip")

您可以试试 dplyr。它的“动词”(实际上是函数)很容易理解和上手。不过,所提供的样本数据框之间似乎没有匹配的记录。

library(dplyr)
# Add the county AREA (from df2) and the ZIP coordinates (from df3) to df1.
# df1 spells County in upper case ("MIDDLESEX") while df2 uses mixed case
# ("Middlesex"), so the key is upper-cased on both sides before joining;
# without that no county matches and AREA is NA on every row.
res1 <- df1 %>%
  mutate(County = toupper(as.character(County))) %>%
  left_join(
    df2 %>%
      mutate(County = toupper(as.character(County))) %>%
      select(County, AREA),
    by = "County"
  )
# Match the residence ZIP code against df3 to pick up the coordinates.
res2 <- res1 %>%
  left_join(select(df3, zip, latitude, longitude), by = c("ResZIP" = "zip"))
res2
# latitude / longitude stay NA because the sample df3 contains neither
# ZIP 6037 nor ZIP 6355.
#    Year       HNo Month Day    County ST ResState ResZIP    AREA latitude longitude
# 1  2005 218050003    10   8 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 2  2005 218050003    10  10 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 3  2005 218050008     9   1 NEWLONDON CT       CT   6355 706.352       NA        NA
# 4  2005 218050008     9  10 NEWLONDON CT       CT   6355 706.352       NA        NA
# 5  2005 218050008     9  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 6  2005 218050008     9  16 NEWLONDON CT       CT   6355 706.352       NA        NA
# 7  2005 218050008     9  23 NEWLONDON CT       CT   6355 706.352       NA        NA
# 8  2005 218050008     9  28 NEWLONDON CT       CT   6355 706.352       NA        NA
# 9  2005 218050008    10  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 10 2005 218050008    10  18 NEWLONDON CT       CT   6355 706.352       NA        NA
# 11 2005 218050008    10  22 NEWLONDON CT       CT   6355 706.352       NA        NA
# 12 2005 218050008    11  11 NEWLONDON CT       CT   6355 706.352       NA        NA

您可以试试 dplyr。它的“动词”(实际上是函数)很容易理解和上手。不过,所提供的样本数据框之间似乎没有匹配的记录。

library(dplyr)
# Add the county AREA (from df2) and the ZIP coordinates (from df3) to df1.
# df1 spells County in upper case ("MIDDLESEX") while df2 uses mixed case
# ("Middlesex"), so the key is upper-cased on both sides before joining;
# without that no county matches and AREA is NA on every row.
res1 <- df1 %>%
  mutate(County = toupper(as.character(County))) %>%
  left_join(
    df2 %>%
      mutate(County = toupper(as.character(County))) %>%
      select(County, AREA),
    by = "County"
  )
# Match the residence ZIP code against df3 to pick up the coordinates.
res2 <- res1 %>%
  left_join(select(df3, zip, latitude, longitude), by = c("ResZIP" = "zip"))
res2
# latitude / longitude stay NA because the sample df3 contains neither
# ZIP 6037 nor ZIP 6355.
#    Year       HNo Month Day    County ST ResState ResZIP    AREA latitude longitude
# 1  2005 218050003    10   8 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 2  2005 218050003    10  10 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 3  2005 218050008     9   1 NEWLONDON CT       CT   6355 706.352       NA        NA
# 4  2005 218050008     9  10 NEWLONDON CT       CT   6355 706.352       NA        NA
# 5  2005 218050008     9  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 6  2005 218050008     9  16 NEWLONDON CT       CT   6355 706.352       NA        NA
# 7  2005 218050008     9  23 NEWLONDON CT       CT   6355 706.352       NA        NA
# 8  2005 218050008     9  28 NEWLONDON CT       CT   6355 706.352       NA        NA
# 9  2005 218050008    10  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 10 2005 218050008    10  18 NEWLONDON CT       CT   6355 706.352       NA        NA
# 11 2005 218050008    10  22 NEWLONDON CT       CT   6355 706.352       NA        NA
# 12 2005 218050008    11  11 NEWLONDON CT       CT   6355 706.352       NA        NA

太好了,谢谢。有办法在两列上匹配吗?
是的,有。但建议将其作为单独的问题发布,并提供具有代表性的样本数据。
library(dplyr)
# Add the county AREA (from df2) and the ZIP coordinates (from df3) to df1.
# df1 spells County in upper case ("MIDDLESEX") while df2 uses mixed case
# ("Middlesex"), so the key is upper-cased on both sides before joining;
# without that no county matches and AREA is NA on every row.
res1 <- df1 %>%
  mutate(County = toupper(as.character(County))) %>%
  left_join(
    df2 %>%
      mutate(County = toupper(as.character(County))) %>%
      select(County, AREA),
    by = "County"
  )
# Match the residence ZIP code against df3 to pick up the coordinates.
res2 <- res1 %>%
  left_join(select(df3, zip, latitude, longitude), by = c("ResZIP" = "zip"))
res2
# latitude / longitude stay NA because the sample df3 contains neither
# ZIP 6037 nor ZIP 6355.
#    Year       HNo Month Day    County ST ResState ResZIP    AREA latitude longitude
# 1  2005 218050003    10   8 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 2  2005 218050003    10  10 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 3  2005 218050008     9   1 NEWLONDON CT       CT   6355 706.352       NA        NA
# 4  2005 218050008     9  10 NEWLONDON CT       CT   6355 706.352       NA        NA
# 5  2005 218050008     9  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 6  2005 218050008     9  16 NEWLONDON CT       CT   6355 706.352       NA        NA
# 7  2005 218050008     9  23 NEWLONDON CT       CT   6355 706.352       NA        NA
# 8  2005 218050008     9  28 NEWLONDON CT       CT   6355 706.352       NA        NA
# 9  2005 218050008    10  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 10 2005 218050008    10  18 NEWLONDON CT       CT   6355 706.352       NA        NA
# 11 2005 218050008    10  22 NEWLONDON CT       CT   6355 706.352       NA        NA
# 12 2005 218050008    11  11 NEWLONDON CT       CT   6355 706.352       NA        NA