为r中的新列从另一个数据框查询数据_R_Database_Merge

在 R 中从另一个数据框查询数据以添加新列

r database merge

为r中的新列从另一个数据框查询数据,r,database,merge,R,Database,Merge,好的，我对R中的数据帧管理和操作是新手，在查询和向数据帧添加信息以进行分析时遇到困难。我有3个数据帧 df1 df2 df3 我想向df1添加如下列：使用County与df2匹配，并添加适当的区域。然后使用ResZip与df3（zip）匹配，以获得适当的纬度和经度。我知道我需要在需要的地方更改列名，但不确定如何执行这些查询。我习惯于访问，但无法转换为R。我尝试过合并，但获得的附加信息太多。非常感谢您的帮助。除了您提供的数据之外，这将起作用。df2和df3之间没有匹配的FIP。如果您了解SQL，那

好的，我刚开始接触 R 中数据框的管理和操作，在查询数据并向数据框添加信息以便分析时遇到了困难。我有 3 个数据框：df1

df2

df3

我想向 df1 添加如下列：用 County 与 df2 匹配，添加对应的面积（AREA）；然后用 ResZIP 与 df3 的 zip 匹配，得到对应的纬度和经度。我知道需要在必要的地方修改列名，但不确定如何执行这些查询。我习惯使用 Access，但不知道如何把同样的做法转换到 R 中。我尝试过 merge，但得到的附加信息太多。非常感谢您的帮助。

下面的方法是可行的，只是在您提供的数据中，df2 和 df3 之间没有匹配的 FIPS。如果您了解 SQL，可以在 Access 中切换到 SQL 视图，把查询转换成 SQL，然后在 R 中通过 sqldf 使用它。另外，如果您用 dput 函数导出数据并把输出粘贴到问题里，别人会更容易帮助您。

library(sqldf)        
# Sample table to be enriched: 12 rows, with County and ResZIP serving as the
# lookup keys into df2 and df3 respectively.
df1 <- data.frame(
  Year = rep(2005L, 12L),
  HNo = rep(c(218050003L, 218050008L), times = c(2L, 10L)),
  Month = c(10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 11L),
  Day = c(8L, 10L, 1L, 10L, 12L, 16L, 23L, 28L, 12L, 18L, 22L, 11L),
  County = factor(
    rep(c("MIDDLESEX", "NEWLONDON"), times = c(2L, 10L)),
    levels = c("MIDDLESEX", "NEWLONDON")
  ),
  ST = factor(rep("CT", 12L), levels = "CT"),
  ResState = factor(rep("CT", 12L), levels = "CT"),
  ResZIP = rep(c(6037L, 6355L), times = c(2L, 10L))
)

# County lookup table: one row per county, carrying its FIPS codes and AREA.
df2 <- data.frame(
  FID = c(590L, 591L, 593L, 594L, 642L, 647L, 651L, 652L),
  County = factor(
    c(
      "Litchfield", "Hartford", "Tolland", "Windham",
      "NewLondon", "Fairfield", "Middlesex", "NewHaven"
    ),
    levels = c(
      "Fairfield", "Hartford", "Litchfield", "Middlesex",
      "NewHaven", "NewLondon", "Tolland", "Windham"
    )
  ),
  STATE_NAME = factor(rep("Connecticut", 8L), levels = "Connecticut"),
  STATE_FIPS = rep(9L, 8L),
  CNTY_FIPS = c(5L, 3L, 13L, 15L, 11L, 1L, 7L, 9L),
  FIPS = c(9005L, 9003L, 9013L, 9015L, 9011L, 9001L, 9007L, 9009L),
  AREA = c(
    929.3449, 742.8998, 411.904, 522.058,
    706.352, 661.2935, 379.362, 623.3514
  )
)


# ZIP code lookup table: one row per zip with its city, coordinates and FIPS.
df3 <- data.frame(
  zip = c(6001L, 6002L, 6006L, 6010L, 6011L, 6030L, 6034L, 6045L, 6049L),
  city = factor(
    c(
      "Avon", "Bloomfield", "Windsor", "Bristol", "Bristol",
      "Farmington", "Farmington", "Manchester", "Melrose"
    ),
    levels = c(
      "Avon", "Bloomfield", "Bristol", "Farmington",
      "Manchester", "Melrose", "Windsor"
    )
  ),
  state = factor(rep("CT", 9L), levels = "CT"),
  latitude = c(41.7897, 41.8328, 41.87964, 41.68225, rep(41.79178, 5L)),
  longitude = c(
    -72.86431, -72.72642, -72.73427, -72.93365, rep(-72.71883, 5L)
  ),
  fips = rep(9003L, 9L)
)


# Add AREA (looked up by county) and latitude/longitude (looked up by ZIP
# code) to every row of df1.
# - LEFT JOINs keep all df1 rows; a lookup without a match yields NA in the
#   added columns instead of dropping the row.
# - County is compared case-insensitively because df1 stores "MIDDLESEX"
#   while df2 stores "Middlesex".
# - Coordinates are matched on df1.ResZIP = df3.zip. Joining df3 on the county
#   FIPS code instead would return one row per ZIP code in the county.
sqldf("SELECT tbl1.*, tbl2.AREA, tbl3.latitude, tbl3.longitude
       FROM df1 tbl1
       LEFT JOIN df2 tbl2 ON upper(tbl1.County) = upper(tbl2.County)
       LEFT JOIN df3 tbl3 ON tbl1.ResZIP = tbl3.zip")

下面的方法是可行的，只是在您提供的数据中，df2 和 df3 之间没有匹配的 FIPS。如果您了解 SQL，可以在 Access 中切换到 SQL 视图，把查询转换成 SQL，然后在 R 中通过 sqldf 使用它。另外，如果您用 dput 函数导出数据并把输出粘贴到问题里，别人会更容易帮助您。

library(sqldf)        
# Sample table to be enriched: 12 rows, with County and ResZIP serving as the
# lookup keys into df2 and df3 respectively.
df1 <- data.frame(
  Year = rep(2005L, 12L),
  HNo = rep(c(218050003L, 218050008L), times = c(2L, 10L)),
  Month = c(10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 11L),
  Day = c(8L, 10L, 1L, 10L, 12L, 16L, 23L, 28L, 12L, 18L, 22L, 11L),
  County = factor(
    rep(c("MIDDLESEX", "NEWLONDON"), times = c(2L, 10L)),
    levels = c("MIDDLESEX", "NEWLONDON")
  ),
  ST = factor(rep("CT", 12L), levels = "CT"),
  ResState = factor(rep("CT", 12L), levels = "CT"),
  ResZIP = rep(c(6037L, 6355L), times = c(2L, 10L))
)

# County lookup table: one row per county, carrying its FIPS codes and AREA.
df2 <- data.frame(
  FID = c(590L, 591L, 593L, 594L, 642L, 647L, 651L, 652L),
  County = factor(
    c(
      "Litchfield", "Hartford", "Tolland", "Windham",
      "NewLondon", "Fairfield", "Middlesex", "NewHaven"
    ),
    levels = c(
      "Fairfield", "Hartford", "Litchfield", "Middlesex",
      "NewHaven", "NewLondon", "Tolland", "Windham"
    )
  ),
  STATE_NAME = factor(rep("Connecticut", 8L), levels = "Connecticut"),
  STATE_FIPS = rep(9L, 8L),
  CNTY_FIPS = c(5L, 3L, 13L, 15L, 11L, 1L, 7L, 9L),
  FIPS = c(9005L, 9003L, 9013L, 9015L, 9011L, 9001L, 9007L, 9009L),
  AREA = c(
    929.3449, 742.8998, 411.904, 522.058,
    706.352, 661.2935, 379.362, 623.3514
  )
)


# ZIP code lookup table: one row per zip with its city, coordinates and FIPS.
df3 <- data.frame(
  zip = c(6001L, 6002L, 6006L, 6010L, 6011L, 6030L, 6034L, 6045L, 6049L),
  city = factor(
    c(
      "Avon", "Bloomfield", "Windsor", "Bristol", "Bristol",
      "Farmington", "Farmington", "Manchester", "Melrose"
    ),
    levels = c(
      "Avon", "Bloomfield", "Bristol", "Farmington",
      "Manchester", "Melrose", "Windsor"
    )
  ),
  state = factor(rep("CT", 9L), levels = "CT"),
  latitude = c(41.7897, 41.8328, 41.87964, 41.68225, rep(41.79178, 5L)),
  longitude = c(
    -72.86431, -72.72642, -72.73427, -72.93365, rep(-72.71883, 5L)
  ),
  fips = rep(9003L, 9L)
)


# Add AREA (looked up by county) and latitude/longitude (looked up by ZIP
# code) to every row of df1.
# - LEFT JOINs keep all df1 rows; a lookup without a match yields NA in the
#   added columns instead of dropping the row.
# - County is compared case-insensitively because df1 stores "MIDDLESEX"
#   while df2 stores "Middlesex".
# - Coordinates are matched on df1.ResZIP = df3.zip. Joining df3 on the county
#   FIPS code instead would return one row per ZIP code in the county.
sqldf("SELECT tbl1.*, tbl2.AREA, tbl3.latitude, tbl3.longitude
       FROM df1 tbl1
       LEFT JOIN df2 tbl2 ON upper(tbl1.County) = upper(tbl2.County)
       LEFT JOIN df3 tbl3 ON tbl1.ResZIP = tbl3.zip")

您可以试试 dplyr。它的“动词”（实际上是函数）很容易理解和掌握。不过，所提供的示例数据框之间似乎没有匹配的记录。

library(dplyr)
# Enrich df1 with AREA (by county) and latitude/longitude (by ZIP code).
# left_join() keeps every df1 row; lookups without a match are filled with NA.

# County is upper case in df1 ("MIDDLESEX") but mixed case in df2
# ("Middlesex"), so an exact-match join finds nothing. Build an upper-case
# character key on both sides before joining.
county_area <- df2 %>%
  mutate(County = toupper(as.character(County))) %>%
  select(County, AREA)
zip_coords <- df3 %>%
  select(zip, latitude, longitude)

res1 <- df1 %>%
  mutate(County = toupper(as.character(County))) %>%
  left_join(county_area, by = "County")
res2 <- left_join(res1, zip_coords, by = c("ResZIP" = "zip"))
res2
# latitude/longitude stay NA: the sample df3 has no rows for zips 6037/6355.
#    Year       HNo Month Day    County ST ResState ResZIP    AREA latitude longitude
# 1  2005 218050003    10   8 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 2  2005 218050003    10  10 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 3  2005 218050008     9   1 NEWLONDON CT       CT   6355 706.352       NA        NA
# 4  2005 218050008     9  10 NEWLONDON CT       CT   6355 706.352       NA        NA
# 5  2005 218050008     9  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 6  2005 218050008     9  16 NEWLONDON CT       CT   6355 706.352       NA        NA
# 7  2005 218050008     9  23 NEWLONDON CT       CT   6355 706.352       NA        NA
# 8  2005 218050008     9  28 NEWLONDON CT       CT   6355 706.352       NA        NA
# 9  2005 218050008    10  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 10 2005 218050008    10  18 NEWLONDON CT       CT   6355 706.352       NA        NA
# 11 2005 218050008    10  22 NEWLONDON CT       CT   6355 706.352       NA        NA
# 12 2005 218050008    11  11 NEWLONDON CT       CT   6355 706.352       NA        NA

您可以试试 dplyr。它的“动词”（实际上是函数）很容易理解和掌握。不过，所提供的示例数据框之间似乎没有匹配的记录。

library(dplyr)
# Enrich df1 with AREA (by county) and latitude/longitude (by ZIP code).
# left_join() keeps every df1 row; lookups without a match are filled with NA.

# County is upper case in df1 ("MIDDLESEX") but mixed case in df2
# ("Middlesex"), so an exact-match join finds nothing. Build an upper-case
# character key on both sides before joining.
county_area <- df2 %>%
  mutate(County = toupper(as.character(County))) %>%
  select(County, AREA)
zip_coords <- df3 %>%
  select(zip, latitude, longitude)

res1 <- df1 %>%
  mutate(County = toupper(as.character(County))) %>%
  left_join(county_area, by = "County")
res2 <- left_join(res1, zip_coords, by = c("ResZIP" = "zip"))
res2
# latitude/longitude stay NA: the sample df3 has no rows for zips 6037/6355.
#    Year       HNo Month Day    County ST ResState ResZIP    AREA latitude longitude
# 1  2005 218050003    10   8 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 2  2005 218050003    10  10 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 3  2005 218050008     9   1 NEWLONDON CT       CT   6355 706.352       NA        NA
# 4  2005 218050008     9  10 NEWLONDON CT       CT   6355 706.352       NA        NA
# 5  2005 218050008     9  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 6  2005 218050008     9  16 NEWLONDON CT       CT   6355 706.352       NA        NA
# 7  2005 218050008     9  23 NEWLONDON CT       CT   6355 706.352       NA        NA
# 8  2005 218050008     9  28 NEWLONDON CT       CT   6355 706.352       NA        NA
# 9  2005 218050008    10  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 10 2005 218050008    10  18 NEWLONDON CT       CT   6355 706.352       NA        NA
# 11 2005 218050008    10  22 NEWLONDON CT       CT   6355 706.352       NA        NA
# 12 2005 218050008    11  11 NEWLONDON CT       CT   6355 706.352       NA        NA

太好了，谢谢。有办法在两列上匹配吗？是的，有。建议将其作为单独的问题与代表性的样本数据一起发布。太好了，谢谢。有办法在两列上匹配吗？是的，有。但建议将其作为单独的问题发布，并提供具有代表性的样本数据。

library(dplyr)
# Enrich df1 with AREA (by county) and latitude/longitude (by ZIP code).
# left_join() keeps every df1 row; lookups without a match are filled with NA.

# County is upper case in df1 ("MIDDLESEX") but mixed case in df2
# ("Middlesex"), so an exact-match join finds nothing. Build an upper-case
# character key on both sides before joining.
county_area <- df2 %>%
  mutate(County = toupper(as.character(County))) %>%
  select(County, AREA)
zip_coords <- df3 %>%
  select(zip, latitude, longitude)

res1 <- df1 %>%
  mutate(County = toupper(as.character(County))) %>%
  left_join(county_area, by = "County")
res2 <- left_join(res1, zip_coords, by = c("ResZIP" = "zip"))
res2
# latitude/longitude stay NA: the sample df3 has no rows for zips 6037/6355.
#    Year       HNo Month Day    County ST ResState ResZIP    AREA latitude longitude
# 1  2005 218050003    10   8 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 2  2005 218050003    10  10 MIDDLESEX CT       CT   6037 379.362       NA        NA
# 3  2005 218050008     9   1 NEWLONDON CT       CT   6355 706.352       NA        NA
# 4  2005 218050008     9  10 NEWLONDON CT       CT   6355 706.352       NA        NA
# 5  2005 218050008     9  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 6  2005 218050008     9  16 NEWLONDON CT       CT   6355 706.352       NA        NA
# 7  2005 218050008     9  23 NEWLONDON CT       CT   6355 706.352       NA        NA
# 8  2005 218050008     9  28 NEWLONDON CT       CT   6355 706.352       NA        NA
# 9  2005 218050008    10  12 NEWLONDON CT       CT   6355 706.352       NA        NA
# 10 2005 218050008    10  18 NEWLONDON CT       CT   6355 706.352       NA        NA
# 11 2005 218050008    10  22 NEWLONDON CT       CT   6355 706.352       NA        NA
# 12 2005 218050008    11  11 NEWLONDON CT       CT   6355 706.352       NA        NA