字符串不起作用 结构(年龄=c(39L、50L、38L、53L、28L、37L、49L、52L、, 31L,42L,37L,30L,23L,32L,40L),工人阶级=结构(c(8L, 7L,5L,5L,5L,5L,5L,7L,5L,5L,5L,8L,5L,5L,5L,5L),标签=c(“?”, “联邦政府”、“地方政府”、“从未工作过”、“私人”、“自我管理公司”, “自营emp非公司”、“州政府”、“无报酬”)、class=“因子”), fnlwgt=c(77516L、83311L、215646L、234721L、338409L、284582L、, 160187L、209642L、45781L、159449L、280464L、141297L、122272L、, 205019L,121772L),教育=结构(c(10L,10L,12L, 2L、10L、13L、7L、12L、13L、10L、16L、10L、10L、8L、9L),标签=c(“第十”, “第11”、“第12”、“第1-4”、“第5-6”、“第7-8”、“第9”, “助理acdm”、“助理voc”、“学士”、“博士”, “HS毕业生”、“硕士”、“幼儿园”、“教授学校”、“某学院” ),class=“factor”),`education num`=c(13L,13L,9L,7L, 13L,14L,5L,9L,14L,13L,10L,13L,13L,12L,11L),“婚姻状况”=结构(c(5L, 3L,1L,3L,3L,3L,4L,3L,5L,3L,3L,5L,5L,3L),标签=c(“离婚”, “已婚AF配偶”、“已婚civ配偶”、“已婚配偶缺席”, “从未结婚”、“分居”、“丧偶”)、class=“factor”), 职业=结构(c(2L、5L、7L、7L、11L、5L、9L、5L、, 11L、5L、5L、11L、2L、13L、4L),.Label=c(“?”,“行政文书”, “武装部队”、“工艺维修”、“高级管理人员”、“农耕渔业”, “清洁工人”、“机器操作检查”、“其他服务”, “私人住宅服务”、“教授专业”、“保护性服务”, “销售”、“技术支持”、“运输搬运”),class=“因子”), 关系=结构(c(2L,1L,2L,1L,6L,6L,2L,1L, 2L,1L,1L,1L,4L,2L,1L),.Label=c(“丈夫”,“不在家”, “其他亲属”、“亲生子女”、“未婚”、“妻子”),class=“factor”), 种族=结构(c(5L,5L,5L,3L,3L,5L,3L,5L,5L,5L,5L, 3L,2L,5L,3L,2L),.Label=c(“美国印第安爱斯基摩人”,“亚洲太平洋岛民”, “黑色”、“其他”、“白色”)、class=”因子“、性别=结构(c(2L、, 2L,2L,2L,1L,1L,1L,2L,1L,2L,2L,2L,1L,2L,2L,2L,2L),标签=c(“内螺纹”, “男性”,class=“factor”),`资本收益`=c(2174升,0升, 0L,0L,0L,0L,0L,0L,14084L,5178L,0L,0L,0L,0L,0L,0L ),资本损失`=c(0L,0L,0L,0L,0L,0L,0L,0L,0L, 0L,0L,0L,0L,0L,0L),‘每周工作小时数’=c(40L,13L,40L, 40L,40L,40L,16L,45L,50L,40L,80L,40L,30L,50L,40L ),`native country`=结构(c(40L,40L,40L,40L,6L, 40L,24L,40L,40L,40L,40L,20L,40L,40L,40L,1L),标签=c(“?”, “柬埔寨”、“加拿大”、“中国”、“哥伦比亚”、“古巴”、“多米尼加共和国”, “厄瓜多尔”、“萨尔瓦多”、“英国”、“法国”、“德国”, “希腊”、“危地马拉”、“海地”、“荷兰荷兰”, “洪都拉斯”、“香港”、“匈牙利”、“印度”、“伊朗”、“爱尔兰”, “意大利”、“牙买加”、“日本”、“老挝”、“墨西哥”、“尼加拉瓜”, “美国边远地区(关岛、美属维尔京群岛等)”、“秘鲁”、“菲律宾”、“波兰”, “葡萄牙”、“波多黎各”、“苏格兰”、“南方”、“台湾”, “泰国”、“特立尼达和多巴哥”、“美国”、“越南”, “南斯拉夫”,class=“factor”),`NA`=结构(c(1L, 1L,1L,1L,1L,1L,1L,2L,2L,2L,2L,1L,1L,1L,1L,2L),.Label=c(“50K”),class=“factor”),.Names=c(“年龄”,“工人阶级”, 
“fnlwgt”、“受教育程度”、“受教育人数”、“婚姻状况”、“职业”, “关系”、“种族”、“性别”、“资本收益”、“资本损失”, “每周小时数”、“本国”,NA),row.names=c(NA,15L),class=“data.frame”)

字符串不起作用 结构(年龄=c(39L、50L、38L、53L、28L、37L、49L、52L、, 31L,42L,37L,30L,23L,32L,40L),工人阶级=结构(c(8L, 7L,5L,5L,5L,5L,5L,7L,5L,5L,5L,8L,5L,5L,5L,5L),标签=c(“?”, “联邦政府”、“地方政府”、“从未工作过”、“私人”、“自我管理公司”, “自营emp非公司”、“州政府”、“无报酬”)、class=“因子”), fnlwgt=c(77516L、83311L、215646L、234721L、338409L、284582L、, 160187L、209642L、45781L、159449L、280464L、141297L、122272L、, 205019L,121772L),教育=结构(c(10L,10L,12L, 2L、10L、13L、7L、12L、13L、10L、16L、10L、10L、8L、9L),标签=c(“第十”, “第11”、“第12”、“第1-4”、“第5-6”、“第7-8”、“第9”, “助理acdm”、“助理voc”、“学士”、“博士”, “HS毕业生”、“硕士”、“幼儿园”、“教授学校”、“某学院” ),class=“factor”),`education num`=c(13L,13L,9L,7L, 13L,14L,5L,9L,14L,13L,10L,13L,13L,12L,11L),“婚姻状况”=结构(c(5L, 3L,1L,3L,3L,3L,4L,3L,5L,3L,3L,5L,5L,3L),标签=c(“离婚”, “已婚AF配偶”、“已婚civ配偶”、“已婚配偶缺席”, “从未结婚”、“分居”、“丧偶”)、class=“factor”), 职业=结构(c(2L、5L、7L、7L、11L、5L、9L、5L、, 11L、5L、5L、11L、2L、13L、4L),.Label=c(“?”,“行政文书”, “武装部队”、“工艺维修”、“高级管理人员”、“农耕渔业”, “清洁工人”、“机器操作检查”、“其他服务”, “私人住宅服务”、“教授专业”、“保护性服务”, “销售”、“技术支持”、“运输搬运”),class=“因子”), 关系=结构(c(2L,1L,2L,1L,6L,6L,2L,1L, 2L,1L,1L,1L,4L,2L,1L),.Label=c(“丈夫”,“不在家”, “其他亲属”、“亲生子女”、“未婚”、“妻子”),class=“factor”), 种族=结构(c(5L,5L,5L,3L,3L,5L,3L,5L,5L,5L,5L, 3L,2L,5L,3L,2L),.Label=c(“美国印第安爱斯基摩人”,“亚洲太平洋岛民”, “黑色”、“其他”、“白色”)、class=”因子“、性别=结构(c(2L、, 2L,2L,2L,1L,1L,1L,2L,1L,2L,2L,2L,1L,2L,2L,2L,2L),标签=c(“内螺纹”, “男性”,class=“factor”),`资本收益`=c(2174升,0升, 0L,0L,0L,0L,0L,0L,14084L,5178L,0L,0L,0L,0L,0L,0L ),资本损失`=c(0L,0L,0L,0L,0L,0L,0L,0L,0L, 0L,0L,0L,0L,0L,0L),‘每周工作小时数’=c(40L,13L,40L, 40L,40L,40L,16L,45L,50L,40L,80L,40L,30L,50L,40L ),`native country`=结构(c(40L,40L,40L,40L,6L, 40L,24L,40L,40L,40L,40L,20L,40L,40L,40L,1L),标签=c(“?”, “柬埔寨”、“加拿大”、“中国”、“哥伦比亚”、“古巴”、“多米尼加共和国”, “厄瓜多尔”、“萨尔瓦多”、“英国”、“法国”、“德国”, “希腊”、“危地马拉”、“海地”、“荷兰荷兰”, “洪都拉斯”、“香港”、“匈牙利”、“印度”、“伊朗”、“爱尔兰”, “意大利”、“牙买加”、“日本”、“老挝”、“墨西哥”、“尼加拉瓜”, “美国边远地区(关岛、美属维尔京群岛等)”、“秘鲁”、“菲律宾”、“波兰”, “葡萄牙”、“波多黎各”、“苏格兰”、“南方”、“台湾”, “泰国”、“特立尼达和多巴哥”、“美国”、“越南”, “南斯拉夫”,class=“factor”),`NA`=结构(c(1L, 1L,1L,1L,1L,1L,1L,2L,2L,2L,2L,1L,1L,1L,1L,2L),.Label=c(“50K”),class=“factor”),.Names=c(“年龄”,“工人阶级”, 
“fnlwgt”、“受教育程度”、“受教育人数”、“婚姻状况”、“职业”, “关系”、“种族”、“性别”、“资本收益”、“资本损失”, “每周小时数”、“本国”,NA),row.names=c(NA,15L),class=“data.frame”),r,sas,R,Sas,在这些数据中有一些问号。SAS或R不将其识别为缺失值。 所以我试着首先将其作为缺失值,或者需要删除带有问号(?)的观察值 最好的办法是让它成为缺失的价值,但我不知道怎么做。 所以我尝试了下面这样的“na.string” structure(list(age = c(39L, 50L, 38L, 53L, 28L, 37L, 49L, 52L, 31L, 42L, 37L, 30L, 23L, 32L, 40L), workclass = structure(c(8L, 7L, 5L, 5L,

这些数据中有一些问号(?),SAS 和 R 都不会把它们识别为缺失值。所以我想先把它们转换为缺失值,或者删除含有问号(?)的观测。

最好的办法是把它们变成缺失值,但我不知道该怎么做。所以我尝试了下面这样的 `na.strings` 参数:

# Sample of the data as a literal data.frame (looks like dput() output):
# 15 rows (row.names = c(NA, 15L)) and 15 columns.
# Every factor label starts with a blank, so the "unknown" marker is " ?"
# (blank + question mark), not "?". It is a level of workclass, occupation
# and native-country; in these 15 rows only the last native-country value
# actually uses it.
# The 15th entry of .Names is NA, i.e. the final column (levels " <=50K" and
# " >50K") has no real name.
structure(list(age = c(39L, 50L, 38L, 53L, 28L, 37L, 49L, 52L, 
31L, 42L, 37L, 30L, 23L, 32L, 40L), workclass = structure(c(8L, 
7L, 5L, 5L, 5L, 5L, 5L, 7L, 5L, 5L, 5L, 8L, 5L, 5L, 5L), .Label = c(" ?", 
" Federal-gov", " Local-gov", " Never-worked", " Private", " Self-emp-inc", 
" Self-emp-not-inc", " State-gov", " Without-pay"), class = "factor"), 
fnlwgt = c(77516L, 83311L, 215646L, 234721L, 338409L, 284582L, 
160187L, 209642L, 45781L, 159449L, 280464L, 141297L, 122272L, 
205019L, 121772L), education = structure(c(10L, 10L, 12L, 
2L, 10L, 13L, 7L, 12L, 13L, 10L, 16L, 10L, 10L, 8L, 9L), .Label = c(" 10th", 
" 11th", " 12th", " 1st-4th", " 5th-6th", " 7th-8th", " 9th", 
" Assoc-acdm", " Assoc-voc", " Bachelors", " Doctorate", 
" HS-grad", " Masters", " Preschool", " Prof-school", " Some-college"
), class = "factor"), `education-num` = c(13L, 13L, 9L, 7L, 
13L, 14L, 5L, 9L, 14L, 13L, 10L, 13L, 13L, 12L, 11L), `marital-status` = structure(c(5L, 
3L, 1L, 3L, 3L, 3L, 4L, 3L, 5L, 3L, 3L, 3L, 5L, 5L, 3L), .Label = c(" Divorced", 
" Married-AF-spouse", " Married-civ-spouse", " Married-spouse-absent", 
" Never-married", " Separated", " Widowed"), class = "factor"), 
occupation = structure(c(2L, 5L, 7L, 7L, 11L, 5L, 9L, 5L, 
11L, 5L, 5L, 11L, 2L, 13L, 4L), .Label = c(" ?", " Adm-clerical", 
" Armed-Forces", " Craft-repair", " Exec-managerial", " Farming-fishing", 
" Handlers-cleaners", " Machine-op-inspct", " Other-service", 
" Priv-house-serv", " Prof-specialty", " Protective-serv", 
" Sales", " Tech-support", " Transport-moving"), class = "factor"), 
relationship = structure(c(2L, 1L, 2L, 1L, 6L, 6L, 2L, 1L, 
2L, 1L, 1L, 1L, 4L, 2L, 1L), .Label = c(" Husband", " Not-in-family", 
" Other-relative", " Own-child", " Unmarried", " Wife"), class = "factor"), 
race = structure(c(5L, 5L, 5L, 3L, 3L, 5L, 3L, 5L, 5L, 5L, 
3L, 2L, 5L, 3L, 2L), .Label = c(" Amer-Indian-Eskimo", " Asian-Pac-Islander", 
" Black", " Other", " White"), class = "factor"), sex = structure(c(2L, 
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L), .Label = c(" Female", 
" Male"), class = "factor"), `capital-gain` = c(2174L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 14084L, 5178L, 0L, 0L, 0L, 0L, 0L
), `capital-loss` = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L), `hours-per-week` = c(40L, 13L, 40L, 
40L, 40L, 40L, 16L, 45L, 50L, 40L, 80L, 40L, 30L, 50L, 40L
), `native-country` = structure(c(40L, 40L, 40L, 40L, 6L, 
40L, 24L, 40L, 40L, 40L, 40L, 20L, 40L, 40L, 1L), .Label = c(" ?", 
" Cambodia", " Canada", " China", " Columbia", " Cuba", " Dominican-Republic", 
" Ecuador", " El-Salvador", " England", " France", " Germany", 
" Greece", " Guatemala", " Haiti", " Holand-Netherlands", 
" Honduras", " Hong", " Hungary", " India", " Iran", " Ireland", 
" Italy", " Jamaica", " Japan", " Laos", " Mexico", " Nicaragua", 
" Outlying-US(Guam-USVI-etc)", " Peru", " Philippines", " Poland", 
" Portugal", " Puerto-Rico", " Scotland", " South", " Taiwan", 
" Thailand", " Trinadad&Tobago", " United-States", " Vietnam", 
" Yugoslavia"), class = "factor"), `NA` = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L), .Label = c(" <=50K", 
" >50K"), class = "factor")), .Names = c("age", "workclass", 
"fnlwgt", "education", "education-num", "marital-status", "occupation", 
"relationship", "race", "sex", "capital-gain", "capital-loss", 
"hours-per-week", "native-country", NA), row.names = c(NA, 15L), class = "data.frame")

除了修正 `na.strings` 参数之外,您还可以在读入数据之后再把这些条目设为 `NA`。

# Read the comma-separated adult data set; the file has no header row.
# Each field keeps the blank that follows the comma (strip.white defaults to
# FALSE), so an unknown entry is read as " ?" and na.strings = "?" alone never
# matches it. Listing both spellings turns those entries into NA on import.
adult <- read.table(
  "adult.txt",
  sep = ",",
  header = FALSE,
  na.strings = c("?", " ?")
)

只是一个想法,没有经过测试……您是否注意到数据结构中 `?` 前面有一个空格?也许把这个空格加到 `na.strings` 参数里就行了。我没想到这一点,我试试看。谢谢 @nico。哦,天哪!它起作用了!哇,谢谢你。你可以把它写成一个回答,我会采纳的。
# Read the raw file first, then mark the unknown entries as missing.
adult <- read.table("adult.txt", sep = ",", header = FALSE)

# Exact match on the marker as it appears in the data: blank + question mark.
adult[adult == " ?"] <- NA

# Catch any remaining cell that contains a question mark. This has to be done
# column by column: grepl() applied to the whole data frame returns one value
# per column, so indexing with it would blank entire columns instead of the
# individual cells.
adult[] <- lapply(adult, function(column) {
  column[grepl("?", column, fixed = TRUE)] <- NA
  column
})