在R中从RNAseq数据集提取基因名称

在R中从RNAseq数据集提取基因名称,r,geo,names,rna-seq,R,Geo,Names,Rna Seq,我有一个无法理解或解决的问题。我从GEO下载了GSE115262。我想从GSM3172784HC$annotation.gene_name中提取基因名称。当我这样做时,我得到的是数字而不是基因名。如何获取字符值?如果我运行str(),这就是我得到的:$ annotation.gene_name: Factor w/ 56233 levels "5_8S_rRNA","5S_rRNA",..: 53514 52750 11836 48738。可以看到我得到的是数字。如果我运行head()并查看GSM317278

我有一个无法理解或解决的问题。我从GEO下载了GSE115262。我想从GSM3172784HC$annotation.gene_name中提取基因名称。当我这样做时,我得到的是数字而不是基因名。如何获取字符值?如果我运行str(),这就是我得到的:$ annotation.gene_name: Factor w/ 56233 levels "5_8S_rRNA","5S_rRNA",..: 53514 52750 11836 48738。可以看到我得到的是数字(即因子的整数编码)。如果我运行head()并查看GSM3172784HC$annotation.gene_name,就会得到基因名称,这正是我想要的。我怎样才能得到这些基因名称?

 #### Need to load in all libraries
    #General Bioconductor packages
library("GEOquery");
library("Biobase");

   # Loop Through Files for download
# Download the supplementary files for every GEO accession stored in tmp$V1.
# NOTE(review): `tmp` is not defined in the visible code; it is presumably a
# table of accession IDs read earlier -- confirm.
# seq_along() gives an empty sequence when tmp$V1 is empty, whereas
# 1:length(tmp$V1) would iterate over c(1, 0).
for (i in seq_along(tmp$V1)) {
  getGEOSuppFiles(tmp$V1[i])
}

######## Healthy Controls GSE115262 ##########
## May need to read thing mult. times to get into R
# Read one sample's gzipped expression table; the first line holds the
# column names.
GSM3172784HC <- read.table(gzfile("FilePath.txt.gz"), header = TRUE)

# Build a one-column data frame of expected counts (column "HC1") with the
# gene names as row names.
# - as.character() takes the gene labels. cbind()-ing a factor with a numeric
#   column builds a numeric matrix, so the factor's integer level codes end up
#   in place of the names.
# - make.unique() appends .1, .2, ... to repeated names, because data-frame
#   row names must be unique and gene names can repeat across gene IDs
#   (56233 distinct names for 57955 rows in this sample).
HCData <- data.frame(
  HC1 = GSM3172784HC$expected_count,
  row.names = make.unique(as.character(GSM3172784HC$annotation.gene_name))
)

str(GSM3172784HC)
'data.frame':   57955 obs. of  11 variables:
 $ X                      : int  1 2 3 4 5 6 7 8 9 10 ...
 $ annotation.gene_id     : Factor w/ 57955 levels "ENSG00000000003",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ annotation.gene_biotype: Factor w/ 43 levels "3prime_overlapping_ncRNA",..: 20 20 20 20 20 20 20 20 20 20 ...
 $ annotation.gene_name   : Factor w/ 56233 levels "5_8S_rRNA","5S_rRNA",..: 53514 52750 11836 48738 5916 13731 7375 14125 14433 24521 ...
 $ annotation.source      : Factor w/ 4 levels "ensembl","ensembl_havana",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ transcript_id.s.       : Factor w/ 57955 levels "ENST00000000233,ENST00000415666,ENST00000459680,ENST00000463733,ENST00000467281,ENST00000489673",..: 17666 17669 17397 16695 5799 17850 14301 7 1276 12553 ...
 $ length                 : num  1749 940 1073 1538 2430 ...
 $ effective_length       : num  1623 814 947 1412 2304 ...
 $ expected_count         : num  0 0 1 1 0 2 2 0 1 1 ...
 $ TPM                    : num  0 0 0.27 0.18 0 0.23 0.07 0 0.65 0.17 ...
 $ FPKM                   : num  0 0 0.41 0.27 0 0.35 0.11 0 0.98 0.25 ...

head(GSM3172784HC)
  X annotation.gene_id annotation.gene_biotype annotation.gene_name
1 1    ENSG00000000003          protein_coding               TSPAN6
2 2    ENSG00000000005          protein_coding                 TNMD
3 3    ENSG00000000419          protein_coding                 DPM1
4 4    ENSG00000000457          protein_coding                SCYL3
5 5    ENSG00000000460          protein_coding             C1orf112
6 6    ENSG00000000938          protein_coding                  FGR
  annotation.source
1    ensembl_havana
2    ensembl_havana
3    ensembl_havana
4    ensembl_havana
5    ensembl_havana
6    ensembl_havana
                                                                                                                                 transcript_id.s.
1                                                                 ENST00000373020,ENST00000494424,ENST00000496771,ENST00000612152,ENST00000614008
2                                                                                                                 ENST00000373031,ENST00000485971
3                                                 ENST00000371582,ENST00000371584,ENST00000371588,ENST00000413082,ENST00000466152,ENST00000494752
4                                                                 ENST00000367770,ENST00000367771,ENST00000367772,ENST00000423670,ENST00000470238
5 ENST00000286031,ENST00000359326,ENST00000413811,ENST00000459772,ENST00000466580,ENST00000472795,ENST00000481744,ENST00000496973,ENST00000498289
6                                 ENST00000374003,ENST00000374004,ENST00000374005,ENST00000399173,ENST00000457296,ENST00000468038,ENST00000475472
   length effective_length expected_count  TPM FPKM
1 1749.40          1623.17              0 0.00 0.00
2  940.50           814.28              0 0.00 0.00
3 1073.00           946.77              1 0.27 0.41
4 1538.00          1411.77              1 0.18 0.27
5 2430.11          2303.88              0 0.00 0.00
6 2350.00          2223.77              2 0.23 0.35
#####需要加载所有库
#通用Bioconductor包
library("GEOquery");
library("Biobase");
#循环下载文件
for(i in 1:length(tmp$V1)){
getGEOSuppFiles(tmp$V1[i])
};
########健康对照组GSE115262##########
##可能需要多次读取才能将数据载入R

GSM3172784HC<-read.table(gzfile("FilePath.txt.gz"), header=T)

我们可以将因子(factor)列转换为字符(character)类型:

library(dplyr)
# Convert every factor column to character so that values such as gene names
# are stored as text rather than integer level codes.
# across() + where() replaces the superseded mutate_if() (dplyr >= 1.0.0).
GSM3172784HC <- GSM3172784HC %>%
  mutate(across(where(is.factor), as.character))

在base R中,我们可以这样做:

# Flag the factor columns. vapply() always returns a logical vector, even for
# a data frame with no columns, where sapply() would return an empty list.
i1 <- vapply(GSM3172784HC, is.factor, logical(1))
# Replace each flagged column with its labels as character strings.
GSM3172784HC[i1] <- lapply(GSM3172784HC[i1], as.character)

我们可以将因子(factor)列转换为字符(character)类型:

library(dplyr)
# Convert every factor column to character so that values such as gene names
# are stored as text rather than integer level codes.
# across() + where() replaces the superseded mutate_if() (dplyr >= 1.0.0).
GSM3172784HC <- GSM3172784HC %>%
  mutate(across(where(is.factor), as.character))

在base R中,我们可以这样做:

# Flag the factor columns. vapply() always returns a logical vector, even for
# a data frame with no columns, where sapply() would return an empty list.
i1 <- vapply(GSM3172784HC, is.factor, logical(1))
# Replace each flagged column with its labels as character strings.
GSM3172784HC[i1] <- lapply(GSM3172784HC[i1], as.character)

嗨,Akrun,这太完美了。我在这上面花了好几个小时,谢谢!