900字范文,内容丰富有趣,生活中的好帮手!
900字范文 > seurat使用findallmarkers 得到的差异基因列表进行富集分析clusterprolifer-2 调成fc值为0.69

seurat使用findallmarkers 得到的差异基因列表进行富集分析clusterprolifer-2 调成fc值为0.69

时间:2023-01-08 09:42:47

相关推荐

seurat使用findallmarkers 得到的差异基因列表进行富集分析clusterprolifer-2 调成fc值为0.69

#######差异分析

###########################33

library(clusterProfiler)

library(Seurat)

library(SeuratWrappers)

library(ggplot2)

library(dplyr)

library(stringr)

library(openxlsx)

library(org.Mm.eg.db)

library(“reshape”)

library(“RColorBrewer”)

getwd()

file=“G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters”

dir.create(file)

setwd(file)

getwd()

?loadWorkbook

#library(xlsx) #比openxlsx读取文件慢多了!!!!!

##再三检查文件路径!!!!!!!!!!!!

markers = read.xlsx(“G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/silicosis_ST_harmony_r0.6_cluster_marker.xlsx”)

head(markers)

#View(markers)

names(table(markersKaTeX parse error: Expected 'EOF', got '#' at position 11: cluster)) #̲markers"Myofibroblast/vascular smooth muscle cell"<-“myo-fib”

markerscluster[markerscluster[markerscluster[markerscluster==“Myofibroblast/vascular smooth muscle cell”]=“fib-myo” #把第17个cluster的非法名字改成正常名字,因为给导出的xlsx命名时使用了custer的名字

names(table(markers$cluster))

k <- keys(org.Mm.eg.db, keytype = “ENTREZID”)

gene.list <- select(org.Mm.eg.db, keys = k, columns = c(“SYMBOL”, “ENSEMBL”), keytype = “ENTREZID”)## 73306 3

head(gene.list)

#自建函数 输入go富集条目 和文件的名称 就返回相应的pdf文件和xlsx文件

clusterProfilerOut = function(enrichRes,outfile){

enrichResfunction.gene.num=unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"],"/"))),2)]enrichResfunction.gene.num = unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"],"/"))),2)] enrichResfunction.gene.num=unlist(strsplit(enrichRes[,"BgRatio"],"/"))[seq(1,length(unlist(strsplit(enrichRes[,"BgRatio"],"/"))),2)]enrichResGeneRatio = enrichResCount/as.numeric(enrichResCount / as.numeric(enrichResCount/as.numeric(enrichResfunction.gene.num) #计算generatio

write.xlsx(enrichRes,paste0(outfile, “.xlsx”),col.names=T,row.names=F)

if (dim(enrichRes)[1]>=10) {enrichRes.use = enrichRes[1:10,]} else{enrichRes.use = enrichRes[1:dim(enrichRes)[1],]}#显示的富集条目不超过10条

enrichRes.use = enrichRes.use[order(enrichRes.useKaTeX parse error: Expected 'EOF', got '#' at position 28: …ecreasing=T),] #̲按照generatio降序显示…Description = factor(enrichRes.useDescription,levels=rev(enrichRes.useDescription,levels=rev(enrichRes.useDescription,levels=rev(enrichRes.useDescription))

gap = (max(enrichRes.useGeneRatio)−min(enrichRes.useGeneRatio) - min(enrichRes.useGeneRatio)−min(enrichRes.useGeneRatio))/5

#制作并输出pdf

pdf(paste0(outfile, “.pdf”))

p = ggplot(enrichRes.use,mapping = aes(x=GeneRatio,y=Description))+

geom_point(aes(size=Count,color=p.adjust))+theme_bw()+

scale_color_gradient(low = “blue”, high = “red”)+

scale_x_continuous(expand = c(gap, gap))+ylab(NULL)+

scale_y_discrete(labels=function(x) str_wrap(x, width=60))

print§

dev.off()

}

getwd() #此时手动把all_cluster_markers.xlsx文件复制到工作目录下

path=getwd()

dir(path)

files=dir(path)

files #得到工作目录下的所有文件名

‘’’

path=“G:/silicosis/sicosis/silicosis_ST/yll/0214/harmony_cluster/d_all/r=0.6_11clusters_0.69fc”

dir.create(path)

setwd(path)

‘’’

getwd()

head(markers)

#library(xlsx)

for (i in files) {

#i=“all_cluster_markers.xlsx”

input = read.xlsx(paste(path, i, sep = “/”))#读取xlsx文件 注意有的时候需要加row.names 有的时候不需要加

head(input)

inputcluster[inputcluster[inputcluster[inputcluster==“Myofibroblast/vascular smooth muscle cell”]=“fib-myo” #把第17个cluster的非法名字改成正常名字,因为给导出的xlsx命名时使用了custer的名字

inputsymbol<−inputsymbol<-inputsymbol<−inputgene

head(input)

inputFeatureName.entrez=gene.list[match(inputFeatureName.entrez = gene.list[match(inputFeatureName.entrez=gene.list[match(inputsymbol, gene.listKaTeX parse error: Expected 'EOF', got '#' at position 20: …OL),"ENTREZID"]#̲在input文件的基础上添加s…change=ifelse(inputavglog2FC>0,"up","down")input.sub<−input[inputavg_log2FC>0,"up","down") input.sub<-input[inputavgl​og2FC>0,"up","down")input.sub<−input[inputp_val_adj<0.001,] #取出所有p值小于0.001的

print(head(input.sub))

dim(input.sub)

input.sub = na.omit(input[,c(“cluster”,“FeatureName.entrez”,“avg_log2FC”,“p_val_adj”)])

head(input.sub)

colnames(input.sub) = c(“cluster”,“gene”, “log2FC”, “PValue”)

print(head(input.sub))

subset_data=input.sub

head(subset_data)

cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数

length(cluster_number)

cluster_number

str(cluster_number)

names(table(subset_datacluster))[1]subsetdata[subsetdatacluster))[1] subset_data[subset_datacluster))[1]subsetd​ata[subsetd​atacluster==“0”,]

##注意文件中cluster的命名 最好不要出现斜杠|等特殊字符 容易出错

#注意p值是否需要约束?

#注意是否需要正负号表示上下调?

#for (cluster_num in 0:(length(cluster_number)-1)) { #对于cluster为数字的

##只看上调的基因 ,注意修改名字!!!!!!!!!!!!!

subset_data = subset(input.sub, log2FC>0.69&PValue<0.05)

head(subset_data)

cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数

cluster_number

for (cluster_num in cluster_number) { ##对于cluster为字符的

ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改keyType = "ENTREZID", OrgDb = org.Mm.eg.db, ont = "ALL", readable = TRUE,pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")res = ego[ego$p.adjust<0.05,]print(dim(res))print(table(res$ONTOLOGY))if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichGO"))}ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)res = ego.KEGG[ego.KEGG$p.adjust<0.05,]print(dim(res)) if(nrow(res) !=0){#keggclusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_up_enrichKEGG"))}

}

##只看下调的基因 ,注意修改名字!!!!!!!!!!!!!

subset_data = subset(input.sub, log2FC<(-0.69)&PValue<0.05)

head(subset_data)

cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数

cluster_number

for (cluster_num in cluster_number) { ##对于cluster为字符的

ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改keyType = "ENTREZID", OrgDb = org.Mm.eg.db, ont = "ALL", readable = TRUE,pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")res = ego[ego$p.adjust<0.05,]print(dim(res))print(table(res$ONTOLOGY))if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichGO"))}ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)res = ego.KEGG[ego.KEGG$p.adjust<0.05,]print(dim(res)) if(nrow(res) !=0){#keggclusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_down_enrichKEGG"))}

}

##不区分上下调 #################################

subset_data = subset(input.sub, abs(log2FC)>0.69&PValue<0.05)

head(subset_data)

cluster_number<-names(table(subset_data$cluster)) #获取文件中的cluster个数

cluster_number

for (cluster_num in cluster_number) { ##对于cluster为字符的

ego=enrichGO(gene=subset_data[subset_data$cluster==cluster_num,]$gene, #!!!!!!全都要改!!!!!!!!!!!!!!!!!!!泉沟要改keyType = "ENTREZID", OrgDb = org.Mm.eg.db, ont = "ALL", readable = TRUE,pvalueCutoff = 1, qvalueCutoff = 1, pAdjustMethod = "BH")res = ego[ego$p.adjust<0.05,]print(dim(res))print(table(res$ONTOLOGY))if(nrow(res) != 0){#go,使用自定义的函数画图 ,注意命名时候把i的 文件属性名去掉clusterProfilerOut(res, paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichGO"))}ego.KEGG = enrichKEGG(gene=subset_data[subset_data$cluster==cluster_num,]$gene, organism = "mmu",keyType = "kegg", pvalueCutoff = 1, pAdjustMethod = "BH", qvalueCutoff = 1)res = ego.KEGG[ego.KEGG$p.adjust<0.05,]print(dim(res)) if(nrow(res) !=0){#keggclusterProfilerOut(res,paste0(unlist(strsplit(i, ".xl"))[1],cluster_num,"_both_enrichKEGG"))}

}

}

dev.off()

path=“D:/ARDS_scripts_1012/ARDS/Step2_harmony_f200_R3/ws/Fibroblast/0.5/sepsis_Fibroblast_r0.5.rds”

load(path)

table(subset_datastim,subsetdatastim,subset_datastim,subsetd​ataseurat_clusters)

DimPlot(subset_data,group.by = “stim”)

DimPlot(subset_data,split.by = “stim”,

group.by = “seurat_clusters”,

label = TRUE)

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。