Gene mapping as of February 2022.
# Query Ensembl BioMart (human gene dataset, "useast" mirror) for annotation
# attributes of every gene ID listed in `human_count$Geneid`.
ensembl_hg38 <- useEnsembl(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  mirror = "useast"
)
genemap <- getBM(
  attributes = c(
    "ensembl_gene_id", "chromosome_name", "start_position",
    "end_position", "transcript_start", "transcript_end",
    "transcript_length", "strand", "percentage_gene_gc_content",
    "transcription_start_site", "external_gene_name",
    "go_id", "gene_biotype", "hgnc_symbol", "arrayexpress"
  ),
  filters = "ensembl_gene_id",
  values = human_count$Geneid,
  mart = ensembl_hg38
)
# The table is stored under the name `genemap`: the original call tried to save
# an object called `ensembl_hg38_genemap`, which this script never creates, and
# the code that later loads this file reads the table back as `genemap`.
save(genemap, file = paste0(objects_directory, "ensembl_hg38_genemap.RData"))
# Read the GTF and flatten the fields used below into a plain data frame
# (one row per GTF record).
gtf <- import(paste0(outputs_directory, 'hg38_ensembl.gtf'))
promoters_ap <- data.frame(
  chr = as.character(chrom(gtf)),
  start = as.numeric(start(gtf)),
  end = as.numeric(end(gtf)),
  strand = as.character(strand(gtf)),
  transcript_id = as.character(gtf$transcript_id),
  gene_id = as.character(gtf$gene_id),
  gene_name = as.character(gtf$gene_name),
  gene_biotype = as.character(gtf$gene_biotype),
  type = gtf$type,
  stringsAsFactors = FALSE
)
length(unique(promoters_ap$gene_id))

# Keep only the records of type "transcript" and group them by transcript ID.
promoters_ap <- promoters_ap[promoters_ap$type == "transcript", ]
promoters_sp <- split(promoters_ap, promoters_ap$transcript_id)

## for each transcript find the TSS
# "+" strand: TSS is the smallest start; any other strand: the largest end.
# The chosen record is returned with the TSS appended as column `tss`.
promoters_tss <- do.call('rbind', lapply(promoters_sp, function(tx) {
  if (as.character(unique(tx$strand)) == "+") {
    chosen <- tx[which.min(tx$start), ]
    chosen$tss <- chosen$start
  } else {
    chosen <- tx[which.max(tx$end), ]
    chosen$tss <- chosen$end
  }
  chosen
}))
##
# Windows of TSS - 500 to TSS + 500 around every transcript TSS, named by
# transcript ID and carrying the gene annotation as metadata columns.
tss_position <- as.numeric(promoters_tss$tss)
promoters_tss_gr <- GRanges(
  seqnames = promoters_tss$chr,
  ranges = IRanges(tss_position - 500,
                   end = tss_position + 500,
                   names = promoters_tss$transcript_id),
  strand = promoters_tss$strand,
  gene_id = promoters_tss$gene_id,
  gene_name = promoters_tss$gene_name,
  gene_biotype = promoters_tss$gene_biotype,
  tss = promoters_tss$tss
)
seqlevelsStyle(promoters_tss_gr) <- 'ucsc'

# For every window, store the index of an overlapping range in `hs_me3`
# (0 = no overlap). With several overlaps per window the last one listed wins.
promoters_tss$me3_peak <- 0
me3_overlaps <- findOverlaps(promoters_tss_gr, hs_me3)
promoters_tss$me3_peak[queryHits(me3_overlaps)] <- subjectHits(me3_overlaps)
length(unique(promoters_tss$gene_id))
all(names(promoters_tss_gr) == promoters_tss$transcript_id)

# One TSS per gene: when any transcript window overlaps `hs_me3`, choose among
# those transcripts only; otherwise choose among all transcripts of the gene.
# In both cases the most upstream TSS is taken (largest coordinate on "-",
# smallest on "+").
promoters_tss_split <- split(promoters_tss, promoters_tss$gene_id)
length(promoters_tss_split)
promoters_filtered <- do.call('rbind', lapply(promoters_tss_split, function(gene_tx) {
  if (sum(gene_tx$me3_peak) > 0) {
    marked <- gene_tx[gene_tx$me3_peak > 0, ]
    if (unique(marked$strand) == '-') {
      res <- marked[which.max(marked$tss), ]
    } else {
      res <- marked[which.min(marked$tss), ]
    }
  }
  if (sum(gene_tx$me3_peak) == 0) {
    if (unique(gene_tx$strand) == '+') {
      res <- gene_tx[which.min(gene_tx$tss), ]
    } else {
      res <- gene_tx[which.max(gene_tx$tss), ]
    }
  }
  return(res)
}))
length(unique(promoters_filtered$gene_id))
# TSS - 500 to TSS + 500 windows for the one selected TSS per gene, named by
# gene ID.
promoters_filtered_gr = GRanges(seqnames = promoters_filtered$chr,
                                ranges = IRanges(as.numeric(promoters_filtered$tss) - 500,
                                                 end = as.numeric(promoters_filtered$tss) + 500,
                                                 names = promoters_filtered$gene_id),
                                strand = promoters_filtered$strand,
                                gene_id = promoters_filtered$gene_id,
                                gene_name = promoters_filtered$gene_name,
                                gene_biotype = promoters_filtered$gene_biotype,
                                tss = promoters_filtered$tss)
seqlevelsStyle(promoters_filtered_gr) = 'ucsc'
save(promoters_filtered, promoters_filtered_gr, promoters_tss, promoters_tss_gr,
     file = paste0(objects_directory, 'tss_objects.RData'))

# Each statement now sits on its own line; the original ran `save(...)` into
# `load(...)` and the colour vector into `read.table(...)` without a separator,
# which R cannot parse.
load(paste0(objects_directory, "Zhang_DataBundle.RData"))
load(paste0(objects_directory, "GTF_Annotation.RData"))
load(paste0(objects_directory, "ensembl_hg38_genemap.RData"))

# One row per Ensembl gene ID (first occurrence kept).
genemapu = genemap[!duplicated(genemap$ensembl_gene_id), ]

# Plot colours keyed by species code.
species.colors = c('MF' = '#66CCFF', 'HS' = '#000000', 'PT' = '#FF3300', 'MM' = '#0033FF')

countdata_zhang = read.table(file = paste0(outputs_directory, "Zhang_gene_counts_redownloaded.txt"),
                             header = TRUE)
# Pool the Zhang counts into one "fetal" and one "adult" column. The fetal
# columns are picked by position, the adult columns by name pattern.
zhang_countdata_4tpm = data.frame(
  fetal = rowSums(countdata_zhang[, c(36, 28, 29, 23, 24, 11)]),
  adult = rowSums(countdata_zhang[, colnames(countdata_zhang) %like% "YO_ATL_Astro|YO_HPC_Astro"]),
  Length = countdata_zhang$Length,
  row.names = countdata_zhang$Geneid,
  stringsAsFactors = FALSE)
zhang_countdata_tpm = as.data.frame(GetTPM(zhang_countdata_4tpm, 1:2,
                                           rownames(zhang_countdata_4tpm)))

# Expression classes on the pooled TPM values:
#   expressed      - above 1 in fetal or adult
#   not_expressed  - below 0.1 in both
expressed = zhang_countdata_tpm[zhang_countdata_tpm$fetal > 1 | zhang_countdata_tpm$adult > 1, ]
not_expressed = zhang_countdata_tpm[zhang_countdata_tpm$fetal < 0.1 & zhang_countdata_tpm$adult < 0.1, ]
expressed_fetal = zhang_countdata_tpm[zhang_countdata_tpm$fetal > 1, ]
expressed_adult = zhang_countdata_tpm[zhang_countdata_tpm$adult > 1, ]
expressed_only_fetal = setdiff(rownames(expressed_fetal), rownames(expressed_adult))
expressed_only_adult = setdiff(rownames(expressed_adult), rownames(expressed_fetal))
expressed_fetal_adult = intersect(rownames(expressed_fetal), rownames(expressed_adult))

# HGNC symbols of the marker gene IDs in `Fetal_Markers` / `Adult_Markers`.
fetal_markers_geneName = unique(genemap$hgnc_symbol[genemap$ensembl_gene_id %in% Fetal_Markers])
adult_markers_geneName = unique(genemap$hgnc_symbol[genemap$ensembl_gene_id %in% Adult_Markers])

# Based on data quality and previous analyses, the selected samples are
# processed and filtered. We perform a differential expression analysis of
# genes between 4 species - Humans, Chimps, Rhesus Macaques and Crab Eating
# Macaques - based upon their expression profile on the Consensus Genome. This
# sheet details the steps of the differential analysis, with relevant graphs
# for an overview of the data, and finally lists the significant hits based on
# the canonical workflow using DESeq2.
# Load the two raw count tables.
countdata <- read.table(
  file = paste0(outputs_directory, "featureCounts_Counts_MO_All.tsv"),
  header = TRUE
)
countdata_tcw_iAstrocytes <- read.table(
  file = paste0(outputs_directory, 'tcw_latest_gene_counts.txt'),
  header = TRUE
)

## prep the tables
# Give columns 7 onward of the TCW table short sample names.
tcw_sample_names <- c('tcw_3651_Astros', 'tcw_3651_NPCs', 'tcw_9319_Astros',
                      'tcw_9429_Astros', 'tcw_9429_NPCs', 'tcw_BJ_Astros',
                      'Cerebral_Cortex_pAstros', 'Midbrain_pAstros')
colnames(countdata_tcw_iAstrocytes)[7:ncol(countdata_tcw_iAstrocytes)] <- tcw_sample_names
all(rownames(countdata) == countdata_tcw_iAstrocytes$Geneid)
## [1] TRUE

# Build the analysis count matrix: replicate columns of the same line are
# summed, and three astrocyte columns are taken from the TCW table.
countdata <- data.frame(
  PrimaryFetal_F = countdata$PrimaryFetal_F,
  PrimaryFetal_M = countdata$PrimaryFetal_M,
  PrimaryFetal_1 = countdata$PrimaryFetal_1,
  HSapiens_ELE10 = countdata$HSapiens_ELE10_1 + countdata$HSapiens_ELE10_2,
  HSapiens_ELE30 = countdata$HSapiens_ELE30_1 + countdata$HSapiens_ELE30_2,
  HSapiens_TCW_F1 = countdata_tcw_iAstrocytes[, 'tcw_3651_Astros'],
  HSapiens_TCW_F3 = countdata_tcw_iAstrocytes[, 'tcw_9319_Astros'],
  HSapiens_TCW_F4 = countdata_tcw_iAstrocytes[, 'tcw_9429_Astros'],
  Chimp_SandraA = countdata$Chimp_Sandra_BD1 + countdata$Chimp_Sandra_BD2 + countdata$Chimp_Sandra_nwNPC,
  Chimp_Mandy6 = countdata$Chimp_Mandy6 + countdata$Chimp_Mandy6_New,
  Chimp_Mandy4 = countdata$Chimp_Mandy4_New,
  RhMacaque_Becky = countdata$RhMacaque_Becky_BD1 + countdata$RhMacaque_Becky_BD2,
  row.names = rownames(countdata)
)
# Sample names, in the same order as the columns of `countdata`.
sample_names <- c(
  "PrimaryFetal_F", "PrimaryFetal_M", "PrimaryFetal_1",
  "HSapiens_ELE10", "HSapiens_ELE30",
  "HSapiens_TCW_F1", "HSapiens_TCW_F3", "HSapiens_TCW_F4",
  "Chimp_SandraA", "Chimp_Mandy6", "Chimp_Mandy4",
  "RhMacaque_Becky"
)

# Setting up metadata for included samples
species <- c(rep("HS", 8), rep("PT", 3), "MM")
sources <- c(rep("Fetal", 3), rep("iPSC", 9))
sub_class <- c("PF", "PF", "PF", "ELE10", "ELE30", "TCW_F1", "TCW_F3", "TCW_F4",
               "SandraA", "Mandy6", "Mandy4",
               "Becky")
metadata <- data.frame(
  species = as.factor(species),
  sources = as.factor(sources),
  class = as.factor(sub_class),
  row.names = sample_names,
  gender = c('F', 'M', 'F', rep('F', 9)),
  lab = c('other', 'other', rep("PL", 3), rep("other", 3), rep("PL", 4))
)
# Running sample index.
metadata$lp <- seq_len(nrow(metadata))

# The count columns and the metadata rows must line up one-to-one.
all(colnames(countdata) == rownames(metadata))
## [1] TRUE
# TPM-normalise the counts of all genes. Gene lengths come from the TCW table,
# aligned to the rows of `countdata` by gene ID.
Length <- countdata_tcw_iAstrocytes$Length[
  match(rownames(countdata), countdata_tcw_iAstrocytes$Geneid)
]
counts_with_length <- data.frame(cbind(countdata, Length = Length))
tpm_norm_count_table <- GetTPM(counts_with_length,
                               seq_len(ncol(countdata)),
                               rownames(countdata))

# Put the pooled Zhang TPM columns next to the sample TPM columns; the row
# names of both tables are compared before and after binding.
all(rownames(tpm_norm_count_table) == rownames(zhang_countdata_tpm))
## [1] TRUE
tpm_norm_count_all <- cbind(zhang_countdata_tpm, tpm_norm_count_table)
all(rownames(zhang_countdata_tpm) == rownames(tpm_norm_count_table))
## [1] TRUE
all(rownames(tpm_norm_count_table) == rownames(zhang_countdata_tpm))
## [1] TRUE
# filtering out expressed fetal and adult genes
# Stricter version of the expression sets (TPM above 5 instead of above 1).
expressed_fetal_str = zhang_countdata_tpm[zhang_countdata_tpm$fetal > 5, ]
expressed_adult_str = zhang_countdata_tpm[zhang_countdata_tpm$adult > 5, ]
expressed_only_fetal_str = rownames(expressed_fetal_str)[! rownames(expressed_fetal_str) %in% rownames(expressed_adult_str) ]
expressed_only_adult_str = rownames(expressed_adult_str)[! rownames(expressed_adult_str) %in% rownames(expressed_fetal_str) ]

# Differentiation score per sample: log of (summed counts over Adult_Markers)
# divided by (summed counts over Fetal_Markers).
# The assignment below now starts on its own line; the original ran it into the
# end of the previous statement, which R cannot parse.
zhang_countdata_DS = data.frame(countdata_zhang[, c(36, 28, 29, 23, 24, 11)],
                                countdata_zhang[, colnames(countdata_zhang) %like% "YO_ATL_Astro|YO_HPC_Astro"],
                                row.names = countdata_zhang$Geneid)
# NOTE(review): log() is the natural logarithm, but the axis label below says
# "Log[2]". Confirm which is intended (log2() here, or a corrected label).
zhang_countdata_DS = log( colSums(zhang_countdata_DS[rownames(zhang_countdata_DS) %in% Adult_Markers, ]) /
                            colSums(zhang_countdata_DS[rownames(zhang_countdata_DS) %in% Fetal_Markers, ]) )
countdata_DS = log( colSums(countdata[rownames(countdata) %in% Adult_Markers, ]) /
                      colSums(countdata[rownames(countdata) %in% Fetal_Markers, ]) )
ds = c(zhang_countdata_DS, countdata_DS)

# Group labels in the order of `ds`; this hard-codes 6 + 15 Zhang columns
# followed by the 3 + 9 columns of `countdata`.
sampleType = c(rep('acute_fetal', 6), rep('acute_adult', 15),
               rep('fetal_cultured', 3), rep('iAstrocytes', 9))
dsd = split(ds, sampleType)
par(mfrow = c(1, 1), mar = c(5, 4, 1, 1))
beeswarm(ds ~ sampleType, pch = 19,
         col = c( 'blue4', 'turquoise3', 'purple3', 'pink4'),
         method = "swarm", ylim = c(-2, 5), ylab = "Log[2] Differentiation score" )
axis(2, lwd = 2)
# Frame around the current plot. This call and the one before the second
# beeswarm() were fused onto the same line as the statement that follows them;
# every statement now has its own line.
box(col = "black", lwd = 2)

# Differentiation score by species, skipping the first three entries of
# `countdata_DS` (the PrimaryFetal_* samples).
sampleType = factor( c(rep('human', 5), rep('chimpanzee', 3), rep('rhesus', 1)),
                     levels = c('human', 'chimpanzee', 'rhesus') )
beeswarm(countdata_DS[4:length(countdata_DS)] ~ sampleType, pch = 19,
         col = c( 'black', 'red', 'blue'),
         method = "swarm", ylim = c(-2, 5), ylab = "Log[2] Differentiation score" )
axis(2, lwd = 2)
box(col = "black", lwd = 2)

# The same figure is drawn a second time in the original document.
beeswarm(countdata_DS[4:length(countdata_DS)] ~ sampleType, pch = 19,
         col = c( 'black', 'red', 'blue'),
         method = "swarm", ylim = c(-2, 5), ylab = "Log[2] Differentiation score" )
axis(2, lwd = 2)
# Frame around the current plot (was fused with the next statement).
box(col = "black", lwd = 2)

# Astrocyte gene set: symbols annotated with GO:0048708 in `genemap`, plus a
# manually curated list of symbols.
astro_genes = unlist(unique( genemap[genemap$go_id == 'GO:0048708', 'hgnc_symbol']))
# 'GPR37L1' was spelled 'GPR37l1' in the original list; the lookup against
# `hgnc_symbol` below is case-sensitive, so that entry could never match.
astro_genes = unique( c(astro_genes,
                        'ABL1', 'ABL2', 'ARP3', 'ADORA2A', 'AGER', 'AGT',
                        'APP', 'ATF5', 'BIN', 'BMP2', 'C1QA', 'C5AR1',
                        'CNTF', 'CNTN2', 'DAB1', 'DLL1', 'DLL3', 'DRD1',
                        'EIF2B5', 'EPHA4', 'F2', 'FGFR3', 'GCM1', 'GFAP',
                        'GM5849', 'GPR37L1', 'GRN', 'HES1', 'HES5', 'HMGA2',
                        'ID2', 'ID4', 'IFNG', 'IFNGR1', 'IL1B', 'IL6ST',
                        'KDM4A', 'LAMB2', 'LDLR', 'MAG', 'MAP2K1', 'MAPK3',
                        'MBD1', 'MECP2', 'MT3', 'MYCN', 'NF1', 'NFIX',
                        'NKX2-2', 'NOG', 'NOTCH1', 'NR1D1', 'NR2E1', 'NTRK3',
                        'PLP1', 'PLPP3', 'POU3F2', 'PRPF19', 'PSEN1', 'PTPN11',
                        'ROR2', 'S100A8', 'S100A9', 'SERPINE2', 'SHH', 'SMO',
                        'SOX6', 'SOX8', 'SOX9', 'STAT3', 'TAL1', 'TLR4',
                        'TREM2', 'TSPAN2', 'TTC21B', 'VIM', 'SLC1A3'))
# Map the symbols to Ensembl IDs and pull the TPM rows of those genes.
astro_genes = data.frame(unique(genemap[which(genemap$hgnc_symbol %in% astro_genes), c('ensembl_gene_id', 'hgnc_symbol')]))
tmp_count_table = tpm_norm_count_table[rownames(tpm_norm_count_table) %in% astro_genes$ensembl_gene_id, ]
rownames(tmp_count_table) = astro_genes$hgnc_symbol[match(rownames(tmp_count_table), astro_genes$ensembl_gene_id)]

# log10(TPM); the -Inf produced by TPM == 0 is replaced by 0.
# NOTE(review): 0 on this scale equals TPM = 1, so undetected genes are drawn
# above genes with 0 < TPM < 1 - confirm this is acceptable for the figure.
count_frame = as.data.frame(log10(tmp_count_table))
count_frame = count_frame %>% replace(. == '-Inf', 0)
# Rows ordered by decreasing median across samples.
count_frame = count_frame[order(apply(count_frame, 1, median), decreasing = TRUE), ]
pheatmap(count_frame, cellheight = 10,
         treeheight_row = 0,
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         scale = "none",
         angle_col = '315')

# We consider comparisons between human and chimpanzee and between human and
# macaque samples separately.
# DESeq2 differential expression, run once for human vs chimpanzee and once for
# human vs macaque. Both runs use only the samples whose metadata says
# sources == "iPSC" and gender == 'F'.
ids <- seq_len(nrow(countdata))

## DEGs in the comparison between humans and chimps
hs_pt_samples <- which(metadata$species %in% c("HS", "PT") &
                         metadata$sources == "iPSC" &
                         metadata$gender == 'F')
res_HSvPT <- DESeqDataSetFromMatrix(
  countData = countdata[ids, hs_pt_samples],
  colData = metadata[hs_pt_samples, ],
  design = ~ 0 + species
)
## factor levels were dropped which had no samples
res_HSvPT$species <- relevel(res_HSvPT$species, "HS")
res_HSvPT <- DESeq(res_HSvPT, fitType = "local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(res_HSvPT)
## [1] "speciesHS" "speciesPT"
# The shrunken (ashr) fold changes are taken from the fitted object before
# `res_HSvPT` is overwritten with the plain results table.
res_HSvPT_sh <- lfcShrink(res_HSvPT, contrast = c("species", "HS", "PT"), type = 'ashr')
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
## Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
## https://doi.org/10.1093/biostatistics/kxw041
res_HSvPT <- results(res_HSvPT, contrast = c("species", "HS", "PT"))
summary(res_HSvPT)
##
## out of 46457 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 3881, 8.4%
## LFC < 0 (down) : 3890, 8.4%
## outliers [1] : 307, 0.66%
## low counts [2] : 13293, 29%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##

# Same procedure for human vs macaque.
hs_mm_samples <- c(metadata$species %in% c("HS", "MM") &
                     metadata$sources == "iPSC" &
                     metadata$gender == 'F')
res_HSvMM <- DESeqDataSetFromMatrix(
  countData = countdata[ids, hs_mm_samples],
  colData = metadata[hs_mm_samples, ],
  design = ~ 0 + species
)
## factor levels were dropped which had no samples
res_HSvMM$species <- relevel(res_HSvMM$species, "HS")
res_HSvMM <- DESeq(res_HSvMM, fitType = "local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res_HSvMM_sh <- lfcShrink(res_HSvMM, contrast = c("species", "HS", "MM"), type = "ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
## Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
## https://doi.org/10.1093/biostatistics/kxw041
res_HSvMM <- results(res_HSvMM, contrast = c("species", "HS", "MM"))
summary(res_HSvMM)
##
## out of 45191 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 5517, 12%
## LFC < 0 (down) : 4670, 10%
## outliers [1] : 133, 0.29%
## low counts [2] : 15485, 34%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Significant genes per comparison. Despite the `_0.1` suffix in the object
# names, the cut-off applied here is padj <= 0.01.
HSvPT_DEG <- as.data.frame(res_HSvPT)
HSvPT_DEG_0.1 <- HSvPT_DEG %>% filter(padj <= 0.01 )
res_HSvPT_sh = as.data.frame(res_HSvPT_sh)
HSvPT_DEG_sh <- res_HSvPT_sh %>% filter(padj <= 0.01 )
# setDT() moves the gene IDs from the row names into column `rn`.
setDT(HSvPT_DEG_0.1, keep.rownames = TRUE)
HSvMM_DEG <- as.data.frame(res_HSvMM)
HSvMM_DEG_0.1 <- HSvMM_DEG %>% filter(padj <= 0.01 )
setDT(HSvMM_DEG_0.1, keep.rownames = TRUE)
res_HSvMM_sh = as.data.frame(res_HSvMM_sh)
HSvMM_DEG_sh <- res_HSvMM_sh %>% filter(padj <= 0.01 )
sum( HSvMM_DEG_0.1$rn %in% HSvPT_DEG_0.1$rn )
## [1] 1271
sum(! HSvMM_DEG_0.1$rn %in% HSvPT_DEG_0.1$rn )
## [1] 3885

# Check that the shrunken tables list the same genes in the same order.
# The `*_sh` tables are plain data frames that keep the gene IDs as row names
# and have no `rn` column, so the original `HSvMM_DEG_sh$rn == ...` compared
# NULL against the IDs and all() of the empty result was TRUE regardless.
identical(rownames(HSvMM_DEG_sh), HSvMM_DEG_0.1$rn)
## [1] TRUE
identical(rownames(HSvPT_DEG_sh), HSvPT_DEG_0.1$rn)

# Attach the shrunken fold changes by gene ID rather than by row position.
HSvMM_DEG_0.1$lfc_sh = HSvMM_DEG_sh$log2FoldChange[match(HSvMM_DEG_0.1$rn, rownames(HSvMM_DEG_sh))]
HSvPT_DEG_0.1$lfc_sh = HSvPT_DEG_sh$log2FoldChange[match(HSvPT_DEG_0.1$rn, rownames(HSvPT_DEG_sh))]

# Human versus chimpanzee - volcano
Human versus macaque - volcano
Largely congruent changes in gene expression
# tpm_norm_count_table_thresholded_top = apply(tpm_norm_count_table,2,function(x){x>(quantile(x[x>0])[3])})
# Logical table: TRUE where a gene exceeds 1 TPM in a sample.
tpm_norm_count_table_thresholded_top <- tpm_norm_count_table > 1
## ------------------------------
# Start from three annotation columns (picked by position) and collapse the
# pseudogene and TR_ biotypes into one label each.
log_fold_dat <- gtf_annotation_table[, c(1, 7, 12)]
log_fold_dat[log_fold_dat$gene_biotype %like% "pseudogene",]$gene_biotype = "pseudogene"
log_fold_dat[log_fold_dat$gene_biotype %like% "TR_",]$gene_biotype = "TR_genes"
log_fold_dat$rn <- log_fold_dat$ensembl_gene_id

# Inner-join the raw and shrunken fold changes of both comparisons, so only
# genes significant in both remain. Columns 1, 3 and 8 of the DEG tables are
# rn, log2FoldChange and lfc_sh.
log_fold_dat <- merge(log_fold_dat, HSvPT_DEG_0.1[, c(1, 3, 8)], by = 'rn')
colnames(log_fold_dat)[5:6] <- c("HSvPT_lfc", "HSvPT_lfc_shrunk")
log_fold_dat <- merge(log_fold_dat, HSvMM_DEG_0.1[, c(1, 3, 8)], by = 'rn')
colnames(log_fold_dat)[7:8] <- c("HSvMM_lfc", "HSvMM_lfc_shrunk")
## ------------------------------
all(colnames(tpm_norm_count_table) == rownames(metadata))
## [1] TRUE

# Per-species TPM of the female iPSC-derived samples (row means for human and
# chimpanzee, the selected column itself for macaque).
is_female_ipsc <- metadata$sources == "iPSC" & metadata$gender == 'F'
tpm_norm_count_table_df <- data.frame(
  human = rowMeans(tpm_norm_count_table[, which(metadata$species == "HS" & is_female_ipsc)]),
  chimp = rowMeans(tpm_norm_count_table[, which(metadata$species == "PT" & is_female_ipsc)]),
  macaque = tpm_norm_count_table[, which(metadata$species == "MM" & is_female_ipsc)],
  rn = rownames(tpm_norm_count_table)
)
log_fold_dat <- merge(log_fold_dat, tpm_norm_count_table_df, by = 'rn')
# The join key is no longer needed in either table.
tpm_norm_count_table_df$rn <- NULL
log_fold_dat$rn <- NULL
# Restrict to four biotypes and give each a plot colour
# (protein_coding keeps the default 'steelblue').
log_fold_dat_biotype = log_fold_dat[log_fold_dat$gene_biotype %in% c('protein_coding',
                                                                     'pseudogene',
                                                                     'lncRNA', 'miRNA'), ]
log_fold_dat_biotype$col = rep('steelblue', nrow(log_fold_dat_biotype))
log_fold_dat_biotype$col[log_fold_dat_biotype$gene_biotype == 'pseudogene'] = 'thistle3'
log_fold_dat_biotype$col[log_fold_dat_biotype$gene_biotype == 'lncRNA'] = 'red3'
log_fold_dat_biotype$col[log_fold_dat_biotype$gene_biotype == 'miRNA'] = 'black'
log_fold_dat_biotype$DEX = 0
# log_fold_dat_biotype$DEX[log_fold_dat_biotype$ensembl_gene_id %in% eid] = 1

# Shrunken fold changes of the two comparisons against each other.
# The axis titles follow the plotted data: x is human vs chimpanzee, y is human
# vs macaque. The original call had the two titles swapped.
par(mfrow = c(1, 1), mar = c(5, 5, 5, 5), cex.lab = 2, pty = 's')
plot(x = log_fold_dat_biotype$HSvPT_lfc_shrunk,
     y = log_fold_dat_biotype$HSvMM_lfc_shrunk,
     xlim = c(-10, 10), ylim = c(-10, 10),
     col = log_fold_dat_biotype$col, pch = 19, cex = 0.5,
     xlab = 'Hs vs. Pt', ylab = 'Hs vs. Mm', axes = F)
axis(1, lwd = 2, cex.axis = 2)
axis(2, lwd = 2, cex.axis = 2)
abline(a = 0, b = 1)
abline(h = 0, v = 0, lwd = 2, col = 'gray')
box(col = 'black', lwd = 2)

# Correlation of the unshrunken fold changes (the plot above shows the
# shrunken ones).
cor.test(log_fold_dat_biotype$HSvPT_lfc, log_fold_dat_biotype$HSvMM_lfc)
##
## Pearson's product-moment correlation
##
## data: log_fold_dat_biotype$HSvPT_lfc and log_fold_dat_biotype$HSvMM_lfc
## t = 42.744, df = 1243, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7479379 0.7930088
## sample estimates:
## cor
## 0.7714392
colnames(log_fold_dat)[1:3] = c("ensembl_id", "hgnc_symbol", "gene_biotype")

# EAGs: for how many of these genes is expression detected in the human
# astrocytes?
# Genes changed in the same direction in both comparisons.
HS_UP_Genes <- log_fold_dat %>% filter(HSvPT_lfc > 0 & HSvMM_lfc > 0)
HS_DN_Genes <- log_fold_dat %>% filter(HSvPT_lfc < (-0) & HSvMM_lfc < (-0) )
all(HS_UP_Genes$ensembl_id %in% HSvMM_DEG_0.1$rn )
## [1] TRUE
all(HS_UP_Genes$ensembl_id %in% HSvPT_DEG_0.1$rn )
## [1] TRUE
hits_up = as.data.frame(HS_UP_Genes)
hits_dn = as.data.frame(HS_DN_Genes)
dim(hits_up) # 677
## [1] 677 10
dim(hits_dn) # 486
## [1] 486 10
sum(hits_up$ensembl_id %in% rownames(expressed)) / nrow(hits_up)
## [1] 0.8227474
sum(hits_dn$ensembl_id %in% rownames(expressed)) / nrow(hits_dn)
## [1] 0.8395062

# Detection filter on the TPM > 1 table: up hits must pass in more than 3 of
# the female human iPSC samples, down hits in more than 2 of the chimpanzee /
# macaque samples. Row names are indexed directly so the lookup also works when
# exactly one gene passes (subsetting the matrix first would drop it to a
# vector without row names).
human_cols = which(metadata$species == "HS" & metadata$sources == "iPSC" & metadata$gender == 'F')
nhp_cols = which(metadata$species %in% c("PT", "MM"))
detected_human = rownames(tpm_norm_count_table_thresholded_top)[
  rowSums(tpm_norm_count_table_thresholded_top[, human_cols]) > 3]
detected_nhp = rownames(tpm_norm_count_table_thresholded_top)[
  rowSums(tpm_norm_count_table_thresholded_top[, nhp_cols]) > 2]
hits_up = hits_up[hits_up$ensembl_id %in% detected_human, ]
hits_dn = hits_dn[hits_dn$ensembl_id %in% detected_nhp, ]
sum(hits_up$ensembl_id %in% rownames(expressed)) / nrow(hits_up)
## [1] 0.8464052
sum(hits_dn$ensembl_id %in% rownames(expressed)) / nrow(hits_dn)
## [1] 0.8883929

## boxplot expression in zhang
# Count the hits falling in each Zhang expression class.
u = cbind(not_expressed = sum(hits_up$ensembl_id %in% rownames(not_expressed )),
          fetal = sum(hits_up$ensembl_id %in% expressed_only_fetal ),
          adult = sum(hits_up$ensembl_id %in% expressed_only_adult ),
          both = sum(hits_up$ensembl_id %in% expressed_fetal_adult ))
d = cbind(not_expressed = sum(hits_dn$ensembl_id %in% rownames(not_expressed )),
          fetal = sum(hits_dn$ensembl_id %in% expressed_only_fetal ),
          adult = sum(hits_dn$ensembl_id %in% expressed_only_adult ),
          both = sum(hits_dn$ensembl_id %in% expressed_fetal_adult ))
m = rbind(u, d)
m
## not_expressed fetal adult both
## [1,] 33 106 61 351
## [2,] 26 29 26 343
par(lwd = 2, cex.axis = 1.5, mar = c(5, 5, 1, 1), pty = 'm')
# Row-wise shares are multiplied by 100 so the bars match the "%" axis label
# (the original plotted fractions between 0 and 1 under that label).
barplot(100 * t(m / rowSums(m)), col = c('red4', 'white', 'black', 'gray'),
        ylab = "%", names = c("Up", "Down"), xlab = "EAGs",
        cex.names = 2.5, cex.lab = 2)
axis(2, lwd = 3)
# Layout for the four box plots that follow (was fused with the axis() call).
par(mfrow = c(2, 2))
# Fold-change distributions per biotype, one panel per hit set and comparison.
hits_up_split_pt = split(hits_up$HSvPT_lfc, hits_up$gene_biotype)
boxplot( hits_up_split_pt[c('protein_coding', 'lncRNA', 'pseudogene')], ylim = c(0, 15), col = 'white', border = c('steelblue', 'red3', 'thistle3'))
hits_up_split_mm = split(hits_up$HSvMM_lfc, hits_up$gene_biotype)
boxplot( hits_up_split_mm[c('protein_coding', 'lncRNA', 'pseudogene')], ylim = c(0, 15), col = 'white', border = c('steelblue', 'red3', 'thistle3'))
hits_dn_split_pt = split(hits_dn$HSvPT_lfc, hits_dn$gene_biotype)
boxplot( hits_dn_split_pt[c('protein_coding', 'lncRNA', 'pseudogene')], ylim = c(-15, 0), col = 'white', border = c('steelblue', 'red3', 'thistle3'))
hits_dn_split_mm = split(hits_dn$HSvMM_lfc, hits_dn$gene_biotype)
boxplot( hits_dn_split_mm[c('protein_coding', 'lncRNA', 'pseudogene')],
         ylim = c(-15, 0), col = 'white', border = c('steelblue', 'red3', 'thistle3'))

# Keep only hits in the Zhang `expressed` set. (This statement and the
# `x = ...` assignment further down were fused onto the end of the preceding
# call in the original; each now has its own line.)
hits_up = hits_up[hits_up$ensembl_id %in% rownames(expressed), ]
dim(hits_up)
## [1] 518 10
hits_dn = hits_dn[hits_dn$ensembl_id %in% rownames(expressed), ]
dim(hits_dn)
## [1] 398 10
sum(hits_up$ensembl_id %in% Fetal_Markers)
## [1] 25
sum(hits_dn$ensembl_id %in% Fetal_Markers)
## [1] 26
sum(hits_up$ensembl_id %in% Adult_Markers)
## [1] 20
sum(hits_dn$ensembl_id %in% Adult_Markers)
## [1] 25

# Fetal markers are removed from the up set and adult markers from the down
# set.
hits_up = hits_up[! hits_up$ensembl_id %in% Fetal_Markers, ]
hits_dn = hits_dn[! hits_dn$ensembl_id %in% Adult_Markers, ]
dim(hits_up)
## [1] 493 10
dim(hits_dn)
## [1] 373 10
par(lwd = 2, cex.axis = 1.5, mar = c(5, 5, 3, 1), mfrow = c(1, 1))
barplot( c(up = nrow(hits_up),
           down = nrow(hits_dn)),
         col = c("green4", "wheat3"),
         ylim = c(0, 500), ylab = "EAGs", cex.axis = 1.5, cex.lab = 2)
axis(2, lwd = 2)

# Write the final gene ID lists, one ID per line.
x = hits_up$ensembl_id
y = hits_dn$ensembl_id
write.table(y, file = paste0(outputs_directory, 'dn_engs.txt'), quote = FALSE, row.names = FALSE, col.names = FALSE, sep = '\n')
write.table(x, file = paste0(outputs_directory, 'up_engs.txt'), quote = FALSE, row.names = FALSE, col.names = FALSE, sep = '\n')
## are genes affected by evolution frequently totally on or off??
# Up hits below 0.1 TPM in both chimpanzee and macaque, by biotype.
table(hits_up[hits_up$chimp < 0.1 & hits_up$macaque < 0.1, 'gene_biotype'])
##
## lncRNA protein_coding pseudogene TEC
## 7 1 6 1
nrow(hits_up[hits_up$chimp < 0.1 & hits_up$macaque < 0.1, ])
## [1] 15
# Down hits below 0.1 TPM in human, by biotype.
table(hits_dn[hits_dn$human < 0.1, 'gene_biotype'])
##
## lncRNA Mt_tRNA protein_coding pseudogene
## 3 1 11 2
nrow(hits_dn[hits_dn$human < 0.1, ])
## [1] 17

# Full DESeq2 tables of both comparisons joined on row names (outer join).
all_Deseqs = merge( HSvPT_DEG, HSvMM_DEG, by = 0, all = TRUE ) # data frame of merged results
save( hits_dn, hits_up, HS_DN_Genes, HS_UP_Genes, all_Deseqs, log_fold_dat, tpm_norm_count_table,
      file = paste0(objects_directory, "DEseq2_RNA.RData"))
# load() now sits on its own line; the original ran it into the closing
# parenthesis of save(), which R cannot parse.
load(paste0(objects_directory, "bda_final.RData"))
# Restrict the disease table to genes present in the count data.
bda_final = bda_final[bda_final$ensid %in% rownames(countdata), ]
tot_n_EAG = nrow(hits_up) + nrow(hits_dn)

# Disease records that hit an up (x) or down (y) gene, matched by Ensembl ID or
# gene symbol, with one record kept per gene-disease pair.
x = bda_final[bda_final$ensid %in% hits_up$ensembl_id | bda_final$Gene.symbol %in% hits_up$hgnc_symbol, ]
x$or = paste(x$ensid, x$Disease, sep = '-')
x = x[!duplicated(x$or), ]
y = bda_final[bda_final$ensid %in% hits_dn$ensembl_id | bda_final$Gene.symbol %in% hits_dn$hgnc_symbol, ]
y$or = paste(y$ensid, y$Disease, sep = '-')
y = y[!duplicated(y$or), ]
length(unique(c(x$Disease, y$Disease)))
## [1] 23

# Per-disease counts for both hit sets, laid out over all diseases ordered by
# total number of records (the original built `All_diseases` twice in a row;
# once is enough).
X = table(x$Disease)
Y = table(y$Disease)
All_diseases = table(bda_final$Disease)
All_diseases = All_diseases[order(All_diseases, decreasing = TRUE)]
nulv = rep(0, length(All_diseases))
names(nulv) = names(All_diseases)
nulv1 = nulv
nulv1[match(names(X), names(nulv1))] = X
nulv2 = nulv
nulv2[match(names(Y), names(nulv2))] = Y
m = rbind(nulv1, nulv2)
# Drop diseases without any hit.
mcut1 = m[, colSums(m) > 0]
dim(mcut1)
## [1] 2 23
par(mar = c(12, 4, 1, 1), mfrow = c(1, 1))
barplot(mcut1, beside = TRUE, col = c('green4', 'wheat3'), las = 2,
        ylim = c(0, 25), axes = FALSE, ylab = "EAG")
axis(2, lwd = 3)

# 2 x 2 table, filled column-wise: row 1 = disease-linked genes (up, down),
# row 2 = all EAGs (up, down). The assignment was fused with the axis() call
# above in the original.
# NOTE(review): row 1 is a subset of row 2, so the cells are not mutually
# exclusive as prop.test()/fisher.test() assume; consider using
# "not disease-linked" counts (total minus linked) in row 2. Row 1 also counts
# gene symbols while row 2 counts Ensembl IDs - confirm.
M = matrix( c( length(unique(x$Gene.symbol)),
               length(unique(hits_up$ensembl_id)),
               length(unique(y$Gene.symbol)),
               length(unique(hits_dn$ensembl_id))), ncol = 2, nrow = 2)
prop.test( M )
##
## 2-sample test for equality of proportions with continuity correction
##
## data: M
## X-squared = 20.933, df = 1, p-value = 0.000004757
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.3106748 -0.1256033
## sample estimates:
## prop 1 prop 2
## 0.3511450 0.5692841
fisher.test( M )
##
## Fisher's Exact Test for Count Data
##
## data: M
## p-value = 0.000003401
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2727455 0.6096183
## sample estimates:
## odds ratio
## 0.4098171
M
## [,1] [,2]
## [1,] 46 85
## [2,] 493 373
# Same 2 x 2 layout as the all-disease table, restricted to records whose
# Disease is "Intellectual Disability": row 1 = linked genes (up, down),
# row 2 = all EAGs (up, down); the matrix is filled column-wise.
# NOTE(review): row 1 is a subset of row 2, so the cells are not mutually
# exclusive - confirm this is the intended table for the tests below.
M = matrix( c( length(unique(x$Gene.symbol[x$Disease=="Intellectual Disability"])),
length(unique(hits_up$ensembl_id)),
length(unique(y$Gene.symbol[y$Disease=="Intellectual Disability"])),
length(unique(hits_dn$ensembl_id))),ncol=2,nrow=2)
# chisq.test() warns that its approximation may be incorrect for this table;
# the exact test follows.
chisq.test( M )## Warning in chisq.test(M): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: M
## X-squared = 8.2604, df = 1, p-value = 0.004052
fisher.test( M )##
## Fisher's Exact Test for Count Data
##
## data: M
## p-value = 0.001251
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.0000000 0.4471496
## sample estimates:
## odds ratio
## 0
unique(bda_final$Disease)## [1] "Autism Spectrum Disorder"
## [2] "Alzheimer's Disease"
## [3] "Amyotrophic Lateral Sclerosis"
## [4] "Multiple Sclerosis"
## [5] "Epilepsy"
## [6] "Intracranial Aneurysm"
## [7] "Neuroblastoma"
## [8] "Parkinson's Disease"
## [9] "Restless legs Syndrome"
## [10] "Meningioma"
## [11] "Narcolepsy"
## [12] "Glioma"
## [13] "Prader-Willi Syndrome"
## [14] "Progressive Supranuclear Plasy"
## [15] "Restless Legs Syndrome"
## [16] "Rett Syndrome"
## [17] "Rolandic Epilepsy with Speech impairment"
## [18] "Shy Drager Syndrome"
## [19] "Spasmodic Dysphonia"
## [20] "Stroke"
## [21] "Tay-Sachs Disease"
## [22] "Tourette Syndrome"
## [23] "Tuberous Sclerosis"
## [24] "Von Hippel-Lindau Syndrome"
## [25] "X-linked Hydrocephalus"
## [26] "Agenesis Corpus Callosum"
## [27] "Alopecia with Mental Retardation"
## [28] "Alpha-Thalassemia X-Linked Intellectual Disability Syndrome"
## [29] "Alternating Hemiplegia of Childhood"
## [30] "Aphasia"
## [31] "Attention Deficit Hyperactivity Disorder"
## [32] "Autosomal Dominant Nocturnal Frontal Lobe Epilepsy"
## [33] "Autosomal Dominant Partial Epilepsy with Auditory Features"
## [34] "Autosomal Recessive Cerebellar Ataxia Type 1"
## [35] "Batten Disease"
## [36] "Benign Familial Neonatal Seizures"
## [37] "Benign Hereditary Chorea"
## [38] "Cerebral Aneurysm"
## [39] "Cerebellar Ataxia, Mental Retardation and Disequilibrium Syndrome"
## [40] "Cerebral Palsy"
## [41] "Cerebro-Oculo-Facio-Skeletal Syndrome"
## [42] "Cerebrocostomandibular Syndrome"
## [43] "Charcot-Marie-Tooth Disease"
## [44] "Chiari Malformation"
## [45] "Chronic Inflammatory Demyelinating Polyneuropathy"
## [46] "Coma"
## [47] "Creutzfeldt Jakob Disease"
## [48] "Dementia (Non Alzheimer)"
## [49] "Down Syndrome"
## [50] "Dysautonomia"
## [51] "Dyslexia"
## [52] "Dyspraxia"
## [53] "Dystonia"
## [54] "Encephalitis"
## [55] "Essential Tremor"
## [56] "Familial Focal Epilepsy with Variable Foci"
## [57] "Ferro-Cerebro-Cutaneous Syndrome"
## [58] "Friedreich Ataxia"
## [59] "Gaucher Disease"
## [60] "Generalized Epilepsy with Febrile Seizures Plus"
## [61] "Huntington's Disease"
## [62] "Hydrocephalus"
## [63] "Intellectual Disability"
## [64] "Meningitis"
## [65] "Motor Neurone Disease"
## [66] "Muscular Dystrophy"
## [67] "Neurodegenerative Disease"
## [68] "Paraganglioma"
## [69] "Schizophrenia"
## [70] "Pontocerebellar Hypoplasia"
## [71] "Depression Disorder"
## [72] "Neurofibromatosis"
## [73] "Major Depression Disorder"
## [74] "Ischemic Stroke"
## [75] "Ataxia Telangiectasia"
## [76] "Spinocerebellar Ataxia"
## [77] "Smith-Magenis Syndrome"
## [78] "Anorexia Nervosa"
## [79] "Bipolar Disorder"
## [80] "Frontotemporal Lobar Degeneration"
## [81] "Neurodevelopmental Disability"
## [82] "Panic Disorder"
## [83] "Post-traumatic Stress Disorder"
## [84] "Amyotrophic lateral Sclerosis"
## [85] "Angelman Syndrome"
## [86] "Cerebral infarction"
## [87] "Cognitive Functions and Neuronal plasticity"
## [88] "Fragile X Syndrome"
## [89] "Neurological Disorder"
## [90] "Non-functioning Pituitary Adenoma"
## [91] "Pituitary Adenoma"
## [92] "Plexiform Neurofibroma"
## [93] "Prader-willi Syndrome and Angelman Syndrome"
## [94] "Psychiatric Disease"
## [95] "West Syndrome"
## [96] "Non-functioning Pituitary Neoplasms"
## [97] "Pituitary Neoplasms"
## [98] "Forebrain Ischemia"
## [99] "Status Epilepticus"
## [100] "Acute Cerebral Infarction"
## [101] "Acute Cerebral Ischemia"
## [102] "Brain Neoplasms"
## [103] "Cerebellum Cancer"
## [104] "Cerebral Cavernous Malformation"
## [105] "Cerebral Ischemia"
## [106] "Cerebral Malaria"
## [107] "Encephalomyelitis"
## [108] "Intracerebral Hemorrhage"
## [109] "Mild Cognitive Impairment"
## [110] "Neurilemmoma"
## [111] "Neuroendocrine Tumor"
## [112] "Neuroepithelial Tumor"
## [113] "Neuroma"
## [114] "Neuronal Apoptosis-Related Disease"
## [115] "Frontotemporal Dementia"
## [116] "Anxiety Disorder"
## [117] "Acute Ischemic Stroke"
## [118] "Aneurysmal Subarachnoid Hemorrhage"
## [119] "Central Nervous System Embryonal Tumor"
# Read the UniProt keyword export and collect every gene name it lists: the
# space-separated `Gene.Names` field of each entry is split into single names.
id_DAVID_all = read.delim(paste0(outputs_directory,"uniprotkb_keyword_KW_0991_2023_09_01.tsv"),
header=TRUE)
id_DAVID_all = unlist(lapply(split(id_DAVID_all$Gene.Names,id_DAVID_all$Entry),
function(x){strsplit(x," ")}))
# Ensembl IDs of the genes whose external gene name is in that list.
id_DAVID_all_ensg = unique( genemapu$ensembl_gene_id[genemapu$external_gene_name %in% id_DAVID_all ] )
# 2 x 2 table, filled column-wise: column 1 = keyword genes among the down / up
# hits, column 2 = all down / up hits.
# NOTE(review): column 1 is a subset of column 2, so the cells are not mutually
# exclusive as Fisher's test assumes - confirm.
fisher.test(matrix(c(sum(hits_dn$ensembl_id %in% id_DAVID_all_ensg),
sum(hits_up$ensembl_id %in% id_DAVID_all_ensg),
nrow(hits_dn),
nrow(hits_up)),2,2))##
## Fisher's Exact Test for Count Data
##
## data: matrix(c(sum(hits_dn$ensembl_id %in% id_DAVID_all_ensg), sum(hits_up$ensembl_id %in% id_DAVID_all_ensg), nrow(hits_dn), nrow(hits_up)), 2, 2)
## p-value = 0.000000000003697
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 7.914837 1916.514987
## sample estimates:
## odds ratio
## 47.39888
# Symbols of the down / up hits that appear in the keyword gene list.
hits_dn$hgnc_symbol[hits_dn$hgnc_symbol %in% id_DAVID_all ]## [1] "KMT2E" "SYN1" "CDH1" "NUP133" "ATP2B1" "OPHN1" "KAT6A"
## [8] "ZC3H14" "CTCF" "SMC3" "FBXW7" "KMT5B" "ARL6" "CEP104"
## [15] "ZMYM2" "FGF13" "DPP6" "ATP8A2" "CDK8" "DPF2" "STXBP1"
## [22] "FBXO11" "ASXL2" "PHIP" "TLK2" "RBMX" "AFF2" "PHF6"
## [29] "DYRK1A" "ZNF148" "HNRNPH1" "SOX11" "DCC" "USP7" "ZNF292"
## [36] "PGAP1"
hits_up$hgnc_symbol[hits_up$hgnc_symbol %in% id_DAVID_all ]## [1] "ZNHIT3"
Heatmap of ID related genes
# Expression heatmap of the down-regulated hits found in `id_DAVID_all_ensg`.
idgenesensembl = hits_dn$ensembl_id[ hits_dn$ensembl_id %in% id_DAVID_all_ensg ]
# NOTE(review): columns 3:ncol drop only the first two sample columns of
# `tpm_norm_count_table` - confirm that is the intended sample selection.
tpm_norm_count_table_ID = tpm_norm_count_table[rownames(tpm_norm_count_table) %in% idgenesensembl, 3:ncol(tpm_norm_count_table)]
tpm_norm_count_table_id = as.data.frame(log10(0.1 + tpm_norm_count_table_ID))
# Label each row with the gene name of THAT row's Ensembl ID. The rows keep the
# order of `tpm_norm_count_table`, whereas `idgenesensembl` follows the order
# of `hits_dn`; the original looked the names up in `idgenesensembl` order and
# could therefore attach gene names to the wrong rows.
rownames(tpm_norm_count_table_id) = genemapu$external_gene_name[match(rownames(tpm_norm_count_table_ID), genemapu$ensembl_gene_id)]
png(paste0(plots_directory, '/ID_heatmap.png'),
    width = 5000, height = 10000, res = 1200 )
# Row-scaled values, rows clustered, columns kept in sample order.
pheatmap(tpm_norm_count_table_id,
         cellheight = 10,
         treeheight_row = 0,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         scale = "row", color = colorRampPalette(c("blue", "white", "red"))(100),
         angle_col = '315')
dev.off()
## quartz_off_screen
## 3
# Expression heatmap of the up-regulated hits listed under the
# "extracellular exosome" term of the DAVID export.
up_fa = read.delim(paste0(outputs_directory, "hits_up_DAVID_KEGG.txt"))
exosomal_genes = unique( unlist(strsplit(up_fa$Genes[up_fa$Term == "GO:0070062~extracellular exosome"], ", ")) )
# Drop one gene ID by logical mask. The original `x[-which(...)]` returns an
# EMPTY vector when the ID is not present, because negating integer(0) selects
# nothing.
exosomal_genes = exosomal_genes[!exosomal_genes %in% "ENSG00000285762"]
# NOTE(review): columns 3:ncol drop only the first two sample columns of
# `tpm_norm_count_table` - confirm that is the intended sample selection.
tpm_norm_count_table_EX = tpm_norm_count_table[rownames(tpm_norm_count_table) %in% exosomal_genes, 3:ncol(tpm_norm_count_table)]
tpm_norm_count_table_ex = as.data.frame(log10(0.1 + tpm_norm_count_table_EX))
# Replace the Ensembl IDs in the row names by the matching HGNC symbols.
rownames(tpm_norm_count_table_ex) = hits_up$hgnc_symbol[match(rownames(tpm_norm_count_table_ex), hits_up$ensembl_id)]
png(paste0(plots_directory, '/Exosome_heatmap_GeneNames.png'),
    width = 5000, height = 10000, res = 1200 )
# Row-scaled values, rows clustered, columns kept in sample order.
pheatmap(tpm_norm_count_table_ex,
         cellheight = 10,
         treeheight_row = 0,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         scale = "row", color = colorRampPalette(c("blue", "white", "red"))(100),
         angle_col = '315')
dev.off()
## quartz_off_screen
## 3
# --- DAVID terms enriched among down-regulated genes ------------------------
# Keep terms with Benjamini < 0.05 whose prefix before ":" is "GO" or
# "hsa01100"; rows are sorted by decreasing Benjamini before plotting.
go_dn <- read.delim(paste0(outputs_directory, 'dn_engs_DAVID_KEGG.txt'))
go_dn <- go_dn[order(go_dn$Benjamini, decreasing = TRUE), ]
go_dn <- go_dn[go_dn$Benjamini < 0.05, ]
go_dn$anyGo <- unlist(lapply(strsplit(go_dn$Term, ":"), `[[`, 1))
go_dn <- go_dn[go_dn$anyGo %in% c("GO", "hsa01100"), ]
par(mfrow = c(1, 1), mar = c(5, 30, 1, 1))
barplot(-log10(go_dn$Benjamini),
        horiz = TRUE,
        names.arg = go_dn$Term,
        las = 2,
        xlim = c(0, 20),
        xlab = "-Log[10]B-H adj. P-val")
axis(1, lwd = 2, las = 2)
# Genes related to nucleus
length(unique(unlist(strsplit(go_dn$Genes, ", "))))
## [1] 249

# --- DAVID terms enriched among up-regulated genes --------------------------
# Same procedure with a stricter cut-off (Benjamini < 0.01).
go_up <- read.delim(paste0(outputs_directory, 'hits_up_DAVID_KEGG.txt'))
go_up <- go_up[order(go_up$Benjamini, decreasing = TRUE), ]
go_up <- go_up[go_up$Benjamini < 0.01, ]
go_up$anyGo <- unlist(lapply(strsplit(go_up$Term, ":"), `[[`, 1))
go_up <- go_up[go_up$anyGo %in% c("GO", "hsa01100"), ]
par(mfrow = c(1, 1), mar = c(5, 20, 1, 1))
barplot(-log10(go_up$Benjamini),
        horiz = TRUE,
        names.arg = go_up$Term,
        las = 2,
        xlim = c(0, 3),
        xlab = "-Log[10]B-H adj. P-val")
axis(1, lwd = 2, las = 2)

# Gene list read without a header, so the identifiers end up in column V1.
pluripotencyGenes <- read.delim(
  paste0(outputs_directory, 'Conserved_Pluripotency_genes.txt'),
  header = FALSE, as.is = TRUE
)
# Chimpanzee iPSC RNA-seq: TPM table plus a DESeq2 fit on the raw counts.
df <- read.delim(paste0(outputs_directory, 'gene_counts_Mandy.txt'),
                 skip = 1, as.is = TRUE)

# Sample labels are what remains of a count-column name once the alignment
# path prefix and the BAM suffix are removed. sub() leaves a name unchanged
# when a pattern is absent; the previous strsplit()/even-index approach would
# silently shift every following label in that case.
sample_prefix <- "^.*analyses.star.RNA_Seq_02.22_PanTro_iPSC_WT_"
sample_suffix <- "_Rep_1_Aligned.sortedByCoord.out.bam"

# Columns 7-11 hold the five samples; column 6 is passed along for GetTPM
# (presumably the gene length -- TODO confirm against GetTPM's signature).
countTable <- df[, c(7, 8, 9, 10, 11, 6)]
rownames(countTable) <- df$Geneid
chimp_tpm <- GetTPM(countTable, 1:5, rownames(countTable))
colnames(chimp_tpm) <- sub(sample_suffix, "",
                           sub(sample_prefix, "", colnames(chimp_tpm)))

# Raw counts of the same five samples for DESeq2.
countTable <- df[, c(7, 8, 9, 10, 11)]
rownames(countTable) <- df$Geneid
colnames(countTable) <- sub(sample_suffix, "",
                            sub(sample_prefix, "", colnames(countTable)))
coldata <- data.frame(condition = c(rep("Mandy", 4), "SandraA"))
rownames(coldata) <- colnames(countTable)
dds <- DESeqDataSetFromMatrix(
  countData = countTable,
  colData = coldata,
  design = ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(dds)
## [1] "Intercept" "condition_SandraA_vs_Mandy"
res <- results(dds)
res <- res[order(res$padj), ]
vsdat <- vst(dds, blind = FALSE)
matvsdat <- assay(vsdat)               ## variance-stabilised data
mat <- counts(dds, normalized = TRUE)  ## size-factor normalised counts
# Keep genes whose normalised counts sum to more than 20 across samples.
mat <- mat[rowSums(mat) > 20, ]
# Pairwise density scatter plots of log2(normalised counts + 0.1).
par(mfrow = c(2, 2), mar = c(5, 5, 5, 5), pty = "s", bty = "O")
sample_pairs <- list(c("Mandy4", "SandraA"),
                     c("Mandy6", "SandraA"),
                     c("Mandy4", "Mandy6"))
for (pair in sample_pairs) {
  heatscatter(log2(0.1 + mat[, pair[1]]), log2(0.1 + mat[, pair[2]]),
              colpal = 'crazyblue', pch = 19, cex = 0.5,
              xlab = paste0(pair[1], " [log2(counts)]"),
              ylab = paste0(pair[2], " [log2(counts)]"))
  box(col = "black")
}

# TPM of all genes (gray) next to the pluripotency gene set (red), per sample.
par(mfrow = c(1, 1), mar = c(7, 5, 5, 1), bty = 'n')
is_pluripotency <- rownames(chimp_tpm) %in% pluripotencyGenes$V1
boxplot(chimp_tpm[, 'Mandy6'],
        chimp_tpm[is_pluripotency, 'Mandy6'],
        chimp_tpm[, 'Mandy4'],
        chimp_tpm[is_pluripotency, 'Mandy4'],
        chimp_tpm[, 'SandraA'],
        chimp_tpm[is_pluripotency, 'SandraA'],
        border = rep(c('gray', 'red'), 3), main = '', col = 'white',
        ylab = expression('TPM'), outline = FALSE,
        ylim = c(0, 60), bty = 'n', notch = FALSE, lwd = 2,
        names = rep(c("all genes", "Pluripotency"), 3), las = 2)

# Gene names from the UniProt keyword export: the Gene.Names field of every
# entry is split on spaces and everything is flattened into one vector.
id_DAVID_all <- read.delim(
  paste0(outputs_directory, "uniprotkb_keyword_KW_0991_2023_09_01.tsv"),
  header = TRUE
)
id_DAVID_all <- unlist(lapply(
  split(id_DAVID_all$Gene.Names, id_DAVID_all$Entry),
  strsplit, " "
))
# Ensembl IDs of those genes, then the subset that is down-regulated.
in_keyword_list <- genemapu$external_gene_name %in% id_DAVID_all
id_DAVID_all_ensg <- unique(genemapu$ensembl_gene_id[in_keyword_list])
id <- id_DAVID_all_ensg[id_DAVID_all_ensg %in% hits_dn$ensembl_id]
# Expression table of the up-regulated "extracellular exosome" genes.
up_fa <- read.delim(paste0(outputs_directory, "hits_up_DAVID_KEGG.txt"))
exosomal_genes <- unique(unlist(strsplit(
  up_fa$Genes[up_fa$Term == "GO:0070062~extracellular exosome"], ", "
)))
# Exclude ENSG00000285762. Previously the filtered vector was stored in
# `exosome` while the unfiltered `exosomal_genes` was used below, so the
# exclusion had no effect; x[-which(cond)] would also have emptied the vector
# when the ID was absent. `exosome` is still assigned for any later use.
exosomal_genes <- exosomal_genes[!exosomal_genes %in% "ENSG00000285762"]
exosome <- exosomal_genes
tpm_norm_count_table_EX <- tpm_norm_count_table[
  rownames(tpm_norm_count_table) %in% exosomal_genes,
  3:ncol(tpm_norm_count_table)
]
tpm_norm_count_table_ex <- as.data.frame(log10(0.1 + tpm_norm_count_table_EX))
rownames(tpm_norm_count_table_ex) <- hits_up$hgnc_symbol[
  match(rownames(tpm_norm_count_table_ex), hits_up$ensembl_id)
]

# Human cortex gene counts; the first line of the file is skipped.
human_count <- read.delim(
  paste0(outputs_directory, 'gene_counts_human_dec2022.txt'), skip = 1
)
# Human vs macaque whole-cortex bulk RNA-seq: DESeq2 fit and significant genes.
macaque_count <- read.delim(paste0(outputs_directory, 'gene_counts_macaque.txt'),
                            skip = 1)
# The two tables are combined column-wise below, so their genes must be in the
# same order. Enforce this instead of only printing the comparison.
stopifnot(nrow(human_count) == nrow(macaque_count),
          all(human_count$Geneid == macaque_count$Geneid))
APlab_count <- data.frame(Hs_CTX_WT_Brain_S3A1_M = human_count[, 7],
                          Hs_CTX_WT_Brain_S7A1_M = human_count[, 8],
                          Hs_CTX_WT_Brain_S2A1_M = human_count[, 9],
                          Hs_CTX_WT_Brain_S1A1_M = human_count[, 10],
                          Hs_CTX_WT_Brain_S6A1_F = human_count[, 11],
                          Mm_CTX_WT_Brain_10506_M = macaque_count[, 7],
                          Mm_CTX_WT_Brain_10521_F = macaque_count[, 8],
                          row.names = human_count$Geneid)
# Sample sheet: five human (HS) and two macaque (MM) samples.
brain_met <- data.frame(
  species = factor(c(rep("HS", 5), c('MM', 'MM')), levels = c("HS", "MM")),
  sample = 'WholeCortex',
  sex = c('M', 'M', 'M', 'M', 'F', 'M', 'F'),
  row.names = colnames(APlab_count)
)
brain_bulk <- DESeqDataSetFromMatrix(
  countData = APlab_count,
  colData = brain_met,
  design = ~ species)
brain_bulk <- estimateSizeFactors(brain_bulk)
brain_bulk <- DESeq(brain_bulk)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
vst_data <- vst(brain_bulk, blind = TRUE)
log_data <- rlog(brain_bulk, blind = TRUE)
normalized_counts <- counts(brain_bulk, normalized = TRUE)
# HS is the numerator of the contrast: positive log2FC = higher in human.
brain_bulk_PL_res <- results(brain_bulk, contrast = c("species", "HS", "MM"))
brain_bulk_PL_sig <- brain_bulk_PL_res[!is.na(brain_bulk_PL_res$padj), ]
brain_bulk_PL_sig <- brain_bulk_PL_sig[brain_bulk_PL_sig$padj < 0.01, ]
brain_bulk_PL_sig_down <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange < (0), ]
brain_bulk_PL_sig_up <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange > (0), ]

# External RNA-seq tables. NOTE: brain_met is overwritten here with the
# external metadata; brain_bulk keeps its own copy of the sheet built above.
brain_tpm <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_TPMCOUNTS.tsv'),
                        header = TRUE, as.is = TRUE)
brain_met <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_METADATA.tsv'),
                        header = TRUE, as.is = TRUE)
brain_counts <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_COUNTDATA.tsv'),
                           header = TRUE, as.is = TRUE)
rownames(brain_met) <- brain_met$sample_names
# Put the count columns in the order of the metadata rows.
brain_counts <- brain_counts[, match(rownames(brain_met), colnames(brain_counts))]

load(paste0(objects_directory, "ensembl_hg38_genemap.RData"))
load(paste0(objects_directory, "GTF_Annotation.RData"))
# One annotation row per Ensembl gene (first occurrence wins).
genemapu <- genemap[!duplicated(genemap$ensembl_gene_id), ]
# Load objects from other vignettes
load(paste0(objects_directory, "bda_final.RData"))
load(paste0(objects_directory, "DEseq2_RNA.RData"))
# Gene names from the UniProt keyword export: the Gene.Names field of every
# entry is split on spaces and flattened into one vector.
id_DAVID_all <- read.delim(
  paste0(outputs_directory, "uniprotkb_keyword_KW_0991_2023_09_01.tsv"),
  header = TRUE
)
id_DAVID_all <- unlist(lapply(split(id_DAVID_all$Gene.Names, id_DAVID_all$Entry),
                              function(x) {
                                strsplit(x, " ")
                              }))
id_DAVID_all_ensg <- unique(
  genemapu$ensembl_gene_id[genemapu$external_gene_name %in% id_DAVID_all]
)
id <- id_DAVID_all_ensg[id_DAVID_all_ensg %in% hits_dn$ensembl_id]

# Expression table of the up-regulated "extracellular exosome" genes.
up_fa <- read.delim(paste0(outputs_directory, "hits_up_DAVID_KEGG.txt"))
exosomal_genes <- unique(unlist(strsplit(
  up_fa$Genes[up_fa$Term == "GO:0070062~extracellular exosome"], ", "
)))
# Exclude ENSG00000285762. Previously the filtered vector went into `exosome`
# while the unfiltered `exosomal_genes` was used below, so the exclusion had
# no effect; x[-which(cond)] would also have emptied the vector when the ID
# was absent. `exosome` is still assigned for any later use.
exosomal_genes <- exosomal_genes[!exosomal_genes %in% "ENSG00000285762"]
exosome <- exosomal_genes
tpm_norm_count_table_EX <- tpm_norm_count_table[
  rownames(tpm_norm_count_table) %in% exosomal_genes,
  3:ncol(tpm_norm_count_table)
]
tpm_norm_count_table_ex <- as.data.frame(log10(0.1 + tpm_norm_count_table_EX))
rownames(tpm_norm_count_table_ex) <- hits_up$hgnc_symbol[
  match(rownames(tpm_norm_count_table_ex), hits_up$ensembl_id)
]

# Human vs macaque whole-cortex bulk RNA-seq.
human_count <- read.delim(
  paste0(outputs_directory, 'gene_counts_human_dec2022.txt'), skip = 1
)
macaque_count <- read.delim(paste0(outputs_directory, 'gene_counts_macaque.txt'),
                            skip = 1)
# The two tables are combined column-wise below, so their genes must be in the
# same order. Enforce this instead of only printing the comparison.
stopifnot(nrow(human_count) == nrow(macaque_count),
          all(human_count$Geneid == macaque_count$Geneid))
APlab_count <- data.frame(Hs_CTX_WT_Brain_S3A1_M = human_count[, 7],
                          Hs_CTX_WT_Brain_S7A1_M = human_count[, 8],
                          Hs_CTX_WT_Brain_S2A1_M = human_count[, 9],
                          Hs_CTX_WT_Brain_S1A1_M = human_count[, 10],
                          Hs_CTX_WT_Brain_S6A1_F = human_count[, 11],
                          Mm_CTX_WT_Brain_10506_M = macaque_count[, 7],
                          Mm_CTX_WT_Brain_10521_F = macaque_count[, 8],
                          row.names = human_count$Geneid)
# Sample sheet: five human (HS) and two macaque (MM) samples.
brain_met <- data.frame(
  species = factor(c(rep("HS", 5), c('MM', 'MM')), levels = c("HS", "MM")),
  sample = 'WholeCortex',
  sex = c('M', 'M', 'M', 'M', 'F', 'M', 'F'),
  row.names = colnames(APlab_count)
)
brain_bulk <- DESeqDataSetFromMatrix(
  countData = APlab_count,
  colData = brain_met,
  design = ~ species)
brain_bulk <- estimateSizeFactors(brain_bulk)
brain_bulk <- DESeq(brain_bulk)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
vst_data <- vst(brain_bulk, blind = TRUE)
log_data <- rlog(brain_bulk, blind = TRUE)
normalized_counts <- counts(brain_bulk, normalized = TRUE)
# HS is the numerator of the contrast: positive log2FC = higher in human.
brain_bulk_PL_res <- results(brain_bulk, contrast = c("species", "HS", "MM"))
brain_bulk_PL_sig <- brain_bulk_PL_res[!is.na(brain_bulk_PL_res$padj), ]
brain_bulk_PL_sig <- brain_bulk_PL_sig[brain_bulk_PL_sig$padj < 0.01, ]
brain_bulk_PL_sig_down <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange < (0), ]
brain_bulk_PL_sig_up <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange > (0), ]

# External RNA-seq tables. NOTE: brain_met is overwritten here with the
# external metadata; brain_bulk keeps its own copy of the sheet built above.
brain_tpm <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_TPMCOUNTS.tsv'),
                        header = TRUE, as.is = TRUE)
brain_met <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_METADATA.tsv'),
                        header = TRUE, as.is = TRUE)
brain_counts <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_COUNTDATA.tsv'),
                           header = TRUE, as.is = TRUE)
rownames(brain_met) <- brain_met$sample_names
# Put the count columns in the order of the metadata rows.
brain_counts <- brain_counts[, match(rownames(brain_met), colnames(brain_counts))]
# Retain normal Cortex and Female samples and perform DESeq2 based normalisation
# Khaitovich Lab data: normal, female cortex samples only.
# The sample filter is computed once and applied to both the count columns and
# the metadata rows, so the two can never drift apart.
is_kl_cortex_female <- brain_met$lab == "Khaitovich Lab" &
  brain_met$sources %like% 'Cortex' &
  brain_met$condition == "Normal" &
  brain_met$sex == "F"
klrna <- brain_counts[, is_kl_cortex_female]
brain_met_kl <- brain_met[is_kl_cortex_female, ]

# The data set used to be built and size-factored twice with identical inputs;
# one pass is enough. DESeq() reuses the existing size factors, so the
# normalised counts are the same as after estimateSizeFactors() alone.
brain_bulk_kl <- DESeqDataSetFromMatrix(
  countData = klrna,
  colData = brain_met_kl,
  design = ~ species)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
brain_bulk_kl <- estimateSizeFactors(brain_bulk_kl)
brain_bulk_kl <- DESeq(brain_bulk_kl)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
brain_bulk_kl_normalized <- counts(brain_bulk_kl, normalized = TRUE)
# Human (HS) is the numerator in every contrast.
brain_bulk_KL_res <- results(brain_bulk_kl, contrast = c("species", "HS", "MM"))
brain_bulk_KL_res2 <- results(brain_bulk_kl, contrast = c("species", "HS", "PT"))
brain_bulk_KL_res3 <- results(brain_bulk_kl, contrast = c("species", "HS", "PP"))
# All the other diseases - do they show such a trend? Fix the gene names! https://www.mirbase.org/ftp.shtml
# Diseases with more than 30 entries in bda_final.
choroby <- table(bda_final$Disease)
choroby <- choroby[choroby > 30]

# Collect, for every disease in `diseases`, the log2 fold changes of its genes
# from one DESeq2 results table.
#   res           results table with Ensembl IDs as row names and a
#                 log2FoldChange column
#   disease_table table with `ensid` and `Disease` columns
#   diseases      disease names to keep
# Returns a list (one element per disease) of log2FC vectors named by gene ID;
# genes absent from `res` or with an NA fold change are dropped.
collectDiseaseLFC <- function(res, disease_table, diseases) {
  keep <- disease_table$Disease %in% diseases
  lapply(split(disease_table$ensid[keep], disease_table$Disease[keep]),
         function(genes) {
           pt <- res[rownames(res) %in% genes, ]
           pt <- pt[!is.na(pt$log2FoldChange), ]
           lfc <- pt$log2FoldChange
           names(lfc) <- rownames(pt)
           lfc
         })
}

# Own data (HS vs MM), Khaitovich HS vs MM, Khaitovich HS vs PT.
LFC_disease <- collectDiseaseLFC(brain_bulk_PL_res, bda_final, names(choroby))
LFC_disease2 <- collectDiseaseLFC(brain_bulk_KL_res, bda_final, names(choroby))
LFC_disease3 <- collectDiseaseLFC(brain_bulk_KL_res2, bda_final, names(choroby))
### --------------------
# Per-disease p-value: t.test() with its default arguments on the log2FCs.
ChosenFunction <- function(x) {
  t.test(x)$p.value
}
LFC_disease_pv <- unlist(lapply(LFC_disease, ChosenFunction))
LFC_disease2_pv <- unlist(lapply(LFC_disease2, ChosenFunction))
LFC_disease3_pv <- unlist(lapply(LFC_disease3, ChosenFunction))

# Per-disease effect size: median log2FC.
ChosenFunction <- median
LFC_disease_fc <- unlist(lapply(LFC_disease, ChosenFunction))
LFC_disease2_fc <- unlist(lapply(LFC_disease2, ChosenFunction))
LFC_disease3_fc <- unlist(lapply(LFC_disease3, ChosenFunction))
cols <- colorRampPalette(c("orange3", "white", "aquamarine3"))(length(LFC_disease_fc))

# Bars are sorted by increasing median log2FC; bars with p < 0.05 are
# highlighted in aquamarine, the rest are gray.
fc_rank <- order(LFC_disease_fc, decreasing = FALSE)
par(mfrow = c(1, 1), mar = c(15, 4, 5, 1))
barplot(LFC_disease_fc[fc_rank],
        col = ifelse(LFC_disease_pv[fc_rank] < 0.05, "aquamarine3", "gray80"),
        las = 2, axes = FALSE, ylim = c(-1, 1), ylab = "log[2]FC (Human/NHP)")
axis(2, lwd = 2, las = 2, cex.lab = 1.5)

fc_rank <- order(LFC_disease2_fc, decreasing = FALSE)
par(mfrow = c(1, 1), mar = c(15, 4, 5, 1))
barplot(LFC_disease2_fc[fc_rank],
        col = ifelse(LFC_disease2_pv[fc_rank] < 0.05, "aquamarine3", "gray80"),
        las = 2, axes = FALSE, ylab = "log[2]FC (Human/NHP)")
axis(2, lwd = 2, las = 2, cex.lab = 1.5)

fc_rank <- order(LFC_disease3_fc, decreasing = FALSE)
par(mfrow = c(1, 1), mar = c(15, 4, 1, 1))
barplot(LFC_disease3_fc[fc_rank],
        col = ifelse(LFC_disease3_pv[fc_rank] < 0.05, "aquamarine3", "gray80"),
        las = 2, axes = FALSE, ylim = c(-0.4, 0.4), ylab = "log[2]FC (Human/NHP)")
axis(2, lwd = 2, las = 2, cex.lab = 1.5)

library(beeswarm)
library(ggpubr)
library(dplyr)
# Expression of ENSG00000007866 per species in both data sets
# (mean of the normalised counts with standard-error bars).
normalized_counts_PL <- counts(brain_bulk, normalized = TRUE)
normalized_counts_KL <- counts(brain_bulk_kl, normalized = TRUE)
samples_KL <- brain_met[brain_met$lab == "Khaitovich Lab" &
                          brain_met$sources %like% 'Cortex' &
                          brain_met$condition == "Normal" &
                          brain_met$sex == "F", ]

# Own data: five human samples followed by two macaque samples.
tead3_pl <- data.frame(
  expression = normalized_counts_PL['ENSG00000007866', ],
  species = c(rep("HS", 5), rep("MM", 2))
)
# Khaitovich data: species looked up per sample, restricted to HS / PT / MM.
tead3_kl <- data.frame(
  expression = normalized_counts_KL['ENSG00000007866', ],
  species = samples_KL$species[match(colnames(normalized_counts_KL),
                                     samples_KL$sample_names)]
)
tead3_kl <- tead3_kl[tead3_kl$species %in% c("HS", "PT", "MM"), ]
tead3_kl$species <- factor(tead3_kl$species, levels = c("HS", "PT", "MM"))

# Standard error of the mean.
se <- function(x) {
  sd(x) / sqrt(length(x))
}

tead3_by_species <- group_by(tead3_pl, species)
my_dat <- summarise(tead3_by_species, mean = mean(expression), se = se(expression))
ggplot(my_dat, aes(x = species, y = mean, fill = species)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2,
                position = position_dodge(.9)) +
  theme_classic() +
  ylim(c(0, 50)) +
  scale_fill_manual(values = c('gray', 'blue'))

tead3_by_species <- group_by(tead3_kl, species)
my_dat <- summarise(tead3_by_species, mean = mean(expression), se = se(expression))
ggplot(my_dat, aes(x = species, y = mean, fill = species)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2,
                position = position_dodge(.9)) +
  theme_classic() +
  ylim(c(0, 90)) +
  scale_fill_manual(values = c('gray', 'red', 'blue'))
# P-values
brain_bulk_KL_res['ENSG00000007866',]## log2 fold change (MLE): species HS vs MM
## Wald test p-value: species HS vs MM
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000007866 39.6105 1.75034 0.441979 3.96023 0.000074879
## padj
## <numeric>
## ENSG00000007866 0.0004964
brain_bulk_KL_res3['ENSG00000007866',]## log2 fold change (MLE): species HS vs PP
## Wald test p-value: species HS vs PP
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000007866 39.6105 1.16246 0.350057 3.32078 0.000897662
## padj
## <numeric>
## ENSG00000007866 0.00614593
brain_bulk_PL_sig['ENSG00000007866',]## log2 fold change (MLE): species HS vs MM
## Wald test p-value: species HS vs MM
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000007866 30.2115 1.78403 0.581936 3.06568 0.00217175
## padj
## <numeric>
## ENSG00000007866 0.00825217
Domains were identified using TopDom and are read in here. We only consider boundaries that are supported by two replicates. The first chunk lifts the boundary coordinates over between the human and chimpanzee assemblies.
# Read the 25 kb TopDom calls: two human samples (ele, fas) and two
# chimpanzee samples (man, saa).
domains_dir <- paste0(outputs_directory, "25kb_domains/")
ele_domains <- readTADs(paste0(domains_dir, "hs_ele_krnorm.all.25kb.topdom.bedpe"))
fas_domains <- readTADs(paste0(domains_dir, "hs_pf_krnorm.all.25kb.topdom.bedpe"))
man_domains <- readTADs(paste0(domains_dir, "pt_mandy_krnorm.all.25kb.topdom.bedpe"))
saa_domains <- readTADs(paste0(domains_dir, "pt_sandra_krnorm.all.25kb.topdom.bedpe"))

# Human boundaries -> chimpanzee assembly.
ele_domains_lift_over_Pt6 <- liftOverBoundaries(ele_domains, chain_HsPt, WSize = 500)
fas_domains_lift_over_Pt6 <- liftOverBoundaries(fas_domains, chain_HsPt, WSize = 500)
bed_file <- c(ele_domains_lift_over_Pt6$lifted_over,
              fas_domains_lift_over_Pt6$lifted_over)
export.bed(bed_file,
           con = paste0(outputs_directory, "ele_fas_boundaries_lift_Pt6.bed"))
save(ele_domains_lift_over_Pt6, fas_domains_lift_over_Pt6,
     file = paste0(objects_directory, "ele_fas_boundaries_lift_Pt6.RData"))

# Chimpanzee boundaries -> human assembly.
man_domains_lift_over_hg38 <- liftOverBoundaries(man_domains, chain_PtHs, WSize = 500)
saa_domains_lift_over_hg38 <- liftOverBoundaries(saa_domains, chain_PtHs, WSize = 500)
bed_file <- c(man_domains_lift_over_hg38$lifted_over,
              saa_domains_lift_over_hg38$lifted_over)
export.bed(bed_file,
           con = paste0(outputs_directory, "man_saa_boundaries_lift_Hg38.bed"))
save(man_domains_lift_over_hg38, saa_domains_lift_over_hg38,
     file = paste0(objects_directory, "man_saa_domains_lift_Hg38.RData"))
# We display the reproducibility
# Re-read the TopDom calls and the saved lift-over results.
domains_dir <- paste0(outputs_directory, "25kb_domains/")
ele_domains <- readTADs(paste0(domains_dir, "hs_ele_krnorm.all.25kb.topdom.bedpe"))
fas_domains <- readTADs(paste0(domains_dir, "hs_pf_krnorm.all.25kb.topdom.bedpe"))
man_domains <- readTADs(paste0(domains_dir, "pt_mandy_krnorm.all.25kb.topdom.bedpe"))
saa_domains <- readTADs(paste0(domains_dir, "pt_sandra_krnorm.all.25kb.topdom.bedpe"))
load(paste0(objects_directory, "man_saa_domains_lift_Hg38.RData"))
load(paste0(objects_directory, "ele_fas_boundaries_lift_Pt6.RData"))

# Combined boundary sets per species (as returned by getAllBoundaries).
all_human_boundaires <- getAllBoundaries(ele_domains$boundaries, fas_domains$boundaries)
all_chimp_boundaires <- getAllBoundaries(man_domains$boundaries, saa_domains$boundaries)

# Venn diagram: which combined human boundaries are hit by each replicate.
hits_ele <- findOverlaps(all_human_boundaires, ele_domains$boundaries)
hits_fas <- findOverlaps(all_human_boundaires, fas_domains$boundaries)
peak_list <- list(ELE30 = unique(queryHits(hits_ele)),
                  PF = unique(queryHits(hits_fas)))
ggVennDiagram(peak_list, label_alpha = 0) + scale_fill_distiller(direction = 1)

# Same for the chimpanzee replicates.
hits_saa <- findOverlaps(all_chimp_boundaires, saa_domains$boundaries)
hits_man <- findOverlaps(all_chimp_boundaires, man_domains$boundaries)
peak_list <- list(Sandra = unique(queryHits(hits_saa)),
                  Mandy = unique(queryHits(hits_man)))
ggVennDiagram(peak_list, label_alpha = 0) + scale_fill_distiller(direction = 1)

# Stacked bars from hard-coded counts: one column per species, three
# categories per column.
m <- cbind(c(6100, 1021, 769),
           c(6000, 593, 1131))
barplot(m, col = c("green4", "steelblue3", "blue4"), ylim = c(0, 8000),
        ylab = "Loops", names.arg = c("Human", "Chimpanzee"))
axis(2, lwd = 2)

export.bed(all_human_boundaires,
           con = paste0(outputs_directory, "all_human_boundaires_input.bed"))
export.bed(all_chimp_boundaires,
           con = paste0(outputs_directory, "all_chimp_boundaires.bed"))
# Check the evolutionary conservation of the reproducible boundaries.
# Reproducible boundaries: boundaries of the first replicate that overlap a
# boundary of the second replicate. unique() keeps each boundary once; without
# it a boundary overlapping several partner boundaries is repeated once per
# overlap.
human_boundaires_reproducible <- ele_domains$boundaries[
  unique(queryHits(findOverlaps(ele_domains$boundaries, fas_domains$boundaries)))
]
chimp_domains <- vector("list", 1)
names(chimp_domains) <- "boundaries"
chimp_domains$boundaries <- man_domains$boundaries[
  unique(queryHits(findOverlaps(man_domains$boundaries, saa_domains$boundaries)))
]
# Lift the reproducible chimpanzee boundaries over to the human assembly.
chimp_domains_lift_over_hg38 <- liftOverBoundaries(chimp_domains, chain_PtHs, WSize = 500)
export.bed(chimp_domains_lift_over_hg38$lifted_over,
           con = paste0(outputs_directory, "chimp_domains_lift_over_hg38.bed"))
save(chimp_domains_lift_over_hg38,
     file = paste0(objects_directory, "chimp_domains_lift_over_hg38.RData"))
save(human_boundaires_reproducible,
     file = paste0(objects_directory, "human_boundaires_reproducible.RData"))
# Display the result
load(paste0(objects_directory, "chimp_domains_lift_over_hg38.RData"))
load(paste0(objects_directory, "human_boundaires_reproducible.RData"))

chimp_lifted <- chimp_domains_lift_over_hg38$lifted_over
chimp_original <- chimp_domains_lift_over_hg38$original

# Venn diagram of reproducible human boundaries vs lifted-over chimp boundaries.
allBound <- getAllBoundaries(human_boundaires_reproducible, chimp_lifted)
peak_list <- list(
  Human = unique(queryHits(findOverlaps(allBound, human_boundaires_reproducible))),
  Chimp = unique(queryHits(findOverlaps(allBound, chimp_lifted)))
)
ggVennDiagram(peak_list, label_alpha = 0) + scale_fill_distiller(direction = 1)

# Shared boundaries in human coordinates: reproducible human boundaries that
# overlap a lifted-over chimp boundary.
shared_idx <- unique(queryHits(findOverlaps(human_boundaires_reproducible, chimp_lifted)))
all_evol_shared_boundaries <- human_boundaires_reproducible[shared_idx]

# The same set in chimp coordinates: original chimp boundaries whose name
# matches a lifted-over boundary overlapping a shared boundary.
shared_lifted_names <- names(
  chimp_lifted[queryHits(findOverlaps(chimp_lifted, all_evol_shared_boundaries))]
)
all_evol_shared_boundaries_Pt <- chimp_original[
  which(names(chimp_original) %in% shared_lifted_names)
]
# Sometimes the human boundaries in the chimp have no reads or are in regions
# with an overtly low mappability, and vice versa. We want to get rid of those
# instances. Boundaries called in human should not be in the vicinity of low
# coverage regions in the human and in the chimp. Boundaries called in the
# chimp should not be in the vicinity of low coverage bins in chimp and in
# human.
# Find genomic bins with little long-range Hi-C signal.
#   hic          named list (one element per chromosome) whose elements carry
#                a sparse contact matrix in $LFM
#   bins         genomic bins; assumed to be ordered per chromosome like the
#                rows of the matching $LFM -- TODO confirm
#   min_distance only contacts with j > i + min_distance are counted
#   min_signal   bins whose summed contacts are below this value are returned
# Returns the low-coverage bins of all chromosomes concatenated with c().
# The chromosome name is printed as a progress indicator.
findLowCoverageBins <- function(hic, bins, min_distance = 200, min_signal = 100) {
  do.call("c", lapply(as.list(names(hic)), function(x) {
    print(x)
    thischr <- bins[which(chrom(bins) == x)]
    m <- hic[[x]]
    # Triplet (i, j, x) form of the contact matrix, long-range entries only.
    tp <- as.data.frame(summary(m$LFM))
    tp <- tp[tp$j > (tp$i + min_distance), ]
    M <- Matrix::Matrix(0, nrow = nrow(m$LFM), ncol = ncol(m$LFM), sparse = TRUE)
    M[cbind(tp$i, tp$j)] <- tp$x
    # Add the transpose so that each bin's row sum counts contacts on both sides.
    M <- M + t(M)
    thischr <- thischr[which(rowSums(M) < min_signal)]
    return(thischr)
  }))
}

# Human replicate 1 / chimp replicate 1.
lowCoverageBinsHG38 <- findLowCoverageBins(ele, gagr)
export.bed(lowCoverageBinsHG38,
           con = paste0(outputs_directory, "lowCoverageBinsHG38.bed"))
lowCoverageBinsPT6 <- findLowCoverageBins(mandy, gagr_pt)
export.bed(lowCoverageBinsPT6,
           con = paste0(outputs_directory, "lowCoverageBinsPaT6.bed"))
# Human replicate 2 / chimp replicate 2.
lowCoverageBinsHG38_2 <- findLowCoverageBins(fa, gagr)
export.bed(lowCoverageBinsHG38_2,
           con = paste0(outputs_directory, "lowCoverageBinsHG38_2.bed"))
lowCoverageBinsPT6_2 <- findLowCoverageBins(sa, gagr_pt)
export.bed(lowCoverageBinsPT6_2,
           con = paste0(outputs_directory, "lowCoverageBinsPaT6_2.bed"))

save(lowCoverageBinsHG38, lowCoverageBinsPT6,
     file = paste0(objects_directory, "lowCoverageBins.RData"))
save(lowCoverageBinsHG38_2, lowCoverageBinsPT6_2,
     file = paste0(objects_directory, "lowCoverageBins2.RData"))
# Lift over these intervals of low coverage. Then lift over the boundaries for
# the next steps of the analyses.
load(paste0(objects_directory, "lowCoverageBins.RData"))
load(paste0(objects_directory, "lowCoverageBins2.RData"))
# Widen every low-coverage bin to 50 kb around its centre.
lowCoverageBinsHG38 <- GenomicRanges::resize(lowCoverageBinsHG38, 50000, fix = "center")
lowCoverageBinsHG38_2 <- GenomicRanges::resize(lowCoverageBinsHG38_2, 50000, fix = "center")
lowCoverageBinsPT6 <- GenomicRanges::resize(lowCoverageBinsPT6, 50000, fix = "center")
lowCoverageBinsPT6_2 <- GenomicRanges::resize(lowCoverageBinsPT6_2, 50000, fix = "center")
# Keep bins of the first replicate that overlap a bin of the second replicate.
lowCoverageBinsHG38 <- lowCoverageBinsHG38[
  queryHits(findOverlaps(lowCoverageBinsHG38, lowCoverageBinsHG38_2))
]
lowCoverageBinsPT6 <- lowCoverageBinsPT6[
  queryHits(findOverlaps(lowCoverageBinsPT6, lowCoverageBinsPT6_2))
]
# used to be 10000
# Lift over a 500 bp window around each bin centre, then widen to 50 kb again.
lowCoverageBinsHG38_Pt <- GenomicRanges::resize(
  unlist(liftOver(GenomicRanges::resize(lowCoverageBinsHG38, 500, fix = "center"),
                  chain = chain_HsPt)),
  50000, fix = "center")
lowCoverageBinsPt_Hg38 <- GenomicRanges::resize(
  unlist(liftOver(GenomicRanges::resize(lowCoverageBinsPT6, 500, fix = "center"),
                  chain = chain_PtHs)),
  50000, fix = "center")
export.bed(lowCoverageBinsPt_Hg38,
           con = paste0(outputs_directory, "lowCoverageBinsPt_Hg38.bed"))
# Save BOTH lifted-over sets. lowCoverageBinsPt_Hg38 used to be listed twice,
# so lowCoverageBinsHG38_Pt was never written to the .RData file.
save(lowCoverageBinsHG38_Pt, lowCoverageBinsPt_Hg38,
     file = paste0(objects_directory, "low_coverage_bins_lifted_over.RData"))
# Get to the list of human and chimp specific boundaries. Consider boundaries
# observed in both replicates. Remove boundaries within regions with poor
# mappability in both species (50kb intervals centered on the lifted over
# region).
load(paste0(objects_directory, "ele_fas_boundaries_lift_Pt6.RData"))
load(paste0(objects_directory, "man_saa_domains_lift_Hg38.RData"))
### -----------
all_human_boundaires_input <- getAllBoundaries(ele_domains$boundaries,
                                               fas_domains$boundaries)
all_chimp_boundaires_input <- getAllBoundaries(man_domains$boundaries,
                                               saa_domains$boundaries)
all_chimp_boundaires_Hg38 <- getAllBoundaries(man_domains_lift_over_hg38$lifted_over,
                                              saa_domains_lift_over_hg38$lifted_over)
export.bed(all_chimp_boundaires_Hg38,
           con = paste0(outputs_directory, "all_chimp_boundaires_Hg38_tp.bed"))
### --------------------
### remove boundaries that intersect poorly mappable regions in the two species
# A logical mask is used instead of x[-queryHits(...)]: with no overlaps the
# negative-index form becomes x[-integer(0)] and returns an EMPTY set instead
# of the full one.
all_human_boundaires <- all_human_boundaires_input[
  !overlapsAny(all_human_boundaires_input,
               c(lowCoverageBinsHG38, lowCoverageBinsPt_Hg38))
]
all_chimp_boundaires <- all_chimp_boundaires_input[
  !overlapsAny(all_chimp_boundaires_input,
               c(lowCoverageBinsPT6, lowCoverageBinsHG38_Pt))
]
export.bed(all_human_boundaires,
           con = paste0(outputs_directory, "all_human_boundaires_Hg38.bed"))
export.bed(all_chimp_boundaires,
           con = paste0(outputs_directory, "all_chimp_boundaires_Pt6.bed"))
# Liftovers: - we pick the longest one - lift over needs to be on the same chromosome.
Identify species-specific boundaries. To be called species-specific, a boundary needs to be: - found in both replicates of this species - not in a poorly mappable region in either of the two species - never found in the other species - amenable to liftOver.
# All "remove everything that overlaps" steps below use a logical mask.
# The former x[-queryHits(findOverlaps(x, y))] form returns an EMPTY set when
# nothing overlaps, because x[-integer(0)] selects nothing.

# Human: reproducible boundaries never seen among lifted-over chimp
# boundaries and outside poorly mappable regions of either species.
human_specific_boundaries <- human_boundaires_reproducible[
  !overlapsAny(human_boundaires_reproducible, all_chimp_boundaires_Hg38)
]
human_specific_boundaries <- human_specific_boundaries[
  !overlapsAny(human_specific_boundaries,
               reduce(c(lowCoverageBinsHG38, lowCoverageBinsPt_Hg38)))
]
# Chimp: the mirror-image filter in chimp coordinates.
chimp_specific_boundaries <- chimp_domains$boundaries[
  !overlapsAny(chimp_domains$boundaries,
               reduce(c(ele_domains_lift_over_Pt6$lifted_over,
                        fas_domains_lift_over_Pt6$lifted_over)))
]
chimp_specific_boundaries <- chimp_specific_boundaries[
  !overlapsAny(chimp_specific_boundaries,
               reduce(c(lowCoverageBinsPT6, lowCoverageBinsHG38_Pt)))
]
## Boundaries need to be able to be lifted over! Otherwise we do not know if the boundary is lost because it is not lifted over or it is lost because it was not called
chimp_specific_boundaries_Hg38 <- liftOverBoundaries(
  list(boundaries = chimp_specific_boundaries), chain_PtHs, WSize = 500)
human_specific_boundaries_Pt <- liftOverBoundaries(
  list(boundaries = human_specific_boundaries), chain_HsPt, WSize = 500)
## double filtering for lift overs, any chimp boundary lifted over to hg38 should not be observed in human boundaries
# Keep only boundaries that could be lifted over, as 50 kb windows in both the
# original and the lifted-over coordinates.
chimp_specific_boundaries <- GenomicRanges::resize(
  chimp_specific_boundaries_Hg38$original[
    which(names(chimp_specific_boundaries_Hg38$original) %in%
            names(chimp_specific_boundaries_Hg38$lifted_over))
  ],
  50000, fix = "center")
chimp_specific_boundaries_Hg38 <- GenomicRanges::resize(
  chimp_specific_boundaries_Hg38$lifted_over, 50000, fix = "center")
# The paired filtering below indexes both sets with the same mask, so the two
# must be parallel; this prints TRUE when their names agree.
all(names(chimp_specific_boundaries) == names(chimp_specific_boundaries_Hg38))
human_specific_boundaries <- GenomicRanges::resize(
  human_specific_boundaries_Pt$original[
    which(names(human_specific_boundaries_Pt$original) %in%
            names(human_specific_boundaries_Pt$lifted_over))
  ],
  50000, fix = "center")
human_specific_boundaries_Pt <- GenomicRanges::resize(
  human_specific_boundaries_Pt$lifted_over, 50000, fix = "center")
all(names(human_specific_boundaries) == names(human_specific_boundaries_Pt))

# Drop boundaries whose lifted-over window hits any boundary of the other
# species. Each mask is computed once and applied to both coordinate systems.
chimp_seen_in_human <- overlapsAny(chimp_specific_boundaries_Hg38,
                                   all_human_boundaires_input)
human_seen_in_chimp <- overlapsAny(human_specific_boundaries_Pt,
                                   all_chimp_boundaires_input)
chimp_specific_boundaries <- chimp_specific_boundaries[!chimp_seen_in_human]
human_specific_boundaries <- human_specific_boundaries[!human_seen_in_chimp]
chimp_specific_boundaries_Hg38 <- chimp_specific_boundaries_Hg38[!chimp_seen_in_human]
human_specific_boundaries_Pt <- human_specific_boundaries_Pt[!human_seen_in_chimp]

export.bed(chimp_specific_boundaries_Hg38,
           con = paste0(outputs_directory, "/chimp_specific_boundaries_Hg38.bed"))
export.bed(human_specific_boundaries_Pt,
           con = paste0(outputs_directory, "human_specific_boundaries_Pt.bed"))
export.bed(human_specific_boundaries,
           con = paste0(outputs_directory, "human_specific_boundaries.bed"))
export.bed(chimp_specific_boundaries,
           con = paste0(outputs_directory, "chimp_specific_boundaries.bed"))
##########################
# Both species-specific sets in human (hg38) coordinates.
species_specific_boundaries <- c(human_specific_boundaries, chimp_specific_boundaries_Hg38)
export.bed(species_specific_boundaries,
           con = paste0(outputs_directory, "species_specific_boundaries.bed"))
save(human_specific_boundaries, chimp_specific_boundaries_Hg38,
     human_specific_boundaries_Pt, chimp_specific_boundaries,
     file = paste0(objects_directory, "species_specific_boundaries.RData"))
##########################
# We have the species specific boundaries
load(paste0(objects_directory,"species_specific_boundaries.RData"))
load(paste0(objects_directory,"low_coverage_bins_lifted_over.RData"))
species_specific_boundaries = c( human_specific_boundaries, chimp_specific_boundaries_Hg38 )load(paste0(objects_directory,'si.RData'))
## Human Hi-C: read the dumped per-chromosome matrices of both human samples,
## run IPF() with `itn` iterations on each matrix and store the results.
chroms=paste0("chr",c(1:22,"X"))
chroms_combs_hs = data.frame( V1=chroms,
                              V2=chroms,stringsAsFactors = FALSE )
itn=20
## `chroms` is passed positionally, exactly where the original call repeated
## the same paste0() expression.
ele_lfm_5kb = read.hic_files( paste0(dumped_directory_ele), "",".matrix.txt", ga, chroms )
ele = lapply( ele_lfm_5kb, function(m){ IPF( m, numberOfIterations=itn ) } )
save( ele, file=paste0(objects_directory,"Ele30_hic.RData" ))
fa_lfm_5kb = read.hic_files( paste0(dumped_directory_fa), "",".matrix.txt", ga, chroms )
fa = lapply( fa_lfm_5kb, function(m){ IPF( m, numberOfIterations=itn ) } )
save( fa, file=paste0(objects_directory,"FetalAstrocytes_hic.RData" ))
## The statement below was fused onto the save() line above, which does not parse.
## NOTE(review): si_pt.RData presumably provides `ga_pt` / `gagr_pt` -- confirm.
load(paste0(objects_directory,'si_pt.RData'))
## Chimpanzee Hi-C: same processing as for the human samples, using the
## chromosome names chr1, chr2A, chr2B, chr3..chr22 and chrX.
chroms=paste0("chr",c(c(1,"2A","2B",3:22),"X"))
chroms_combs_pt = data.frame( V1=chroms,
                              V2=chroms,
                              stringsAsFactors = FALSE)
itn=20
## ---------------
mandy_lfm_5kb = read.hic_files( paste0(dumped_directory_mandy), "",".matrix.txt", ga_pt, chroms=chroms )
mandy = lapply( mandy_lfm_5kb, function(m){ IPF( m, numberOfIterations=itn ) } )
save( mandy, file=paste0(objects_directory,"Mandy_hic.RData" ) )
## ---------------
sa_lfm_5kb = read.hic_files( paste0(dumped_directory_sandra), "",".matrix.txt", ga_pt, chroms=chroms )
sa = lapply( sa_lfm_5kb, function(m){ IPF( m, numberOfIterations=itn ) } )
save( sa, file=paste0(objects_directory,"SandraA_hic.RData" ) )
## The assignment below was fused onto the save() line above, which does not parse.
## Insulation at the human specific boundaries in the `ele` matrices.
human_spe_bound_IS_ele = InsulationScore( human_specific_boundaries,
                                          ele, gagr, 5, 3, 10 )
save(human_spe_bound_IS_ele,file=paste0(objects_directory,"human_spe_bound_IS_ele.RData"))
## Insulation scores at human specific, chimp specific and shared boundaries.
## Every result is saved to its own RData file right after it is computed.
## NOTE(review): the meaning of the trailing 5, 3, 10 arguments is defined by
## InsulationScore(), which is not visible here.
## values for human samples
human_spe_bound_IS_fas = InsulationScore( human_specific_boundaries,
                                          fa, gagr, 5, 3, 10 )
save(human_spe_bound_IS_fas,file=paste0(objects_directory,"human_spe_bound_IS_fas.RData"))
chimp_spe_bound_IS_ele = InsulationScore( chimp_specific_boundaries_Hg38,
                                          ele, gagr, 5, 3, 10 )
save(chimp_spe_bound_IS_ele,file=paste0(objects_directory,"chimp_spe_bound_IS_ele.RData"))
chimp_spe_bound_IS_fas = InsulationScore( chimp_specific_boundaries_Hg38,
                                          fa, gagr, 5, 3, 10 )
save(chimp_spe_bound_IS_fas,file=paste0(objects_directory,"chimp_spe_bound_IS_fas.RData"))
shared_bound_IS_ele = InsulationScore( all_evol_shared_boundaries,
                                       ele, gagr, 5, 3, 10 )
save(shared_bound_IS_ele,file=paste0(objects_directory,"shared_bound_IS_ele.RData"))
shared_bound_IS_fas = InsulationScore( all_evol_shared_boundaries,
                                       fa, gagr, 5, 3, 10 )
save(shared_bound_IS_fas,file=paste0(objects_directory,"shared_bound_IS_fas.RData"))
## values for chimpanzee samples
human_spe_bound_IS_mandy = InsulationScore( human_specific_boundaries_Pt,
                                            mandy, gagr_pt, 5, 3, 10 )
save(human_spe_bound_IS_mandy,file=paste0(objects_directory,"human_spe_bound_IS_mandy.RData"))
human_spe_bound_IS_sandraA = InsulationScore( human_specific_boundaries_Pt,
                                              sa, gagr_pt, 5, 3, 10 )
save(human_spe_bound_IS_sandraA,file=paste0(objects_directory,"human_spe_bound_IS_sandraA.RData"))
chimp_spe_bound_IS_mandy = InsulationScore( chimp_specific_boundaries,
                                            mandy, gagr_pt, 5, 3, 10 )
save(chimp_spe_bound_IS_mandy,file=paste0(objects_directory,"chimp_spe_bound_IS_mandy.RData"))
chimp_spe_bound_IS_sandraA = InsulationScore( chimp_specific_boundaries,
                                              sa, gagr_pt, 5, 3, 10 )
save(chimp_spe_bound_IS_sandraA,file=paste0(objects_directory,"chimp_spe_bound_IS_sandraA.RData"))
shared_bound_IS_mandy = InsulationScore( all_evol_shared_boundaries_Pt,
                                         mandy, gagr_pt, 5, 3, 10 )
save(shared_bound_IS_mandy,file=paste0(objects_directory,"shared_bound_IS_mandy.RData"))
shared_bound_IS_sandraA = InsulationScore( all_evol_shared_boundaries_Pt,
                                           sa, gagr_pt, 5, 3, 10 )
save(shared_bound_IS_sandraA,file=paste0(objects_directory,"shared_bound_IS_sandraA.RData"))
# Species specific boundaries, insulation change
## Reload the twelve insulation matrices (3 boundary classes x 4 samples).
load(paste0(objects_directory,"human_spe_bound_IS_mandy.RData"))
load(paste0(objects_directory,"human_spe_bound_IS_fas.RData"))
load(paste0(objects_directory,"human_spe_bound_IS_ele.RData"))
load(paste0(objects_directory,"human_spe_bound_IS_sandraA.RData"))
load(paste0(objects_directory,"chimp_spe_bound_IS_mandy.RData"))
load(paste0(objects_directory,"chimp_spe_bound_IS_ele.RData"))
load(paste0(objects_directory,"chimp_spe_bound_IS_fas.RData"))
load(paste0(objects_directory,"chimp_spe_bound_IS_sandraA.RData"))
load(paste0(objects_directory,"shared_bound_IS_ele.RData"))
load(paste0(objects_directory,"shared_bound_IS_fas.RData"))
load(paste0(objects_directory,"shared_bound_IS_mandy.RData"))
load(paste0(objects_directory,"shared_bound_IS_sandraA.RData"))

## log2 of ( mean of columns 1 and 3 ) / ( column 2 ), computed per row.
## No pseudo-count is added here, so a zero in column 2 yields a non-finite value.
## NOTE(review): columns 1/3 are presumably the flanking windows and column 2 the
## boundary window -- confirm against InsulationScore().
.flank_over_centre_log2 = function( is_mat ){
  log2( rowMeans( is_mat[, c(1,3)] ) / is_mat[, 2] )
}

## Each matrix is replaced by its per-boundary log2 ratio vector.
human_spe_bound_IS_ele     = .flank_over_centre_log2( human_spe_bound_IS_ele )
human_spe_bound_IS_mandy   = .flank_over_centre_log2( human_spe_bound_IS_mandy )
human_spe_bound_IS_fas     = .flank_over_centre_log2( human_spe_bound_IS_fas )
human_spe_bound_IS_sandraA = .flank_over_centre_log2( human_spe_bound_IS_sandraA )
chimp_spe_bound_IS_ele     = .flank_over_centre_log2( chimp_spe_bound_IS_ele )
chimp_spe_bound_IS_mandy   = .flank_over_centre_log2( chimp_spe_bound_IS_mandy )
chimp_spe_bound_IS_fas     = .flank_over_centre_log2( chimp_spe_bound_IS_fas )
chimp_spe_bound_IS_sandraA = .flank_over_centre_log2( chimp_spe_bound_IS_sandraA )
shared_bound_IS_mandy      = .flank_over_centre_log2( shared_bound_IS_mandy )
shared_bound_IS_sandraA    = .flank_over_centre_log2( shared_bound_IS_sandraA )
shared_bound_IS_ele        = .flank_over_centre_log2( shared_bound_IS_ele )
shared_bound_IS_fas        = .flank_over_centre_log2( shared_bound_IS_fas )
## Shared boundaries: pool both samples of a species and keep finite values only.
is_hs = c(shared_bound_IS_ele, shared_bound_IS_fas)
is_pt = c(shared_bound_IS_mandy, shared_bound_IS_sandraA)
is_hs = is_hs[is.finite(is_hs)]
is_pt = is_pt[is.finite(is_pt)]
## -----
## Human specific boundaries: per boundary, the larger of the two samples of a
## species; the human vector is then re-ordered to the names of the chimp vector
## before taking the difference.
human_spe_bound_human = rowMax(cbind(human_spe_bound_IS_ele,human_spe_bound_IS_fas))
names(human_spe_bound_human) = names(human_spe_bound_IS_ele)
human_spe_bound_chimp = rowMax(cbind(human_spe_bound_IS_mandy,human_spe_bound_IS_sandraA))
names(human_spe_bound_chimp) = names(human_spe_bound_IS_mandy)
human_spe_bound_human = human_spe_bound_human[match(names(human_spe_bound_chimp),names(human_spe_bound_human))]
human_spe_boundaries_evol = human_spe_bound_human-human_spe_bound_chimp
## -----
## Chimp specific boundaries: same construction, here the chimp vector is
## re-ordered to the names of the human vector.
chimp_spe_bound_human = rowMax(cbind(chimp_spe_bound_IS_ele,chimp_spe_bound_IS_fas))
names(chimp_spe_bound_human) = names(chimp_spe_bound_IS_ele)
chimp_spe_bound_chimp = rowMax(cbind(chimp_spe_bound_IS_mandy,chimp_spe_bound_IS_sandraA))
names(chimp_spe_bound_chimp) = names(chimp_spe_bound_IS_mandy)
chimp_spe_bound_chimp = chimp_spe_bound_chimp[match(names(chimp_spe_bound_human),names(chimp_spe_bound_chimp))]
chimp_spe_boundaries_evol = chimp_spe_bound_human-chimp_spe_bound_chimp
## `ylim` replaces the misspelt `tlim`, which is not a graphical parameter and
## therefore never restricted the y-axis.
boxplot( human_spe_boundaries_evol, chimp_spe_boundaries_evol,
         outline=FALSE, col="white",border=c("black","red"),
         ylim=c(-0.6,0.6),ylab="Insulation change Human/Chimp [log2]",
         names=c("Human","Chimp"),xlab="Species specificity of boundary")
## The test below was fused onto the boxplot() call; its recorded output follows.
t.test(human_spe_boundaries_evol,chimp_spe_boundaries_evol)
##
## Welch Two Sample t-test
##
## data: human_spe_boundaries_evol and chimp_spe_boundaries_evol
## t = 6.6461, df = 274.65, p-value = 0.0000000001614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2357400 0.4341763
## sample estimates:
## mean of x mean of y
## 0.1432925 -0.1916657
## Prefix every bin name of `gagr` with its chromosome ("<chrom>_<name>").
## The statement overwrites names(gagr) in place, so running it a second time
## prepends the chromosome again.
names(gagr) = paste(chrom(gagr),names(gagr),sep="_")
## Turn an insulation matrix into a per-bin score track.
##
## IS   : matrix with row names and at least three columns; columns 1 and 3 are
##        averaged and divided by column 2.
## GAGR : named object holding the bins (called with `gagr` / `gagr_pt` in this
##        script); its `binid` field is removed and a `score` field is added.
##
## Returns a copy of GAGR whose `score` is 0 everywhere except at the entries
## whose name matches a row name of IS, where it is
## log2( (mean(col1, col3) + 0.001) / (col2 + 0.001) ).
processIS = function( IS, GAGR ){
  res = GAGR
  res$binid = NULL
  res$score = 0
  ## drop=FALSE keeps a one-row IS two-dimensional; without it the subset
  ## collapses to a plain vector and rowMeans() fails.
  flank = rowMeans( 0.001 + IS[, c(1,3), drop=FALSE] )
  centre = 0.001 + IS[, 2]
  res$score[ match(rownames(IS), names(res)) ] = log2( flank / centre )
  return(res)
}
## --------
## Genome-wide insulation: for each sample, score every bin outside chrY, save
## the raw matrix, convert it with processIS() and export a bedGraph track.
genome_wide_IS_ele = InsulationScore( gagr[which(chrom(gagr)!="chrY")], ele, gagr, 5, 3, 10 )
save(genome_wide_IS_ele,file=paste0(objects_directory,"genome_wide_IS_ele.RData"))
genome_wide_IS_ele_gr = processIS(genome_wide_IS_ele,gagr)
export.bedGraph( genome_wide_IS_ele_gr, con=paste0(outputs_directory,"genome_wide_IS_ele_gagr.bedGraph"))
genome_wide_IS_fas = InsulationScore( gagr[which(chrom(gagr)!="chrY")], fa, gagr, 5, 3, 10 )
save(genome_wide_IS_fas,file=paste0(objects_directory,"genome_wide_IS_fas.RData"))
genome_wide_IS_fas_gr = processIS(genome_wide_IS_fas,gagr)
export.bedGraph( genome_wide_IS_fas_gr, con=paste0(outputs_directory,"genome_wide_IS_fas_gr.bedGraph"))
genome_wide_IS_mandy = InsulationScore( gagr_pt[which(chrom(gagr_pt)!="chrY")], mandy, gagr_pt, 5, 3, 10 )
save(genome_wide_IS_mandy,file=paste0(objects_directory,"genome_wide_IS_mandy.RData"))
genome_wide_IS_mandy_gr = processIS(genome_wide_IS_mandy,gagr_pt)
export.bedGraph( genome_wide_IS_mandy_gr, con=paste0(outputs_directory,"genome_wide_IS_mandy_gr.bedGraph"))
genome_wide_IS_sandraA = InsulationScore( gagr_pt[which(chrom(gagr_pt)!="chrY")], sa, gagr_pt, 5, 3, 10 )
save(genome_wide_IS_sandraA,file=paste0(objects_directory,"genome_wide_IS_sandraA.RData"))
genome_wide_IS_sandraA_gr = processIS(genome_wide_IS_sandraA,gagr_pt)
export.bedGraph( genome_wide_IS_sandraA_gr, con=paste0(outputs_directory,"genome_wide_IS_sandraA_gr.bedGraph"))
# Final plots
# Display the insulation scores for all the bins, species specific as well as shared boundaries.
## Re-import the four genome-wide tracks and average the two samples of each
## species bin by bin.
## Note that `genome_wide_IS_sandraA` is re-used here for the imported track
## (the other three carry a _gr suffix).
genome_wide_IS_ele_gr=import.bedGraph(paste0(outputs_directory,"genome_wide_IS_ele_gagr.bedGraph"))
genome_wide_IS_fas_gr=import.bedGraph(paste0(outputs_directory,"genome_wide_IS_fas_gr.bedGraph"))
genome_wide_IS_sandraA=import.bedGraph(paste0(outputs_directory,"genome_wide_IS_sandraA_gr.bedGraph"))
genome_wide_IS_mandy_gr=import.bedGraph(paste0(outputs_directory,"genome_wide_IS_mandy_gr.bedGraph"))
genome_wide_IS_human = rowMeans(cbind(genome_wide_IS_ele_gr$score,genome_wide_IS_fas_gr$score))
genome_wide_IS_chimp = rowMeans(cbind(genome_wide_IS_sandraA$score,genome_wide_IS_mandy_gr$score))
## Eight boxes: genome wide, shared and species specific boundaries, measured in
## human (black border) and chimp (red border) samples.
boxplot( genome_wide_IS_human, genome_wide_IS_chimp,
         is_hs, human_spe_bound_human,human_spe_bound_chimp,
         is_pt,chimp_spe_bound_chimp,
         chimp_spe_bound_human, outline=FALSE,
         col="white",border=c("black","red","black","black","red","red","red","black"),
         ylim=c(-1,1),ylab=expression("Insulation (log"[2]*")"),
         names=c("GW Hs","GW Pt","Bound Hs","Hs-sp Hs","Hs-spe Pt","Bound Pt","Pt-spe Pt","Pt-spe Hs"),las=2)
axis(1,lwd=2,at=1:8,c("GW Hs","GW Pt","Bound Hs","Hs-sp Hs","Hs-spe Pt","Bound Pt","Pt-spe Pt","Pt-spe Hs"),las=2)
axis(2,lwd=2,las=2)
box(col="black",lwd=2)
abline(h=0,lwd=2,lty=2,col="gray")
## The test below was fused onto the abline() call; its recorded output follows.
t.test(human_spe_bound_human,human_spe_bound_chimp)
##
## Welch Two Sample t-test
##
## data: human_spe_bound_human and human_spe_bound_chimp
## t = 5.3102, df = 510.25, p-value = 0.0000001638
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.09027806 0.19630691
## sample estimates:
## mean of x mean of y
## 0.3546137 0.2113212
t.test(chimp_spe_bound_human,chimp_spe_bound_chimp)##
## Welch Two Sample t-test
##
## data: chimp_spe_bound_human and chimp_spe_bound_chimp
## t = -3.7563, df = 315.76, p-value = 0.0002053
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.30303371 -0.09470476
## sample estimates:
## mean of x mean of y
## 0.3448980 0.5437673
## Read the H3K4me3 and H3K27ac narrowPeak files of human (hs), chimp (pt) and
## macaque (mm), restricted to the listed chromosomes.
## NOTE(review): the last argument (10 for human, 4 for the others) is
## interpreted by readBed_filterChroms(), which is not visible here -- confirm
## that the difference is intended.
hs_me3 = readBed_filterChroms(paste0(outputs_directory,'ChIP_Seq_H3K4me3_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                              paste0("",c(1:22,'X')),10)
hs_k27ac = readBed_filterChroms(paste0(outputs_directory,'ChIP_Seq_H3K27ac_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                                paste0("",c(1:22,'X')),10)
pt_me3 = readBed_filterChroms(paste0(outputs_directory,'ChIP_Seq_H3K4me3_12-22_PanTro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                              chroms=paste0('chr',c(1,'2A','2B', 3:22,'X')),4)
pt_k27ac = readBed_filterChroms(paste0(outputs_directory,'ChIP_Seq_H3K27ac_12-22_PanTro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                                chroms=paste0('chr',c(1,'2A','2B', 3:22,'X')),4)
mm_me3 = readBed_filterChroms(paste0(outputs_directory,'ChIP_Seq_H3K4me3_03-22_MacMul_i-Astro_WT_Becky_Rep_1_RheMac10_peaks.narrowPeak'),
                              chroms=paste0(c(1:22,'X')),4)
mm_k27ac = readBed_filterChroms(paste0(outputs_directory,'ChIP_Seq_H3K27ac_03-22_MacMul_i-Astro_WT_Becky_Rep_1_RheMac10_peaks.narrowPeak'),
                                chroms=paste0(c(1:22,'X')),4)
## Use UCSC style chromosome names for every peak set.
seqlevelsStyle(hs_k27ac) = "ucsc"
seqlevelsStyle(pt_k27ac) = "ucsc"
seqlevelsStyle(mm_k27ac) = "ucsc"
seqlevelsStyle(hs_me3) = "ucsc"
seqlevelsStyle(pt_me3) = "ucsc"
seqlevelsStyle(mm_me3) = "ucsc"
# Load the files from the other vignettes
load(paste0(objects_directory,'tss_objects.RData'))
load(paste0(objects_directory,'DEseq2_RNA.RData'))
species_specific_boundaries = import.bed(paste0(outputs_directory,"species_specific_boundaries.bed"))
## The ATAC import below was fused onto the previous line, which does not parse.
hs_atac = readBed_filterChroms(paste0(outputs_directory,'ATAC_Seq_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                               paste0("",c(1:22,'X')),10)
pt_atac = readBed_filterChroms(paste0(outputs_directory,'ATAC_Seq_12-22_Pantro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                               paste0("chr",c(1,"2A","2B", 3:22,'X')),10)
mm_atac = readBed_filterChroms(paste0(outputs_directory,'ATAC_Seq_12-22_MacMul_i-Astro_Becky_merged_RheMac10_peaks.narrowPeak'),
                               paste0("",c(1:22,'X')),10)
seqlevelsStyle(hs_atac) = 'ucsc'
seqlevelsStyle(mm_atac) = 'ucsc'
## Shrink every peak to a 2 bp range that starts `score` bases after the
## original peak start.
## NOTE(review): `score` presumably holds the narrowPeak summit offset
## (column 10 requested above) -- confirm in readBed_filterChroms().
start(hs_atac) = start(hs_atac) + hs_atac$score
end(hs_atac) = start(hs_atac) + 1
start(pt_atac) = start(pt_atac) + pt_atac$score
end(pt_atac) = start(pt_atac) + 1
start(mm_atac) = start(mm_atac) + mm_atac$score
end(mm_atac) = start(mm_atac) + 1
export.bed( hs_atac, con=paste0(outputs_directory,'ATAC.Hs_clean_summit.narrowPeak'))
export.bed( pt_atac, con=paste0(outputs_directory,'ATAC.Pt_clean_summit.narrowPeak'))
export.bed( mm_atac, con=paste0(outputs_directory,'ATAC.Mm_clean_summit.narrowPeak'))
# The generic liftOver command: $liftOver -minMatch=0.5 -bedPlus=6 -tab
cd ~/Documents/Tools/
## human to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_summit.unmapped.file
## human to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_summit.unmapped.file
## chimp to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_summit.unmapped.file
## macaque to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_summit.unmapped.file
## chimp to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_summit.unmapped.file
## macaque to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_summit.unmapped.file
# We define a unique set of intervals of peaks that are found in at least one species and that are alignable.
## Read the six lifted-over summit files (source species -> target genome),
## each restricted to the chromosomes listed for the target genome.
hs_atac_mapped_in_chimp = readBed_filterChroms(
  paste0(liftOverPeaks,'ATAC.Hs_clean_peaks_on_PT6_summit.narrowPeak'),
  chroms=paste0('chr',c(1,'2A','2B', 3:22,'X')),
  4
)
hs_atac_mapped_in_rhesus = readBed_filterChroms(
  paste0(liftOverPeaks,"ATAC.Hs_clean_peaks_on_RM10_summit.narrowPeak"),
  chroms=paste0("chr",c(1:22,'X')),
  4
)
chimp_mapped_in_humans = readBed_filterChroms(
  paste0(liftOverPeaks,'ATAC.Pt_clean_peaks_on_Hg38_summit.narrowPeak'),
  chroms=paste0('chr',c(1:22,'X')),
  4
)
macaque_mapped_in_humans = readBed_filterChroms(
  paste0(liftOverPeaks,'ATAC.Mm_clean_peaks_on_Hg38_summit.narrowPeak'),
  chroms=paste0('chr',c(1:22,'X')),
  4
)
macaque_in_chimps = readBed_filterChroms(
  paste0(liftOverPeaks,'ATAC.Mm_clean_peaks_on_PanTro6_summit.narrowPeak'),
  chroms=paste0('chr',c(1,'2A','2B', 3:22,'X')),
  4
)
chimps_in_macaque = readBed_filterChroms(
  paste0(liftOverPeaks,'ATAC.Pt_clean_peaks_on_RheMac10_summit.narrowPeak'),
  chroms=paste0('chr',c(1:22,'X')),
  4
)
## Keep the ranges of `gro` that are exactly 2 bp wide and start beyond
## position 500, then resize them to `finalSize` around their centre.
##
## gro       : ranges object supporting width(), start() and `[`.
## finalSize : width of the returned ranges.
CleanAndResize = function( gro, finalSize ){
  is_clean = width(gro)==2 & start(gro)>500
  GenomicRanges::resize( gro[ is_clean ], finalSize, fix="center" )
}
## Clean every summit set (lifted-over and native) and extend it to 500 bp.
hs_atac_mapped_in_chimp = CleanAndResize( hs_atac_mapped_in_chimp, 500 )
hs_atac_mapped_in_rhesus = CleanAndResize( hs_atac_mapped_in_rhesus, 500 )
chimp_mapped_in_humans = CleanAndResize( chimp_mapped_in_humans, 500 )
macaque_mapped_in_humans = CleanAndResize( macaque_mapped_in_humans, 500 )
macaque_in_chimps = CleanAndResize( macaque_in_chimps, 500 )
chimps_in_macaque = CleanAndResize( chimps_in_macaque, 500 )
hs_atac = CleanAndResize( hs_atac, 500 )
pt_atac = CleanAndResize( pt_atac, 500 )
mm_atac = CleanAndResize( mm_atac, 500 )
# Now, we have all the lifted over combinations.
# human peaks aligned in all the species
hs_pt_mm_liftover = names(hs_atac_mapped_in_chimp)[names(hs_atac_mapped_in_chimp) %in% names(hs_atac_mapped_in_rhesus) ]
human_peaks_aligned_Pt_Mm_coordinates_hs = hs_atac[ which(names(hs_atac) %in% hs_pt_mm_liftover ) ]
# Drop the second member of every pair of overlapping peaks. setdiff() is used
# instead of a negative subscript because x[-integer(0)] selects nothing, which
# emptied the set whenever no peaks overlapped.
human_peaks_aligned_Pt_Mm_coordinates_hs = human_peaks_aligned_Pt_Mm_coordinates_hs[
  setdiff( seq_along(human_peaks_aligned_Pt_Mm_coordinates_hs),
           subjectHits(findOverlaps(human_peaks_aligned_Pt_Mm_coordinates_hs,drop.self=TRUE,drop.redundant=TRUE)) ) ]
length(human_peaks_aligned_Pt_Mm_coordinates_hs)## [1] 141484
# Same peaks, in chimp and macaque coordinates, in the same order.
human_peaks_aligned_Pt_Mm_coordinates_pt = hs_atac_mapped_in_chimp[ match(names(human_peaks_aligned_Pt_Mm_coordinates_hs),names(hs_atac_mapped_in_chimp)) ]
human_peaks_aligned_Pt_Mm_coordinates_mm = hs_atac_mapped_in_rhesus[ match(names(human_peaks_aligned_Pt_Mm_coordinates_hs),names(hs_atac_mapped_in_rhesus)) ]
all(names(human_peaks_aligned_Pt_Mm_coordinates_hs)==names(human_peaks_aligned_Pt_Mm_coordinates_pt))## [1] TRUE
all(names(human_peaks_aligned_Pt_Mm_coordinates_hs)==names(human_peaks_aligned_Pt_Mm_coordinates_mm))## [1] TRUE
# peaks found in chimp and macaque (aligned to the human genome) but not detected in human
nhp_peaks = chimp_mapped_in_humans[ queryHits(findOverlaps(chimp_mapped_in_humans,macaque_mapped_in_humans))]
nhp_peaks = nhp_peaks[ which(names(nhp_peaks) %in% names(chimps_in_macaque))]
# Both removals use setdiff() rather than a negative subscript: x[-integer(0)]
# selects nothing, so an empty hit list used to empty the whole set.
nhp_peaks_coordinates_hs = nhp_peaks[
  setdiff( seq_along(nhp_peaks),
           queryHits(findOverlaps(nhp_peaks,hs_atac)) ) ]
nhp_peaks_coordinates_hs = nhp_peaks_coordinates_hs[
  setdiff( seq_along(nhp_peaks_coordinates_hs),
           subjectHits(findOverlaps(nhp_peaks_coordinates_hs,drop.self=TRUE,drop.redundant=TRUE)) ) ]
nhp_peaks_coordinates_pt = pt_atac[ match(names(nhp_peaks_coordinates_hs),names(pt_atac)) ]
nhp_peaks_coordinates_mm = chimps_in_macaque[ match(names(nhp_peaks_coordinates_pt),names(chimps_in_macaque)) ]
all(names(nhp_peaks_coordinates_hs)==names(nhp_peaks_coordinates_mm))## [1] TRUE
all(names(nhp_peaks_coordinates_hs)==names(nhp_peaks_coordinates_pt))## [1] TRUE
# chimp peaks aligned both in humans and macaques but not detected as peaks in humans and macaques
# Removals use setdiff() rather than a negative subscript: x[-integer(0)]
# selects nothing, so an empty hit list used to empty the whole set.
chimp_peaks = chimp_mapped_in_humans[
  setdiff( seq_along(chimp_mapped_in_humans),
           queryHits(findOverlaps(chimp_mapped_in_humans,c(nhp_peaks_coordinates_hs, hs_atac))) ) ]
chimp_peaks = chimp_peaks[ which(names(chimp_peaks) %in% names(chimps_in_macaque)) ]
chimp_peaks = chimp_peaks[ which(names(chimp_peaks) %in% names(pt_atac)) ]
chimp_uniquely_peaks_coordinates_hs = chimp_mapped_in_humans[ match( names(chimp_peaks), names(chimp_mapped_in_humans) )]
chimp_uniquely_peaks_coordinates_hs = chimp_uniquely_peaks_coordinates_hs[
  setdiff( seq_along(chimp_uniquely_peaks_coordinates_hs),
           subjectHits(findOverlaps(chimp_uniquely_peaks_coordinates_hs,
                                    drop.self=TRUE,drop.redundant=TRUE)) ) ]
chimp_uniquely_peaks_coordinates_pt = pt_atac[ match( names(chimp_uniquely_peaks_coordinates_hs), names(pt_atac) )]
chimp_uniquely_peaks_coordinates_mm = chimps_in_macaque[ match( names(chimp_uniquely_peaks_coordinates_pt), names(chimps_in_macaque) )]
all(names(chimp_uniquely_peaks_coordinates_hs)==names(chimp_uniquely_peaks_coordinates_pt))## [1] TRUE
all(names(chimp_uniquely_peaks_coordinates_hs)==names(chimp_uniquely_peaks_coordinates_mm))## [1] TRUE
length(chimp_uniquely_peaks_coordinates_hs)## [1] 31740
# macaque peaks aligned both in humans and chimps but not detected as peaks in humans and chimps
# Removals use setdiff() rather than a negative subscript: x[-integer(0)]
# selects nothing, so an empty hit list used to empty the whole set.
macaque_peaks = macaque_mapped_in_humans[
  setdiff( seq_along(macaque_mapped_in_humans),
           queryHits(findOverlaps(macaque_mapped_in_humans,c(nhp_peaks_coordinates_hs, hs_atac))) ) ]
macaque_peaks = macaque_peaks[ which(names(macaque_peaks) %in% names(macaque_in_chimps)) ]
macaque_peaks = macaque_peaks[ which(names(macaque_peaks) %in% names(mm_atac)) ]
macaque_uniquely_peaks_coordinates_hs = macaque_mapped_in_humans[ match( names(macaque_peaks), names(macaque_mapped_in_humans) )]
macaque_uniquely_peaks_coordinates_hs = macaque_uniquely_peaks_coordinates_hs[
  setdiff( seq_along(macaque_uniquely_peaks_coordinates_hs),
           subjectHits(findOverlaps(macaque_uniquely_peaks_coordinates_hs,drop.self=TRUE,drop.redundant=TRUE)) ) ]
macaque_uniquely_peaks_coordinates_pt = macaque_in_chimps[ match( names(macaque_uniquely_peaks_coordinates_hs), names(macaque_in_chimps) )]
macaque_uniquely_peaks_coordinates_mm = mm_atac[ match( names(macaque_uniquely_peaks_coordinates_hs), names(mm_atac) )]
all(names(macaque_uniquely_peaks_coordinates_hs)==names(macaque_uniquely_peaks_coordinates_pt))## [1] TRUE
all(names(macaque_uniquely_peaks_coordinates_hs)==names(macaque_uniquely_peaks_coordinates_mm))## [1] TRUE
length(macaque_uniquely_peaks_coordinates_hs)## [1] 43319
### we pool all together and remove duplicated peaks --> 225,059
### The four peak classes are concatenated in the same order for each genome,
### so the three pooled sets stay parallel.
all_human_intervals = c(
  human_peaks_aligned_Pt_Mm_coordinates_hs,
  nhp_peaks_coordinates_hs,
  chimp_uniquely_peaks_coordinates_hs,
  macaque_uniquely_peaks_coordinates_hs
)
all_chimp_intervals = c(
  human_peaks_aligned_Pt_Mm_coordinates_pt,
  nhp_peaks_coordinates_pt,
  chimp_uniquely_peaks_coordinates_pt,
  macaque_uniquely_peaks_coordinates_pt
)
all_macaque_intervals = c(
  human_peaks_aligned_Pt_Mm_coordinates_mm,
  nhp_peaks_coordinates_mm,
  chimp_uniquely_peaks_coordinates_mm,
  macaque_uniquely_peaks_coordinates_mm
)
all(names(all_human_intervals) == names(all_chimp_intervals))## [1] TRUE
all(names(all_human_intervals) == names(all_macaque_intervals))## [1] TRUE
### Names occurring more than once in the pooled human set; every copy of such
### a name is discarded.
duplicated_peak_names = table(names(all_human_intervals))
duplicated_peak_names = names(duplicated_peak_names[duplicated_peak_names>1])
all_human_intervals = all_human_intervals[ !( names(all_human_intervals) %in% duplicated_peak_names ) ]
all_chimp_intervals = all_chimp_intervals[ !( names(all_chimp_intervals) %in% duplicated_peak_names ) ]
## Discard the duplicated peak names from the pooled macaque intervals as well.
all_macaque_intervals = all_macaque_intervals[which(! names(all_macaque_intervals) %in% duplicated_peak_names) ]
## The import below was fused onto the previous line, which does not parse.
## Second pass over the raw peak files: this time 500 bp windows are built
## before export.
hs_atac2 = readBed_filterChroms(paste0(outputs_directory,'ATAC_Seq_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                                paste0("",c(1:22,'X')),10)
pt_atac2 = readBed_filterChroms(paste0(outputs_directory,'ATAC_Seq_12-22_Pantro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                                paste0("chr",c(1,"2A","2B", 3:22,'X')),10)
mm_atac2 = readBed_filterChroms(paste0(outputs_directory,'ATAC_Seq_12-22_MacMul_i-Astro_Becky_merged_RheMac10_peaks.narrowPeak'),
                                paste0("",c(1:22,'X')),10)
seqlevelsStyle(hs_atac2) = 'ucsc'
seqlevelsStyle(mm_atac2) = 'ucsc'
## Collapse each peak to the single position start + score, then resize to
## 500 bp around that position.
## NOTE(review): `score` presumably holds the narrowPeak summit offset -- confirm.
start(hs_atac2) = start(hs_atac2) + hs_atac2$score
end(hs_atac2) = start(hs_atac2)
hs_atac2 = GenomicRanges::resize(hs_atac2,500,fix="center")
start(pt_atac2) = start(pt_atac2) + pt_atac2$score
end(pt_atac2) = start(pt_atac2)
pt_atac2 = GenomicRanges::resize(pt_atac2,500,fix="center")
start(mm_atac2) = start(mm_atac2) + mm_atac2$score
end(mm_atac2) = start(mm_atac2)
mm_atac2 = GenomicRanges::resize(mm_atac2,500,fix="center")
export.bed( hs_atac2, con=paste0(outputs_directory,'ATAC.Hs_clean_500_summit.narrowPeak'))
export.bed( pt_atac2, con=paste0(outputs_directory,'ATAC.Pt_clean_500_summit.narrowPeak'))
export.bed( mm_atac2, con=paste0(outputs_directory,'ATAC.Mm_clean_500_summit.narrowPeak'))
cd ~/Documents/Tools/
## human to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_500_summit.unmapped.file
## human to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_500_summit.unmapped.file
## chimp to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_500_summit.unmapped.file
## macaque to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_500_summit.unmapped.file
## chimp to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_500_summit.unmapped.file
## macaque to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_500_summit.unmapped.file
# Read in the results to retrieve the names of the peaks that we wish to use.
## For each species, keep the names of the 500 bp windows present in both
## lifted-over files (i.e. in both other genomes).
peaks_hs_Pt = readBed_filterChroms(paste0(liftOverPeaks,"ATAC.Hs_clean_peaks_on_PT6_500_summit.narrowPeak"),
                                   paste0("chr",c(1,"2A","2B", 3:22,'X')),5 )
peaks_hs_Mm = readBed_filterChroms(paste0(liftOverPeaks,"ATAC.Hs_clean_peaks_on_RM10_500_summit.narrowPeak"),
                                   paste0("chr",c(1:22,'X')),5 )
peaks_hs = names(peaks_hs_Pt)[names(peaks_hs_Pt) %in% names(peaks_hs_Mm) ]
peaks_Pt_Hs = readBed_filterChroms(paste0(liftOverPeaks,"ATAC.Pt_clean_peaks_on_Hg38_500_summit.narrowPeak"),
                                   paste0("chr",c(1:22,'X')),5 )
peaks_Pt_Mm = readBed_filterChroms(paste0(liftOverPeaks,"ATAC.Pt_clean_peaks_on_RheMac10_500_summit.narrowPeak"),
                                   paste0("chr",c(1:22,'X')),5 )
peaks_pt = names(peaks_Pt_Hs)[names(peaks_Pt_Hs) %in% names(peaks_Pt_Mm) ]
peaks_Mm_Hs = readBed_filterChroms(paste0(liftOverPeaks,"ATAC.Mm_clean_peaks_on_Hg38_500_summit.narrowPeak"),
                                   paste0("chr",c(1:22,'X')),5 )
peaks_Mm_Pt = readBed_filterChroms(paste0(liftOverPeaks,"ATAC.Mm_clean_peaks_on_PanTro6_500_summit.narrowPeak"),
                                   paste0("chr",c(1,"2A","2B", 3:22,'X')), 5 )
peaks_mm = names(peaks_Mm_Hs)[names(peaks_Mm_Hs) %in% names(peaks_Mm_Pt) ]
## Union of the retained names over the three species.
peaks = unique( c( peaks_hs, peaks_pt, peaks_mm))
# The final ranges
length(all_human_intervals)==length(all_chimp_intervals)## [1] TRUE
length(all_human_intervals)==length(all_macaque_intervals)## [1] TRUE
length(all_macaque_intervals) # 225,059## [1] 225059
## Keep only the intervals whose name is in `peaks`.
all_human_intervals = all_human_intervals[which(names(all_human_intervals) %in% peaks)]
all_chimp_intervals = all_chimp_intervals[which(names(all_chimp_intervals) %in% peaks)]
all_macaque_intervals = all_macaque_intervals[which(names(all_macaque_intervals) %in% peaks)]
length(all_human_intervals)==length(all_chimp_intervals)## [1] TRUE
length(all_human_intervals)==length(all_macaque_intervals)## [1] TRUE
length(all_macaque_intervals) # 224,411## [1] 224411
all(names(all_human_intervals)==names(all_chimp_intervals))## [1] TRUE
all(names(all_human_intervals)==names(all_macaque_intervals))## [1] TRUE
## One annotation file per genome; these are the annot.ext inputs of the
## featureCounts() calls further down.
export.gff( all_human_intervals,
            con=paste0(outputs_directory,"hs_atac_for_Deseq2.gtf" ) )
export.gff( all_chimp_intervals,
            con=paste0(outputs_directory,"pt_atac_for_Deseq2.gtf" ) )
export.gff( all_macaque_intervals,
            con=paste0(outputs_directory,"mm_atac_for_Deseq2.gtf" ) )
## Chromosome alias tables, one "<seqlevel>,<seqlevel without 'chr'>" line per
## chromosome; passed as chrAliases to featureCounts() further down.
writeLines( paste0( seqlevels(all_human_intervals), ",",
                    gsub("chr",'',seqlevels(all_human_intervals)) ),
            paste0(outputs_directory,'hs_atac_for_Deseq2.txt') )
writeLines( paste0( seqlevels(all_chimp_intervals), ",",
                    gsub("chr",'',seqlevels(all_chimp_intervals)) ),
            paste0(outputs_directory,'pt_atac_for_Deseq2.txt') )
writeLines( paste0( seqlevels(all_macaque_intervals), ",",
                    gsub("chr",'',seqlevels(all_macaque_intervals)) ),
            paste0(outputs_directory,'mm_atac_for_Deseq2.txt') )
## The setwd() below was fused onto the last writeLines() call, which does not
## parse. The BAM files further down are given relative to this directory.
setwd('/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/bam_files/')
## -------------------------
## Count ATAC-seq read pairs per interval for one BAM file.
##   bam_file       : BAM file name, resolved against the current working directory
##   species_prefix : "hs", "pt" or "mm"; selects <prefix>_atac_for_Deseq2.gtf and
##                    the matching <prefix>_atac_for_Deseq2.txt chromosome alias file
##   annotation_dir : directory holding both files; defaults to outputs_directory,
##                    the location the export step writes them to
## Returns the list produced by featureCounts().
count_atac_reads = function( bam_file, species_prefix, annotation_dir = outputs_directory ){
  featureCounts( bam_file,
                 annot.ext = paste0(annotation_dir, species_prefix, '_atac_for_Deseq2.gtf'),
                 isGTFAnnotationFile = TRUE,
                 chrAliases = paste0(annotation_dir, species_prefix, '_atac_for_Deseq2.txt'),
                 GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd=TRUE)
}
## ------------------------- human
ele10 = count_atac_reads( 'ATAC_Seq_12-21_HomSap_i-Astro_WT_ELE10_merged_hg38.bam', 'hs' )
ele30 = count_atac_reads( 'ATAC_Seq_05-22_HomSap_i-Astro_WT_ELE30_2_Rep_1_hg38.bam', 'hs' )
## ------------------------- chimpanzee
sandraa = count_atac_reads( 'ATAC_Seq_12-22_Pantro_i-Astro_Sandra_merged_PanTro6.bam', 'pt' )
Mandy04 = count_atac_reads( 'ATAC_Seq_05-22_PanTro_i-Astro_WT_Mandy4_Rep_1_PanTro6.bam', 'pt' )
Mandy06 = count_atac_reads( 'ATAC_Seq_05-22_PanTro_i-Astro_WT_Mandy6_Rep_1_PanTro6.bam', 'pt' )
## ------------------------- macaque
becky = count_atac_reads( 'ATAC_Seq_12-22_MacMul_i-Astro_Becky_merged_RheMac10.bam', 'mm' )
## -------------------------
save( ele10, ele30, sandraa, Mandy04, Mandy06, becky,
      file=paste0(outputs_directory,'counts_ATAC_refined.RData' ) )
## -------------------------
## Printed sanity checks: the two human runs report the same feature IDs, and
## the feature IDs / start coordinates agree with the interval objects.
all( ele10$annotation$GeneID == ele30$annotation$GeneID )
all( names(all_human_intervals)== ele10$annotation$GeneID)
all(start(all_human_intervals)==ele10$annotation$Start)
all( names(all_human_intervals)== becky$annotation$GeneID)
## One column per sample, taken from the first count column of each run.
ATAC_count = data.frame( ELE10 = ele10$counts[,1],
                         ELE30 = ele30$counts[,1],
                         SandraA = sandraa$counts[,1],
                         Mandy04 = Mandy04$counts[,1],
                         Mandy06 = Mandy06$counts[,1],
                         Becky = becky$counts[,1] )
save( ATAC_count,
      file = paste0(outputs_directory,'ATAC_count.RData' ) )
## featureCounts settings as reported in its console log:
## || Paired-end : yes ||
## || Count read pairs : yes ||
## || Annotation : hs_atac_for_Deseq2.gtf (GTF) ||
## || Dir for temp files : . ||
## || Chromosome alias file : hs_atac_for_Deseq2.txt ||
## || Threads : 1 ||
## || Level : meta-feature level ||
## || Multimapping reads : counted ||
## || Multi-overlapping reads : not counted ||
## || Min overlapping bases : 1
Here for the quantitative analysis we will consider only peaks that have at least 50% liftover between all the species
load( paste0(outputs_directory,'ATAC_count.RData') )
## Sample sheet. The species vector is positional and must follow the column
## order of ATAC_count, whose column names become the row names.
metadata = data.frame(species=c('HS','HS','PT','PT','PT','MM'),
                      human_or_not = c("HS","HS","NHP","NHP","NHP","NHP"),
                      assay='ATAC',
                      row.names=colnames(ATAC_count))
## Model without intercept: one coefficient per species.
data <- DESeqDataSetFromMatrix( countData=ATAC_count,
                                colData = metadata,
                                design = ~ 0 + species )
data$species = relevel(data$species, "HS")
data = DESeq(data,fitType = 'local')## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## Transformed counts for visualisation, computed blind to the design.
vst_data = vst(data, blind=TRUE)
log_data = rlog(data, blind=TRUE)
PCA Plot for all Samples.
species.colors <- c('HS' = '#000000', 'PT' = '#FF3300', 'MM' = '#0033FF')
## PCA on the normalized counts with rows ordered by decreasing variance.
## `ord` and `pca` are not used by the plot below, which goes through plotPCA().
ord = order(rowVars(counts(data, normalized = TRUE)), decreasing = TRUE)
pca = prcomp(t(counts(data, normalized = TRUE)[ord,]))
## PCA of the rlog-transformed data, coloured by species, one label per sample.
plotPCA(log_data, intgroup="species") +
  geom_label_repel(aes(label = name),fill = alpha(c("white"),0.2),
                   show.legend = FALSE, size = 3.25, label.size=0.5,
                   fontface = 'bold') +
  scale_color_manual(values = species.colors) + theme_bw() + labs(color = "Species") +
  theme(aspect.ratio = 1, axis.text = element_text(face = 'bold', size = 11),
        axis.title = element_text(face = 'bold', size = 13),
        legend.text = element_text(face = 'bold'), legend.title = element_text(face = 'bold', size = 12)) +
  ggtitle("PCA Plot")
Next, we will identify human specific ATAC peaks in comparison with chimpanzee and macaque.
## Restrict each species' interval set to the peaks present in the count table.
## Every subset gets score 1, is exported as BED and is saved as an R object.
## The three stanzas are kept explicit because save() stores each object under
## its own variable name.
## -- human: peaks that are rows of ATAC_count
hs_atac_for_Deseq2 = all_human_intervals[ which(names(all_human_intervals) %in% rownames(ATAC_count))]
score(hs_atac_for_Deseq2) = 1
export.bed( hs_atac_for_Deseq2,
            con=paste0(outputs_directory,"hs_atac_for_Deseq2.bed" ))
save(hs_atac_for_Deseq2,
     file=paste0(objects_directory,"hs_atac_for_Deseq2.RData") )
## -- chimpanzee: intervals whose name is among the retained human peaks
pt_atac_for_Deseq2 = all_chimp_intervals[ which(names(all_chimp_intervals) %in% names(hs_atac_for_Deseq2))]
score(pt_atac_for_Deseq2) = 1
export.bed( pt_atac_for_Deseq2,
            con=paste0(outputs_directory,"pt_atac_for_Deseq2.bed" ))
save(pt_atac_for_Deseq2,
     file=paste0(objects_directory,"pt_atac_for_Deseq2.RData") )
## -- macaque: intervals whose name is among the retained human peaks
mm_atac_for_Deseq2 = all_macaque_intervals[ which(names(all_macaque_intervals) %in% names(hs_atac_for_Deseq2))]
score(mm_atac_for_Deseq2) = 1
export.bed( mm_atac_for_Deseq2,
            con=paste0(outputs_directory,"mm_atac_for_Deseq2.bed" ))
save(mm_atac_for_Deseq2,
     file=paste0(objects_directory,"mm_atac_for_Deseq2.RData") )
Individual comparisons and a table of these
## Human vs chimpanzee: keep only the HS and PT samples and fit a
## no-intercept model with one coefficient per species.
hs_pt_samples <- metadata$species %in% c("HS","PT")
HS_PT <- DESeqDataSetFromMatrix( countData = ATAC_count[ , hs_pt_samples ],
                                 colData = metadata[ hs_pt_samples, ],
                                 design = ~ 0 + species )## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
HS_PT <- DESeq(HS_PT, fitType = 'local')## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
HS_PT$species <- relevel(HS_PT$species, "HS")
## positive log2 fold change = more accessible in human
res_HS_PT <- results(HS_PT, contrast = c("species","HS","PT"))
# all(rownames(res_HS_PT)==names(hs_atac_for_Deseq2))
## Human vs macaque: same procedure on the HS and MM samples.
hs_mm_samples <- metadata$species %in% c("HS","MM")
HS_MM <- DESeqDataSetFromMatrix( countData = ATAC_count[ , hs_mm_samples ],
                                 colData = metadata[ hs_mm_samples, ],
                                 design = ~ 0 + species )## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
HS_MM <- DESeq(HS_MM, fitType = 'local')## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
HS_MM$species <- relevel(HS_MM$species, "HS")
res_HS_MM <- results(HS_MM, contrast = c("species","HS","MM"))
# all(rownames(res_HS_PT)==rownames(res_HS_MM))
## Combine both pairwise comparisons into one table, one row per peak, together
## with the human coordinates of the peak. The columns are bound positionally.
## NOTE(review): assumes res_HS_PT, res_HS_MM and hs_atac_for_Deseq2 share the
## same row order - TODO confirm.
res_HS_NHP = data.frame( hs_pt_LFC = res_HS_PT$log2FoldChange,
hs_pt_Padj = res_HS_PT$padj,
hs_mm_LFC = res_HS_MM$log2FoldChange,
hs_mm_Padj = res_HS_MM$padj,
row.names = rownames(res_HS_MM),
chrom_hs = chrom(hs_atac_for_Deseq2),
start= start(hs_atac_for_Deseq2),
end = end(hs_atac_for_Deseq2) )
## Attach the adjusted p-values to the human intervals and export them.
## NOTE(review): the export target has the same file name as the annotation
## exported earlier for featureCounts, so that file is replaced here.
hs_atac_for_Deseq2$score=0
hs_atac_for_Deseq2$padj_HSPT = res_HS_PT$padj
hs_atac_for_Deseq2$padj_HSMM = res_HS_MM$padj
export.gff(hs_atac_for_Deseq2,con=paste0(outputs_directory,"hs_atac_for_Deseq2.gtf"))
## Number of comparisons (0, 1 or 2) in which a peak reaches padj < 0.1.
table(rowSums(res_HS_NHP[,c("hs_pt_Padj","hs_mm_Padj")]<0.1))##
## 0 1 2
## 122226 71847 17203
## Same tally on the all_Deseqs table.
## NOTE(review): this mixes a raw p-value (pvalue.x) with an adjusted one
## (padj.y); presumably padj.x was intended - confirm.
table(rowSums(all_Deseqs[,c("pvalue.x","padj.y")]<0.1))##
## 0 1 2
## 13083 11086 5137
## Significant peaks split by direction; negative LFC = lower in human.
table( res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_pt_LFC<0 )##
## FALSE TRUE
## 205878 15321
table( res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_pt_LFC>0 )##
## FALSE TRUE
## 202928 20257
table( res_HS_NHP$hs_mm_Padj<0.1 & res_HS_NHP$hs_mm_LFC<0 )##
## FALSE TRUE
## 184934 35243
table( res_HS_NHP$hs_mm_Padj<0.1 & res_HS_NHP$hs_mm_LFC>0 )##
## FALSE TRUE
## 175875 43810
## export locations of the altered peaks
## Keep only peaks with an adjusted p-value in BOTH comparisons.
tested_in_both = which(!is.na(res_HS_NHP$hs_pt_Padj) & ! is.na(res_HS_NHP$hs_mm_Padj))
hs_atac_for_Deseq2_Hs_vs_NHP_filt = hs_atac_for_Deseq2[ tested_in_both ]
res_HS_NHP_filt = res_HS_NHP[ tested_in_both, ]
all(names(hs_atac_for_Deseq2_Hs_vs_NHP_filt)==rownames(res_HS_NHP_filt))## [1] TRUE
sum( rowSums(cbind(res_HS_NHP_filt$hs_pt_Padj<0.1,res_HS_NHP_filt$hs_mm_Padj<0.1 ))>0 )## [1] 89050
## -----------------------------
pvalthr=0.1
## Gained: significantly higher in human than in chimpanzee AND macaque.
## The row selection is computed once so that the GRanges and the result table
## cannot drift apart (the table filter previously lacked the "<" in
## "hs_pt_Padj<pvalthr" and therefore selected no rows).
gained_rows = which( res_HS_NHP_filt$hs_pt_LFC>0 & res_HS_NHP_filt$hs_pt_Padj<pvalthr &
                     res_HS_NHP_filt$hs_mm_LFC>0 & res_HS_NHP_filt$hs_mm_Padj<pvalthr )
gained_ATAC_gr = hs_atac_for_Deseq2_Hs_vs_NHP_filt[ gained_rows ]
gained_ATAC = res_HS_NHP_filt[ gained_rows, ]
gained_ATAC_gr$score=0
export.bed(gained_ATAC_gr,con=paste0(outputs_directory,"gained_ATAC_gr.bed"))
## Lost: significantly lower in human than in chimpanzee AND macaque.
lost_rows = which( res_HS_NHP_filt$hs_pt_LFC<0 & res_HS_NHP_filt$hs_pt_Padj<pvalthr &
                   res_HS_NHP_filt$hs_mm_LFC<0 & res_HS_NHP_filt$hs_mm_Padj<pvalthr )
lost_ATAC_gr = hs_atac_for_Deseq2_Hs_vs_NHP_filt[ lost_rows ]
lost_ATAC = res_HS_NHP_filt[ lost_rows, ]
lost_ATAC_gr$score=0
export.bed(lost_ATAC_gr,con=paste0(outputs_directory,"lost_ATAC_gr.bed"))
par(mar=c(5,5,5,5),mfrow=c(1,1))#, cex=1.0, cex.main=1.4, cex.axis=1.4, cex.lab=1.4)
topT <- as.data.frame(res_HS_NHP)
## Peaks significant in both comparisons. which() drops rows whose adjusted
## p-value is NA; a bare logical index would return them as all-NA rows and
## count them (the previously recorded length, 25677, was obtained that way).
topTsig = rownames(topT)[ which(topT$hs_pt_Padj <=pvalthr & topT$hs_mm_Padj<=pvalthr) ]
length(topTsig)
## Volcano plot of the human vs chimpanzee comparison. `col` is an argument of
## plot(); when handed to with() instead it is silently ignored.
with(topT, plot(hs_pt_LFC, -log10(hs_pt_Padj),
                pch=20, cex=1.0, col="gray60",
                xlab=bquote(~Log[2]~fold~change),
                ylab=bquote(~-log[10]~Q~value),
                xlim=c(-10,10),
                ylim=c(0,20)))
## Overlay the peaks that are significant in both comparisons.
with(subset(topT, rownames(topT) %in% topTsig),
     points(hs_pt_LFC, -log10(hs_pt_Padj), pch=20, col="steelblue", cex=0.5))
axis(2,lwd=2)
axis(1,lwd=2)
box(col="black",lwd=2)
First we will consider only regions that do not overlap promoters nor H3K4me3 peaks. Then, we identify the human specific ATAC-seq peaks and remove the peaks that overlap H3K27ac in NHPs. Create a big annotation table for the DORegions between Hs, Pt and Mm.
## Selections are written as countOverlaps() masks. Compared with indexing by
## queryHits(findOverlaps()), a mask (a) cannot empty the object when there is
## no overlap at all (x[-integer(0)] selects nothing) and (b) returns each
## range once even when it overlaps several subject ranges.
## Gained peaks that overlap neither an H3K4me3 peak nor a promoter window.
human_spe_enhancers = gained_ATAC_gr[ countOverlaps(gained_ATAC_gr,c(hs_me3,promoters_tss_gr)) == 0 ]
## Gained peaks at promoters with / without an H3K4me3 peak.
promoters_with_me3 = promoters_tss_gr[ countOverlaps(promoters_tss_gr,hs_me3) > 0 ]
promoters_without_me3 = promoters_tss_gr[ countOverlaps(promoters_tss_gr,hs_me3) == 0 ]
human_spe_active_promoters = gained_ATAC_gr[ countOverlaps(gained_ATAC_gr, promoters_with_me3) > 0 ]
human_spe_inactive_promoters = gained_ATAC_gr[ countOverlaps(gained_ATAC_gr, promoters_without_me3) > 0 ]
## Names of the chimpanzee / macaque intervals without an H3K27ac peak.
pt_atac_for_Deseq2_not_H3K27ac = names(pt_atac_for_Deseq2[ countOverlaps(pt_atac_for_Deseq2,pt_k27ac) == 0 ])
mm_atac_for_Deseq2_not_H3K27ac = names(mm_atac_for_Deseq2[ countOverlaps(mm_atac_for_Deseq2,mm_k27ac) == 0 ])
## HUMAN SPECIFIC ENHANCERS
## keep the candidates that lack H3K27ac in both chimpanzee and macaque
human_spe_enhancers = human_spe_enhancers[which(names(human_spe_enhancers) %in% pt_atac_for_Deseq2_not_H3K27ac[pt_atac_for_Deseq2_not_H3K27ac %in% mm_atac_for_Deseq2_not_H3K27ac])]
human_spe_enhancers_with_K27_peak = human_spe_enhancers[ countOverlaps(human_spe_enhancers,hs_k27ac) > 0 ]
human_spe_enhancers_without_K27_peak = human_spe_enhancers[ countOverlaps(human_spe_enhancers,hs_k27ac) == 0 ]
length(human_spe_enhancers)## [1] 9356
gained_atac_peaks_hs = names(gained_ATAC_gr)
## Chimpanzee and macaque intervals that correspond to the gained human peaks.
gained_ATAC_gr_pt = pt_atac_for_Deseq2[which(names(pt_atac_for_Deseq2) %in% gained_atac_peaks_hs)]
gained_ATAC_gr_mm = mm_atac_for_Deseq2[which(names(mm_atac_for_Deseq2) %in% gained_atac_peaks_hs)]
## The annotation table below binds per-species counts positionally, so the
## three objects must list the same peaks in the same order.
## gained_atac_peaks_hs is itself the vector of names; taking names() of it
## yields NULL and turns the comparison into all(logical(0)), which is TRUE
## regardless of the data (that is how the earlier "[1] TRUE" was obtained).
identical(gained_atac_peaks_hs, names(gained_ATAC_gr_pt))
identical(gained_atac_peaks_hs, names(gained_ATAC_gr_mm))
## Gained peaks outside every promoter window. A mask is used because a
## negative index built from zero hits would select nothing.
intergenic_gained = gained_atac_peaks_hs[ countOverlaps(gained_ATAC_gr,promoters_tss_gr) == 0 ]
## Overlap counts per gained peak: ATAC, H3K4me3 and H3K27ac peaks in each
## species (each counted on that species' own coordinates), human promoter
## windows, and the intergenic flag.
gained_ATAC_functional_annotation = data.frame(atac_Hs = countOverlaps(gained_ATAC_gr,hs_atac),
                                               atac_Pt = countOverlaps(gained_ATAC_gr_pt,pt_atac),
                                               atac_Mm = countOverlaps(gained_ATAC_gr_mm,mm_atac),
                                               me3_Hs = countOverlaps(gained_ATAC_gr,hs_me3),
                                               me3_Pt = countOverlaps(gained_ATAC_gr_pt,pt_me3),
                                               me3_Mm = countOverlaps(gained_ATAC_gr_mm,mm_me3),
                                               k27_Hs = countOverlaps(gained_ATAC_gr,hs_k27ac),
                                               k27_Pt = countOverlaps(gained_ATAC_gr_pt,pt_k27ac),
                                               k27_Mm = countOverlaps(gained_ATAC_gr_mm,mm_k27ac),
                                               promoter = countOverlaps(gained_ATAC_gr,promoters_tss_gr),
                                               is_intergenic = gained_atac_peaks_hs %in% intergenic_gained,
                                               row.names = gained_atac_peaks_hs )
colSums(gained_ATAC_functional_annotation>0)## atac_Hs atac_Pt atac_Mm me3_Hs me3_Pt
## 13108 4253 1792 411 329
## me3_Mm k27_Hs k27_Pt k27_Mm promoter
## 301 7382 2888 1424 908
## is_intergenic
## 12268
par(mfrow=c(1,1))
## Presence/absence matrix of the gained-peak annotation, rows sorted by all
## columns in turn.
x=gained_ATAC_functional_annotation>0
x=x[order(x[,1],x[,2],x[,3],x[,4],x[,5],x[,6],x[,7],x[,8],x[,9],x[,10],x[,11]),]
## Recode "present" in column k as the value k (column 1 keeps the value 1) so
## that image() can give every column its own colour; 0 stays "absent".
for( k in 2:11 ){
  x[x[,k]>0,k] = k
}
par(mfrow=c(1,1),mar=c(7,1,1,1))
## colour 1 = absent, colours 2-12 = columns 1-11
image(t(x),
      col=c("white","gray80","gray80","gray80",
            'forestgreen','forestgreen','forestgreen',
            "coral3","coral3","coral3",
            "black","blue4"),
      axes=FALSE,
      las=2)
box(col="black",lwd=2)
axis(1,at=seq(0,1,length.out=11),
     labels=c("atac_human","atac_chimp","atac_macaque",
              "me3_human","me3_chimp","me3_macaque",
              "k27_human","k27_chimp","k27_macaque",
              "promoter","intergenic"),
     las=2)
## separators after the ATAC, H3K4me3, H3K27ac and promoter column groups
abline(v=seq(0,1,length.out=11)[c(3,6,9,10)]+0.05,lwd=2)
## Gained promoters: promoter overlap with human ATAC and H3K4me3 peaks, no
## ATAC or H3K4me3 peak in chimpanzee or macaque, and no macaque H3K27ac peak.
## (Terms that appeared twice in the filter are listed once.)
gained_promoters = gained_ATAC_functional_annotation[
  gained_ATAC_functional_annotation$promoter>0 &
  gained_ATAC_functional_annotation$atac_Hs>0 &
  gained_ATAC_functional_annotation$me3_Hs>0 &
  gained_ATAC_functional_annotation$atac_Pt==0 &
  gained_ATAC_functional_annotation$atac_Mm==0 &
  gained_ATAC_functional_annotation$me3_Pt==0 &
  gained_ATAC_functional_annotation$me3_Mm==0 &
  gained_ATAC_functional_annotation$k27_Mm==0,]
gained_promoters_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(gained_promoters))]
lost_atac_peaks_hs = names(lost_ATAC_gr)
## Chimpanzee and macaque intervals that correspond to the lost human peaks.
lost_ATAC_gr_pt = pt_atac_for_Deseq2[which(names(pt_atac_for_Deseq2) %in% lost_atac_peaks_hs)]
lost_ATAC_gr_mm = mm_atac_for_Deseq2[which(names(mm_atac_for_Deseq2) %in% lost_atac_peaks_hs)]
## The annotation table below binds per-species counts positionally, so the
## three objects must list the same peaks in the same order.
## lost_atac_peaks_hs is itself the vector of names; taking names() of it
## yields NULL and turns the comparison into all(logical(0)), which is TRUE
## regardless of the data (that is how the earlier "[1] TRUE" was obtained).
identical(lost_atac_peaks_hs, names(lost_ATAC_gr_pt))
identical(lost_atac_peaks_hs, names(lost_ATAC_gr_mm))
## Lost peaks outside every promoter window. A mask is used because a
## negative index built from zero hits would select nothing.
intergenic_lost = lost_atac_peaks_hs[ countOverlaps(lost_ATAC_gr,promoters_tss_gr) == 0 ]
## Overlap counts per lost peak; same layout as the gained-peak table.
lost_ATAC_functional_annotation = data.frame(atac_Hs = countOverlaps(lost_ATAC_gr,hs_atac),
                                             atac_Pt = countOverlaps(lost_ATAC_gr_pt,pt_atac),
                                             atac_Mm = countOverlaps(lost_ATAC_gr_mm,mm_atac),
                                             me3_Hs = countOverlaps(lost_ATAC_gr,hs_me3),
                                             me3_Pt = countOverlaps(lost_ATAC_gr_pt,pt_me3),
                                             me3_Mm = countOverlaps(lost_ATAC_gr_mm,mm_me3),
                                             k27_Hs = countOverlaps(lost_ATAC_gr,hs_k27ac),
                                             k27_Pt = countOverlaps(lost_ATAC_gr_pt,pt_k27ac),
                                             k27_Mm = countOverlaps(lost_ATAC_gr_mm,mm_k27ac),
                                             promoter = countOverlaps(lost_ATAC_gr,promoters_tss_gr),
                                             is_intergenic = lost_atac_peaks_hs %in% intergenic_lost,
                                             row.names = lost_atac_peaks_hs )
colSums(lost_ATAC_functional_annotation>0)## atac_Hs atac_Pt atac_Mm me3_Hs me3_Pt
## 479 2441 3172 145 446
## me3_Mm k27_Hs k27_Pt k27_Mm promoter
## 505 325 1538 1234 540
## is_intergenic
## 2780
We observe a 3-fold over-representation of lost relative to gained promoters in evolution.
## Strict definitions: promoter peaks with H3K4me3 and H3K27ac in both NHPs
## (lost), or with human H3K4me3 and none of those four marks (gained).
## NOTE: both objects are overwritten two lines further down, so these strict
## versions do not reach the statistics below.
lost_promoters = lost_ATAC_functional_annotation[lost_ATAC_functional_annotation$promoter>0 & rowSums( lost_ATAC_functional_annotation[,c("me3_Pt","me3_Mm","k27_Pt","k27_Mm")]>0)==4,]
gained_promoters = gained_ATAC_functional_annotation[gained_ATAC_functional_annotation$me3_Hs>0 & gained_ATAC_functional_annotation$promoter>0 & rowSums(gained_ATAC_functional_annotation[,c("me3_Pt","me3_Mm","k27_Pt","k27_Mm")]==0)==4,]
## Definitions actually used: every lost / gained peak overlapping a promoter window.
lost_promoters = lost_ATAC_functional_annotation[lost_ATAC_functional_annotation$promoter>0 ,]
gained_promoters = gained_ATAC_functional_annotation[gained_ATAC_functional_annotation$promoter>0 ,]
## Unique gene IDs of the promoter windows hit by those peaks.
lost_promoters_ensid = unique( promoters_tss_gr[queryHits(findOverlaps(promoters_tss_gr, lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(lost_promoters))] ))]$gene_id )
gained_promoters_ensid = unique( promoters_tss_gr[queryHits(findOverlaps(promoters_tss_gr, gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(gained_promoters) )]))]$gene_id )
length(gained_promoters_ensid)## [1] 951
length(lost_promoters_ensid)## [1] 613
any(lost_promoters_ensid %in% gained_promoters_ensid)## [1] TRUE
## 2x2 table, filled column-wise: column 1 = promoter peaks (lost, gained),
## column 2 = all peaks (lost, gained).
## NOTE(review): column 2 holds the totals, which include the promoter peaks of
## column 1; confirm whether non-promoter counts were intended.
m=matrix(c(nrow(lost_promoters),
nrow(gained_promoters),
length(lost_ATAC_gr),
length(gained_ATAC_gr)),2,2)
fisher.test(m)##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value < 0.00000000000000022
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.103521 2.646321
## sample estimates:
## odds ratio
## 2.360091
save(gained_ATAC_gr,file=paste0(objects_directory,"gained_ATAC_gr.RData"))
save(lost_ATAC_gr,file=paste0(objects_directory,"lost_ATAC_gr.RData"))## ----------------
## Lost enhancers: intergenic, no promoter overlap, no H3K4me3 peak in any
## species and no human H3K27ac peak. "active" additionally requires H3K27ac in
## both chimpanzee and macaque, "poised" requires it in neither.
lost_enhancer_like = lost_ATAC_functional_annotation$is_intergenic>0 &
  lost_ATAC_functional_annotation$promoter==0 &
  rowSums(lost_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 &
  lost_ATAC_functional_annotation$k27_Hs==0
geniune_lost_active_enhancers = lost_ATAC_functional_annotation[ lost_enhancer_like & lost_ATAC_functional_annotation$k27_Pt>0 & lost_ATAC_functional_annotation$k27_Mm>0,]
geniune_lost_poised_enhancers = lost_ATAC_functional_annotation[ lost_enhancer_like & lost_ATAC_functional_annotation$k27_Pt==0 & lost_ATAC_functional_annotation$k27_Mm==0,]
geniune_lost_enhancers = lost_ATAC_functional_annotation[ lost_enhancer_like,]
genuine_lost_enhancers_gr = lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(geniune_lost_enhancers))]
geniune_lost_poised_enhancers_gr = lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(geniune_lost_poised_enhancers))]
geniune_lost_active_enhancers_gr = lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(geniune_lost_active_enhancers))]
save( genuine_lost_enhancers_gr, file=paste0(objects_directory,"genuine_lost_enhancers_gr.RData"))
## ----------------
## Gained enhancers: intergenic, no promoter overlap, no H3K4me3 peak in any
## species and no H3K27ac peak in chimpanzee or macaque. "active" additionally
## requires a human H3K27ac peak, "poised" requires its absence.
gained_enhancer_like = gained_ATAC_functional_annotation$is_intergenic>0 &
  gained_ATAC_functional_annotation$promoter==0 &
  rowSums(gained_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 &
  gained_ATAC_functional_annotation$k27_Pt==0 &
  gained_ATAC_functional_annotation$k27_Mm==0
genuine_gained_enhancers = gained_ATAC_functional_annotation[ gained_enhancer_like,]
genuine_gained_active_enhancers = gained_ATAC_functional_annotation[ gained_enhancer_like & gained_ATAC_functional_annotation$k27_Hs>0,]
genuine_gained_poised_enhancers = gained_ATAC_functional_annotation[ gained_enhancer_like & gained_ATAC_functional_annotation$k27_Hs==0,]
genuine_gained_enhancers_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(genuine_gained_enhancers))]
genuine_gained_active_enhancers_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(genuine_gained_active_enhancers))]
genuine_gained_poised_enhancers_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(genuine_gained_poised_enhancers))]
## Export twice: with NCBI-style and with UCSC-style sequence names. The object
## is left in UCSC style afterwards.
seqlevelsStyle(genuine_gained_enhancers_gr) = "ncbi"
export.bed( genuine_gained_enhancers_gr, con=paste0(outputs_directory,"genuine_gained_enhancers.bed"))
seqlevelsStyle(genuine_gained_enhancers_gr) = "ucsc"
export.bed( genuine_gained_enhancers_gr, con=paste0(outputs_directory,"genuine_gained_enhancers_ucsc.bed"))
up_set = HS_UP_Genes$ensembl_id
dn_set = HS_DN_Genes$ensembl_id
## Promoters of the genes in up_set / dn_set.
promoters_HITS_UP = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% up_set ) ]
promoters_HITS_DN = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% dn_set ) ]
length(genuine_gained_enhancers_gr)## [1] 9343
length(genuine_lost_enhancers_gr)## [1] 2351
## Windows of 1,000,000 bp centred on each promoter in promoters_HITS_UP,
## i.e. 500 kb to either side.
promoters_HITS_UP_500 = GenomicRanges::resize(promoters_HITS_UP,1000000,fix="center")
sum(countOverlaps(promoters_HITS_UP_500,genuine_gained_enhancers_gr)>0)## [1] 586
sum(countOverlaps(promoters_HITS_UP_500,genuine_gained_active_enhancers_gr)>0)## [1] 460
## ----------------
save( promoters_HITS_UP, promoters_HITS_DN,
      file=paste0(objects_directory,"promoters_up_down.RData"))
## Promoters of genes significant (padj < 0.01) in a single comparison, split
## by direction. The .x columns are used for the "_hs_pt" sets and the .y
## columns for the "_hs_mm" sets.
prom_up_hs_pt = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x>0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]
prom_dn_hs_pt = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x<0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]
prom_up_hs_mm = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y>0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
prom_dn_hs_mm = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y<0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
## ----------------
## Select the ranges of `query` whose nearest range in `subject` lies closer
## than (nearer = TRUE) or farther than (nearer = FALSE) `cutoff` bp.
## distanceToNearest() returns one hit per query range that HAS a nearest
## subject; query ranges without one (e.g. no subject on their sequence) are
## absent from the result. The hits are therefore mapped back through
## queryHits() instead of being used as positions in `query`.
select_by_nearest_distance = function( query, subject, cutoff = 500000, nearer = TRUE ){
  nearest_hits = distanceToNearest(query, subject)
  hit_distance = elementMetadata(nearest_hits)[,1]
  keep = if( nearer ) hit_distance < cutoff else hit_distance > cutoff
  query[ queryHits(nearest_hits)[ which(keep) ] ]
}
## promoters of every gene that is differential in any comparison
all_DEG_promoters = c( prom_up_hs_pt,prom_dn_hs_pt,prom_up_hs_mm,prom_dn_hs_mm,promoters_HITS_UP,promoters_HITS_DN )
genuine_gained_enhancers_that_do_something = select_by_nearest_distance(genuine_gained_enhancers_gr, promoters_HITS_UP)
genuine_lost_enhancers_that_do_something = select_by_nearest_distance(genuine_lost_enhancers_gr, promoters_HITS_DN)
genuine_lost_enhancers_that_do_nothing = select_by_nearest_distance(genuine_lost_enhancers_gr, all_DEG_promoters, nearer = FALSE)
## verify on elements identified without TAD filtering
enhancers_linked_with_activation = select_by_nearest_distance(genuine_gained_enhancers_gr, promoters_HITS_UP)
enhancers_not_linked_with_activation = select_by_nearest_distance(genuine_gained_enhancers_gr, all_DEG_promoters, nearer = FALSE)
save( enhancers_linked_with_activation, enhancers_not_linked_with_activation,
      file=paste0(objects_directory,"enhancers_functional_groups.RData"))
export.bed(enhancers_linked_with_activation,con=paste0(outputs_directory,"enhancers_linked_with_activation.bed"))
export.bed(enhancers_not_linked_with_activation,con=paste0(outputs_directory,"enhancers_not_linked_with_activation.bed"))
save( genuine_lost_enhancers_that_do_something, genuine_lost_enhancers_that_do_nothing,
      file=paste0(objects_directory,"lost_enhancers_functional_groups.RData"))
export.bed(genuine_lost_enhancers_that_do_something,con=paste0(outputs_directory,"lost_enhancers_linked_with_activation.bed"))
export.bed(genuine_lost_enhancers_that_do_nothing,con=paste0(outputs_directory,"lost_enhancers_not_linked_with_activation.bed") )
## all enhancers
## Human ATAC peaks away from H3K4me3 peaks and promoter windows that overlap
## an H3K27ac peak. Masks keep each peak once and stay correct when nothing
## overlaps.
enhancers_HS = hs_atac[ countOverlaps(hs_atac,c(hs_me3,promoters_tss_gr)) == 0 ]
enhancers_HS = enhancers_HS[ countOverlaps(enhancers_HS, hs_k27ac) > 0 ]
## conserved enhancers - very not a lot and do not change significantly and do not overlap any promoter
## (which() drops the rows whose fold change or adjusted p-value is NA)
conserved_enhancers = res_HS_NHP[ which( abs(res_HS_NHP$hs_pt_LFC)<log2(1.5) & abs(res_HS_NHP$hs_mm_LFC)<log2(1.5) &
                                         res_HS_NHP$hs_pt_Padj>0.1 & res_HS_NHP$hs_mm_Padj>0.1 ), ]
conserved_enhancers = enhancers_HS[ which(names(enhancers_HS) %in% rownames(conserved_enhancers)) ]
save( conserved_enhancers,
      file=paste0(objects_directory,"conserved_enhancers.RData" ))
export.bed(conserved_enhancers,con=paste0(outputs_directory,"conserved_enhancers.bed"))
Bar-graph showing how many enhancer do something
## Promoters differential in a single comparison, minus those whose names are
## also in promoters_HITS_UP.
up_dn_sep = c(prom_up_hs_pt,prom_dn_hs_pt,prom_up_hs_mm,prom_dn_hs_mm)
up_dn_sep = up_dn_sep[which(! names(up_dn_sep) %in% names(promoters_HITS_UP))]
## Distance from each gained enhancer to its nearest promoter in promoters_HITS_UP.
gained_to_up_distance = elementMetadata(distanceToNearest(genuine_gained_enhancers_gr,promoters_HITS_UP))[,1]
## EAG     : enhancers closer than 500 kb to such a promoter
## any_DEG : enhancers farther than 500 kb from one, minus those in
##           enhancers_not_linked_with_activation
m = c( any_DEG=sum( gained_to_up_distance > 500000)-length(enhancers_not_linked_with_activation),
       EAG=sum( gained_to_up_distance < 500000) )
par(mar=c(4,4,4,4),mfrow=c(1,1))
barplot(as.matrix(m),beside = FALSE,col=c("#0B6623","steelblue3"),
        ylim=c(0,10000),ylab="Enhancers")
axis(2,lwd=2)
m ## any_DEG EAG
## 5219 1443
## Reload the gained enhancers from the UCSC-style BED export.
genuine_gained_enhancers_gr = import.bed(paste0(outputs_directory,"genuine_gained_enhancers_ucsc.bed"))
## Per TAD: number of promoters in promoters_HITS_UP, number of gained
## enhancers, and number of promoters differential in a single comparison.
enhancers_TAD_annotation = data.frame( up_genes = countOverlaps(ele_domains$TADs,promoters_HITS_UP),
                                       up_enhancers = countOverlaps(ele_domains$TADs,genuine_gained_enhancers_gr),
                                       genes_Hs_NHP = countOverlaps(ele_domains$TADs,c(prom_up_hs_pt,prom_dn_hs_pt,prom_up_hs_mm,prom_dn_hs_mm)))
## Enhancers in TADs that hold at least one such promoter and one gained enhancer.
enhancers_linked_with_activation_TAD = genuine_gained_enhancers_gr[unique(queryHits(findOverlaps(genuine_gained_enhancers_gr,ele_domains$TADs[which(enhancers_TAD_annotation[,1]>0 & enhancers_TAD_annotation[,2]>0)])))]
## Enhancers in TADs that hold a gained enhancer but no differential promoter of either kind.
enhancers_not_linked_with_activation_TAD = genuine_gained_enhancers_gr[unique(queryHits(findOverlaps(genuine_gained_enhancers_gr,ele_domains$TADs[which(enhancers_TAD_annotation[,1]==0 & enhancers_TAD_annotation[,2]>0 & enhancers_TAD_annotation[,3]==0)])))]
## Make the two groups mutually exclusive (an enhancer may overlap TADs of both kinds).
enhancers_linked_with_activation_TADs = enhancers_linked_with_activation_TAD[which(!enhancers_linked_with_activation_TAD$name %in% enhancers_not_linked_with_activation_TAD$name)]
enhancers_not_linked_with_activation_TADs = enhancers_not_linked_with_activation_TAD[which(!enhancers_not_linked_with_activation_TAD$name %in% enhancers_linked_with_activation_TAD$name)]
## Name both exported objects after their BED name column. The second
## assignment previously went to the "_TAD" object only, leaving the exported
## "_TADs" object unnamed; the "_TAD" names are still set for any later use.
names(enhancers_linked_with_activation_TADs) = enhancers_linked_with_activation_TADs$name
names(enhancers_not_linked_with_activation_TADs) = enhancers_not_linked_with_activation_TADs$name
names(enhancers_not_linked_with_activation_TAD) = enhancers_not_linked_with_activation_TAD$name
export.bed(enhancers_linked_with_activation_TADs,con=paste0(outputs_directory,"enhancers_linked_with_activation_TADs.bed"))
export.bed(enhancers_not_linked_with_activation_TADs,con=paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs.bed"))
We read in the annotation of TADs from TopDom. We identify DEGs in single comparisions and assess how frequently we see up and down regulated genes per TAD.
## Promoters of genes significant (padj < 0.01) in a single comparison, split
## by direction (.x columns -> "_hs_pt", .y columns -> "_hs_mm").
prom_up_hs_pt <- promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x>0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]
prom_dn_hs_pt <- promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x<0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]
prom_up_hs_mm <- promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y>0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
prom_dn_hs_mm <- promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y<0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
## One row per TAD: how many features of each class overlap it, plus its width.
ele_domains_anno <- local({
  tads <- ele_domains$TADs
  per_tad <- function(features) countOverlaps(tads, features)
  data.frame(
    up_prom = per_tad(promoters_HITS_UP),
    dn_prom = per_tad(promoters_HITS_DN),
    up_vsPt = per_tad(prom_up_hs_pt),
    dn_vsPt = per_tad(prom_dn_hs_pt),
    up_vsMm = per_tad(prom_up_hs_mm),
    dn_vsMm = per_tad(prom_dn_hs_mm),
    number_of_enh_hs = per_tad(enhancers_HS),
    genuine_gained_enhancers = per_tad(genuine_gained_enhancers_gr),
    genuine_gained_active_enhancers = per_tad(genuine_gained_active_enhancers_gr),
    genuine_gained_poised_enhancers = per_tad(genuine_gained_poised_enhancers_gr),
    genuine_lost_enhancers = per_tad(genuine_lost_enhancers_gr),
    geniune_lost_active_enhancers = per_tad(geniune_lost_active_enhancers_gr),
    geniune_lost_poised_enhancers = per_tad(geniune_lost_poised_enhancers_gr),
    gained_enhancers_that_do_sth = per_tad(genuine_gained_enhancers_that_do_something),
    lost_enhancers_that_do_sth = per_tad(genuine_lost_enhancers_that_do_something),
    prom_number = per_tad(promoters_filtered_gr),
    me3_number = per_tad(hs_me3),
    size = width(tads))
})
## Number of promoters in each set that fall inside any TAD.
length(unique(queryHits(findOverlaps(promoters_HITS_UP,ele_domains$TADs))))## [1] 555
length(unique(queryHits(findOverlaps(promoters_HITS_DN,ele_domains$TADs))))## [1] 447
## TADs with at least one up promoter and no down promoter, and the reverse.
tads_with_up_gene = ele_domains_anno[ele_domains_anno$up_prom>0 & ele_domains_anno$dn_prom==0,]
tads_with_dn_gene = ele_domains_anno[ele_domains_anno$dn_prom>0 & ele_domains_anno$up_prom==0,]
## For each annotation column: number of such TADs with a non-zero value.
colSums(tads_with_up_gene>0)## up_prom dn_prom
## 382 0
## up_vsPt dn_vsPt
## 382 72
## up_vsMm dn_vsMm
## 382 83
## number_of_enh_hs genuine_gained_enhancers
## 349 231
## genuine_gained_active_enhancers genuine_gained_poised_enhancers
## 154 176
## genuine_lost_enhancers geniune_lost_active_enhancers
## 92 19
## geniune_lost_poised_enhancers gained_enhancers_that_do_sth
## 55 228
## lost_enhancers_that_do_sth prom_number
## 8 382
## me3_number size
## 366 382
colSums(tads_with_dn_gene>0)## up_prom dn_prom
## 0 365
## up_vsPt dn_vsPt
## 38 365
## up_vsMm dn_vsMm
## 83 365
## number_of_enh_hs genuine_gained_enhancers
## 335 183
## genuine_gained_active_enhancers genuine_gained_poised_enhancers
## 99 133
## genuine_lost_enhancers geniune_lost_active_enhancers
## 98 20
## geniune_lost_poised_enhancers gained_enhancers_that_do_sth
## 50 12
## lost_enhancers_that_do_sth prom_number
## 93 365
## me3_number size
## 353 365
## TADs that contain both an up and a down promoter.
sum( rowSums(ele_domains_anno[,c("up_prom","dn_prom")]>0)==2 )## [1] 33
## TADs with promoters of one direction only, and more than one of them.
sum( rowSums(ele_domains_anno[,c("up_prom","dn_prom")]>0)==1 & rowSums(ele_domains_anno[,c("up_prom","dn_prom")])>1)## [1] 124
## Co-occurrence of two count columns across the rows of a table.
##   x    : data frame or matrix of counts, one row per region (e.g. per TAD)
##   col1 : index or name of the first column
##   col2 : index or name of the second column
## Returns a named vector with the number of rows in which only the first
## column is positive (FirstOnly), only the second is positive (SecondOnly),
## or both are positive (Both).
getStats = function(x,col1,col2){
  first = x[,col1]
  second = x[,col2]
  c( FirstOnly = sum(first>0 & second==0),
     SecondOnly = sum(first==0 & second>0),
     Both = sum(first>0 & second>0) )
}
par(mfrow=c(1,1),mar=c(8,5,1,1))
## One row per comparison; columns FirstOnly (up only), SecondOnly (down only)
## and Both, counted over TADs.
m = rbind( getStats(ele_domains_anno, "up_prom", "dn_prom"),
           getStats(ele_domains_anno, "up_vsPt", "dn_vsPt"),
           getStats(ele_domains_anno, "up_vsMm", "dn_vsMm") )
## names.arg is spelled out instead of relying on partial matching of `names`
barplot(t(m), beside=FALSE, col=c("green4","wheat3","gray60"),
        names.arg=c("Hs vs NHP","Hs vs. Pt","Hs vs. Mm"),las=2,
        ylab="EAG")
axis(2,lwd=2,las=2)
Figure showing how many genuine gained enhancers are there per domain. First of all there are many domains that only feature gained enhancer and no upregulated EAG. There are few domains where I do not see a gained enhancer despite the presence of an upregulated EAG. We see both the up-regulated EAG and a gained DOR in 253 TADs.
## TADs with gained enhancers only, up promoters only, or both.
getStats(ele_domains_anno,
         which(colnames(ele_domains_anno)=="genuine_gained_enhancers"),
         which(colnames(ele_domains_anno)=="up_prom"))## FirstOnly SecondOnly Both
## 3420 162 253
## Gained enhancers per TAD, restricted to TADs with at least one up promoter.
## nclass is spelled out instead of relying on partial matching of `n`.
hist(ele_domains_anno[ele_domains_anno$up_prom>0,"genuine_gained_enhancers"],nclass=14,
     main="",col="green4",xlab="Number of gained putative enhancers",ylim=c(0,300))
axis(1,lwd=2)
axis(2,lwd=2)
Majority of TADs have a gained enhancer and an upregulated EAG!
sum(ele_domains_anno$genuine_gained_enhancers>0)## [1] 3673
sum(ele_domains_anno[ele_domains_anno$up_prom>0,"genuine_gained_enhancers"]>0)## [1] 253
sum(ele_domains_anno$up_prom>0)## [1] 415
## Peaks gained in human in EITHER comparison. `&` binds tighter than `|`, so
## the condition reads (up vs Pt and significant) or (up vs Mm and significant).
gained_enhancers_in_any_comp = hs_atac_for_Deseq2_Hs_vs_NHP_filt[ which( res_HS_NHP_filt$hs_pt_LFC>0 & res_HS_NHP_filt$hs_pt_Padj<pvalthr | res_HS_NHP_filt$hs_mm_LFC>0 & res_HS_NHP_filt$hs_mm_Padj<pvalthr) ]
## Drop peaks overlapping an H3K4me3 peak or a promoter window. A mask is used
## because a negative index built from zero hits would select nothing.
gained_enhancers_in_any_comp = gained_enhancers_in_any_comp[ countOverlaps(gained_enhancers_in_any_comp,c(hs_me3,promoters_tss_gr)) == 0 ]
## Keep peaks without H3K27ac in both chimpanzee and macaque.
gained_enhancers_in_any_comp = gained_enhancers_in_any_comp[which(names(gained_enhancers_in_any_comp) %in% pt_atac_for_Deseq2_not_H3K27ac[pt_atac_for_Deseq2_not_H3K27ac %in% mm_atac_for_Deseq2_not_H3K27ac])]
sum( countOverlaps(ele_domains$TADs,gained_enhancers_in_any_comp)>0 & ele_domains_anno$up_prom>0 )## [1] 358
sum( countOverlaps(ele_domains$TADs,gained_enhancers_in_any_comp)>0 & rowSums( ele_domains_anno[,c("up_vsPt", "up_vsMm")]>0)>0 )## [1] 1607
Overall number of human gained enhancers as compared to chimps and macaques: does it explain why the log fold change in the human lineage is more pronounced when compared to macaques?
# Sanity check: peaks and DESeq2 result rows must be in the same order, because
# columns of res_HS_NHP are used below to index hs_atac_for_Deseq2.
all(names(hs_atac_for_Deseq2)==rownames(res_HS_NHP))## [1] TRUE
# Up-regulated promoters grouped by the number of genuine gained enhancers in
# their TAD: 0, 1, 2-3 and >3 (the same bins as DORs_anno below and the
# boxplot labels).
proms_in_tads_wo_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers==0)]))]
proms_in_tads_with_1_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers==1)]))]
proms_in_tads_with_many_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers>1 & ele_domains_anno$genuine_gained_enhancers<4)]))]
# ">3" group: the threshold was ">4", which dropped TADs with exactly 4 gained
# enhancers from every group.
proms_in_tads_with_very_many_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers>3)]))]
# Peaks gained in human vs. chimp / vs. macaque (positive LFC, adjusted p < 0.1).
DORs_gained_Hs_Pt = hs_atac_for_Deseq2[which(res_HS_NHP$hs_pt_LFC>0 & res_HS_NHP$hs_pt_Padj<0.1)]
DORs_gained_Hs_Mm = hs_atac_for_Deseq2[which(res_HS_NHP$hs_mm_LFC>0 & res_HS_NHP$hs_mm_Padj<0.1)]
# Exclude peaks overlapping TSS windows, hs_me3 or gained_ATAC_gr. A logical
# mask replaces x[-unique(queryHits(...))], which returns an empty object when
# there are no overlaps at all.
DORs_gained_Hs_Pt = DORs_gained_Hs_Pt[ !(seq_along(DORs_gained_Hs_Pt) %in% queryHits(findOverlaps(DORs_gained_Hs_Pt,c(promoters_tss_gr,hs_me3,gained_ATAC_gr)))) ]
DORs_gained_Hs_Mm = DORs_gained_Hs_Mm[ !(seq_along(DORs_gained_Hs_Mm) %in% queryHits(findOverlaps(DORs_gained_Hs_Mm,c(promoters_tss_gr,hs_me3,gained_ATAC_gr)))) ]
# Per-TAD counts of the remaining gained peaks.
ele_domains_anno$DORs_gained_Hs_Pt = countOverlaps(ele_domains$TADs,DORs_gained_Hs_Pt)
ele_domains_anno$DORs_gained_Hs_Mm = countOverlaps(ele_domains$TADs,DORs_gained_Hs_Mm)
# Bin TADs by genuine gained enhancer count: 0, 1, 2-3, >3.
ele_domains_anno$DORs_anno = cut(ele_domains_anno$genuine_gained_enhancers,c(-Inf,0,1,3,1000))
# 2 x 2 panel: log2 fold changes (.x, then .y) of up-regulated promoters, and
# per-TAD gained-peak counts (vs. Pt, then vs. Mm), by gained-enhancer bin.
par(mfrow=c(2,2),mar=c(3,2,1,2))
boxplot( all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_wo_DORs)],
# all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_DORs)],
all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_DORs)],
all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_DORs)],
all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_DORs)],
notch=TRUE, ylim=c(0,12), col="white",border=colorRampPalette(c("steelblue","green4"))(4),
names=c("0","1","2-3",">3"))
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
boxplot( all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_wo_DORs)],
all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_DORs)],
all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_DORs)],
all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_DORs)],
notch=TRUE, ylim=c(0,12), col="white",border=colorRampPalette(c("steelblue","green4"))(4),
names=c("0","1","2-3",">3"))
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
boxplot(split(ele_domains_anno$DORs_gained_Hs_Pt,ele_domains_anno$DORs_anno),ylim=c(0,30),notch=TRUE, col="white",border=colorRampPalette(c("steelblue","green4"))(4),names=c("0","1","2-3",">3"))## Warning in (function (z, notch = FALSE, width = NULL, varwidth = FALSE, : some
## notches went outside hinges ('box'): maybe set notch=FALSE
boxplot(split(ele_domains_anno$DORs_gained_Hs_Mm,ele_domains_anno$DORs_anno),ylim=c(0,30),notch=TRUE, col="white",border=colorRampPalette(c("steelblue","green4"))(4),names=c("0","1","2-3",">3"))
# The next statement was fused onto the end of the boxplot() line above, which
# is a syntax error; it now stands on its own line.
# Peaks lost in human vs. chimp (negative LFC, adjusted p < 0.1).
DORs_lost_Hs_Pt = hs_atac_for_Deseq2[which(res_HS_NHP$hs_pt_LFC<0 & res_HS_NHP$hs_pt_Padj<0.1)]
# Peaks lost in human vs. macaque (negative LFC, adjusted p < 0.1).
DORs_lost_Hs_Mm = hs_atac_for_Deseq2[which(res_HS_NHP$hs_mm_LFC<0 & res_HS_NHP$hs_mm_Padj<0.1)]
# Exclude peaks overlapping TSS windows, hs_me3 or lost_ATAC_gr. A logical mask
# replaces x[-unique(queryHits(...))], which returns an empty object when there
# are no overlaps at all.
DORs_lost_Hs_Pt = DORs_lost_Hs_Pt[ !(seq_along(DORs_lost_Hs_Pt) %in% queryHits(findOverlaps(DORs_lost_Hs_Pt,c(promoters_tss_gr,hs_me3,lost_ATAC_gr)))) ]
DORs_lost_Hs_Mm = DORs_lost_Hs_Mm[ !(seq_along(DORs_lost_Hs_Mm) %in% queryHits(findOverlaps(DORs_lost_Hs_Mm,c(promoters_tss_gr,hs_me3,lost_ATAC_gr)))) ]
# Per-TAD counts of the remaining lost peaks.
ele_domains_anno$DORs_lost_Hs_Pt = countOverlaps(ele_domains$TADs,DORs_lost_Hs_Pt)
ele_domains_anno$DORs_lost_Hs_Mm = countOverlaps(ele_domains$TADs,DORs_lost_Hs_Mm)
# DORs_anno is overwritten here: TADs are now binned by genuine LOST enhancer
# count (0, 1, 2-3, >3).
ele_domains_anno$DORs_anno = cut(ele_domains_anno$genuine_lost_enhancers,c(-Inf,0,1,3,1000))
###
# Down-regulated promoters grouped by the number of genuine lost enhancers in
# their TAD, using the same 0 / 1 / 2-3 / >3 bins.
proms_in_tads_wo_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers==0)]))]
proms_in_tads_with_1_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers==1)]))]
# "2-3" bin: the upper bound previously tested genuine_gained_enhancers<4
# (copy-paste from the gained analysis); it must bound the LOST count.
proms_in_tads_with_many_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers>1 & ele_domains_anno$genuine_lost_enhancers<4)]))]
proms_in_tads_with_very_many_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers>3)]))]
# 2 x 2 panel: log2 fold changes (.x, then .y) of down-regulated promoters, and
# per-TAD lost-peak counts (vs. Pt, then vs. Mm), by lost-enhancer bin.
par(mfrow=c(2,2),mar=c(3,2,1,1))
boxplot( all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_wo_lost_DORs)],
all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_lost_DORs)],
all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_lost_DORs)],
all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_lost_DORs)],
notch=FALSE, ylim=c(-12,2), col="white",border=colorRampPalette(c("black","red"))(4),
names=c("0","1","2-3",">3"))
boxplot( all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_wo_lost_DORs)],
all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_lost_DORs)],
all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_lost_DORs)],
all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_lost_DORs)],
notch=FALSE, ylim=c(-12,2), col="white",border=colorRampPalette(c("black","red"))(4),
names=c("0","1","2-3",">3"))
boxplot(split(ele_domains_anno$DORs_lost_Hs_Pt,ele_domains_anno$DORs_anno),
ylim=c(0,30),notch=TRUE, col="white",border=colorRampPalette(c("black","red"))(4),
names=c("0","1","2-3",">3"))
boxplot(split(ele_domains_anno$DORs_lost_Hs_Mm,ele_domains_anno$DORs_anno),ylim=c(0,30),notch=TRUE,
col="white",border=colorRampPalette(c("black","red"))(4),
names=c("0","1","2-3",">3"))
# The next statement was fused onto the end of the boxplot() call above, which
# is a syntax error; it now stands on its own line.
# Matrix of phastCons signal per peak (rows named by peak), read from disk.
phastCons = readRDS(paste0(objects_directory,'phastCons30way_signal_in_5bp_bins_for_all_ATAC_peaks_500Kb_around_summit.Rds'))
# Mean phastCons profile around the ATAC-seq peak summit for four enhancer
# groups: conserved (black), linked with activation (turquoise), not linked
# (gray) and genuine lost (red). Rows are selected by peak name; the x axis
# assumes 200 columns spanning -500..500.
par(mfrow=c(1,1),mar=c(5,5,1,1))
plot( seq(-500,500,length.out=200),
colMeans(phastCons[rownames(phastCons) %in% names(conserved_enhancers),]),
ylab="PhastCons",
ty="l",col="black",lwd=3,ylim=c(0.0,0.4),xlim=c(-500,500),
xlab="distance from ATAC-seq peak summit" )
lines( seq(-500,500,length.out=200),
colMeans(phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation),]),ty="l",col="turquoise4",lwd=3 )
lines( seq(-500,500,length.out=200),
colMeans(phastCons[rownames(phastCons) %in% names(enhancers_not_linked_with_activation),]),ty="l",col="gray80",lwd=3 )
lines( seq(-500,500,length.out=200),
colMeans(phastCons[rownames(phastCons) %in% names(genuine_lost_enhancers_gr),]),ty="l",col="red",lwd=3 )
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
abline(v=0,lwd=2,lty=2,col="gray")
# The t.test() call below was fused onto the abline() line (a syntax error).
# Welch t-tests compare a single column (95) between groups.
# NOTE(review): column 95 sits just left of the centre of the 200 columns -
# confirm this is the intended bin.
t.test(phastCons[rownames(phastCons) %in% names(conserved_enhancers),95],
phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation),95])##
## Welch Two Sample t-test
##
## data: phastCons[rownames(phastCons) %in% names(conserved_enhancers), 95] and phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation), 95]
## t = 18.66, df = 3400.3, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1738891 0.2147229
## sample estimates:
## mean of x mean of y
## 0.3625148 0.1682088
t.test(phastCons[rownames(phastCons) %in% names(enhancers_not_linked_with_activation),95],
phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation),95])##
## Welch Two Sample t-test
##
## data: phastCons[rownames(phastCons) %in% names(enhancers_not_linked_with_activation), 95] and phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation), 95]
## t = 4.7247, df = 3272.1, p-value = 0.000002401
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02917625 0.07056904
## sample estimates:
## mean of x mean of y
## 0.2180814 0.1682088
# Merge the two DESeq2 result tables by row name. After the merge, ".x" columns
# come from res_HS_PT and ".y" columns from res_HS_MM.
DOR_Deseq2 = merge(as.data.frame(res_HS_PT),
                   as.data.frame(res_HS_MM),
                   by="row.names")
## all putative enhancers
seqlevelsStyle(hs_atac) = "UCSC"
# Peaks not overlapping TSS windows or hs_me3. A logical mask replaces
# hs_atac[-unique(queryHits(...))], which returns an empty object when there
# are no overlaps at all.
all_primate_enhancers = hs_atac[ !(seq_along(hs_atac) %in% queryHits(findOverlaps(hs_atac,c(promoters_tss_gr,hs_me3)))) ]
changed_enhancers_ATAC_signal_change = DOR_Deseq2[DOR_Deseq2$Row.names %in% names(all_primate_enhancers),]
# Drop rows without an adjusted p-value in either comparison.
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ !is.na(changed_enhancers_ATAC_signal_change$padj.x),]
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ !is.na(changed_enhancers_ATAC_signal_change$padj.y),]
# NOTE(review): the cutoff is sqrt(0.1) (about 0.316) on either padj - confirm
# this threshold is intended.
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ changed_enhancers_ATAC_signal_change$padj.x<sqrt(0.1) | changed_enhancers_ATAC_signal_change$padj.y<sqrt(0.1), ]
# Remove rows whose two fold changes are exactly identical.
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[! changed_enhancers_ATAC_signal_change$log2FoldChange.x==changed_enhancers_ATAC_signal_change$log2FoldChange.y,]
par(mfrow=c(1,1),mar=c(4,4,1,1))
# Absolute fold changes per comparison; the y label lost a stray ")".
boxplot(abs(changed_enhancers_ATAC_signal_change$log2FoldChange.x),
        abs(changed_enhancers_ATAC_signal_change$log2FoldChange.y),
        outline=FALSE, ylab=expression("Human/NHP [log"[2]*"]"),
        names=c("Hs vs. Pt","Hs vs. Mm"),
        col="white",border=c("red","blue"),lwd=2,ylim=c(0,7))
axis(1,lwd=2, at=c(1,2),labels=c("Hs vs. Pt","Hs vs. Mm"))
axis(2,lwd=2)
box(col="black",lwd=2)
# par() was fused onto the box() line above (a syntax error).
par(mfrow=c(1,1),pty='s')
# Axis labels were swapped: x plots ".x" (from res_HS_PT, labelled "Hs vs. Pt"
# in the boxplot above) and y plots ".y" (from res_HS_MM).
heatscatter( changed_enhancers_ATAC_signal_change$log2FoldChange.x,
             changed_enhancers_ATAC_signal_change$log2FoldChange.y,
             colpal="blues",cex=0.5,
             xlab=expression("Human/chimp [log"[2]*"]"),
             ylab=expression("Human/macaque [log"[2]*"]"),
             ylim=c(-10,10),
             xlim=c(-10,10))
axis(1,lwd=2)
axis(2,lwd=2)
Regulomes are less correlated than transcriptomes.
# Pearson correlation between the Hs-vs-Pt and Hs-vs-Mm log fold changes in
# res_HS_NHP, restricted to rows with adjusted p < 0.1 in both comparisons.
cor.test(res_HS_NHP$hs_pt_LFC[res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_mm_Padj<0.1],
res_HS_NHP$hs_mm_LFC[res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_mm_Padj<0.1] )##
## Pearson's product-moment correlation
##
## data: res_HS_NHP$hs_pt_LFC[res_HS_NHP$hs_pt_Padj < 0.1 & res_HS_NHP$hs_mm_Padj < 0.1] and res_HS_NHP$hs_mm_LFC[res_HS_NHP$hs_pt_Padj < 0.1 & res_HS_NHP$hs_mm_Padj < 0.1]
## t = 241.55, df = 17201, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8753683 0.8821743
## sample estimates:
## cor
## 0.878816
# Same correlation for the ".x" / ".y" log fold changes in all_Deseqs
# (presumably the gene-expression results - confirm).
# NOTE(review): the row filter mixes an adjusted p-value (padj.x) with a raw
# p-value (pvalue.y) - confirm whether padj.y was intended.
cor.test(all_Deseqs$log2FoldChange.x[all_Deseqs$padj.x<0.01 & all_Deseqs$pvalue.y<0.01],
all_Deseqs$log2FoldChange.y[all_Deseqs$padj.x<0.01 & all_Deseqs$pvalue.y<0.01] )##
## Pearson's product-moment correlation
##
## data: all_Deseqs$log2FoldChange.x[all_Deseqs$padj.x < 0.01 & all_Deseqs$pvalue.y < 0.01] and all_Deseqs$log2FoldChange.y[all_Deseqs$padj.x < 0.01 & all_Deseqs$pvalue.y < 0.01]
## t = 49.93, df = 1680, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7529456 0.7914822
## sample estimates:
## cor
## 0.7729257
# Correlation of the two fold-change vectors, with 99% confidence intervals,
# for log_fold_dat and for the filtered ATAC peaks.
expressionRNA = cor.test(log_fold_dat$HSvPT_lfc_shrunk,log_fold_dat$HSvMM_lfc, conf.level = 0.99)
regulomeATAC = cor.test(changed_enhancers_ATAC_signal_change$log2FoldChange.x,
                        changed_enhancers_ATAC_signal_change$log2FoldChange.y, conf.level = 0.99)
par(pty="m")
# barplot() returns the bar midpoints; use them for the error bars instead of
# the hard-coded x positions 0.7 and 1.9.
bar_mid = barplot( c(expressionRNA$estimate,
                     regulomeATAC$estimate),
                   col=c('green4','steelblue'),ylim=c(0,1),
                   names.arg=c("gene expression","ATAC"))
segments(bar_mid[1],expressionRNA$conf.int[[1]],bar_mid[1],expressionRNA$conf.int[[2]])
segments(bar_mid[2],regulomeATAC$conf.int[[1]],bar_mid[2],regulomeATAC$conf.int[[2]])
axis(2,lwd=2)
# The next statement was fused onto the axis() line above (a syntax error).
# 1 Mb windows centred on each enhancer (500 kb either side).
linked_500 = GenomicRanges::resize(enhancers_linked_with_activation,1000000,fix="center")
not_linked_500 = GenomicRanges::resize(enhancers_not_linked_with_activation,1000000,fix="center")
# Number of windows overlapping at least one range in promoters_filtered_gr.
sum(countOverlaps(linked_500,promoters_filtered_gr)>0)## [1] 1443
sum(countOverlaps(not_linked_500,promoters_filtered_gr)>0)## [1] 2675
Enhancers that do something are more open
# Human ATAC-seq and H3K27ac coverage tracks, switched to UCSC sequence names.
atac_hs_bw = import.bw(paste0(outputs_directory,"ATAC_Seq_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_RPGC.bw"))
seqlevelsStyle(atac_hs_bw) = "ucsc"
k27_hs_bw = import.bw(paste0(outputs_directory,"ChIP_Seq_H3K27ac_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_RPGC.bw"))
seqlevelsStyle(k27_hs_bw) = "ucsc"
# Enhancer sets read back from BED, named by their BED "name" column.
linked_GR = import.bed(paste0(outputs_directory,"enhancers_linked_with_activation.bed"))
names(linked_GR) = linked_GR$name
not_linked_GR = import.bed(paste0(outputs_directory,"enhancers_not_linked_with_activation.bed"))
names(not_linked_GR) = not_linked_GR$name
# Project helpers: derive plotting ranges, then extract binned signal from them.
linked_GR_AP = GetAPRangesForGenomicRangesObject(linked_GR)
not_linked_GR_AP = GetAPRangesForGenomicRangesObject(not_linked_GR)
linked_atac_hs = getSignalInBins( linked_GR_AP, atac_hs_bw, 1 )
linked_k27_hs = getSignalInBins( linked_GR_AP, k27_hs_bw, 1 )
not_linked_atac_hs = getSignalInBins( not_linked_GR_AP, atac_hs_bw, 1 )
not_linked_k27_hs = getSignalInBins( not_linked_GR_AP, k27_hs_bw, 1 )
# Shared x axis for both panels.
summit_dist = seq(-1000,1000,length.out=200)
par(mfrow=c(1,2),mar=c(5,5,1,1),pty="m")
# Left panel: mean ATAC-seq signal, linked (turquoise) vs. not linked (gray).
plot(summit_dist, colMeans(linked_atac_hs),
     col="turquoise4",type='l',lwd=2,
     xlab="Distance from the DOR summit",ylab="ATAC-seq signal (RPGC)")
lines(summit_dist, colMeans(not_linked_atac_hs), col="gray",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
# Right panel: mean H3K27ac signal for the same two groups.
plot(summit_dist, colMeans(linked_k27_hs),
     col="turquoise4",type='l',lwd=2,
     xlab="Distance from the DOR summit",ylab="H3K27ac ChIP-seq signal (RPGC)",ylim=c(0,6))
lines(summit_dist, colMeans(not_linked_k27_hs), col="gray",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
Names of all the TFs in the HOCOMOCO database.
# File names of all TF motif BED files across the seven motif directories.
AllTFs = c(list.files('~/human_beds/A'),
           unlist( lapply( as.list(paste0("~/human_beds/A-kopia",c('',2,3,4,5,6))), function(x){list.files(x)} ) ) )
# Strip the literal ".bed" extension. The previous strsplit(AllTFs, '.bed')
# treated "." as a regex wildcard, and unlist() could return a vector longer
# than AllTFs if a name matched the pattern more than once, misaligning the
# columns assigned below. sub() always returns one stem per file.
tf_stems = sub("\\.bed$", "", AllTFs)
TFs = unique(tf_stems)
TFsEnsemblG = read.delim( file=paste0(outputs_directory,'TFsymbol_fixed.txt'),as.is=TRUE )
# Ensembl gene id looked up via the curated symbol in column "Fixed".
TFsEnsemblG$eig = genemapu$ensembl_gene_id[match(TFsEnsemblG$Fixed,genemapu$hgnc_symbol)]
# NOTE(review): these assignments assume the rows of TFsymbol_fixed.txt are in
# the same order as AllTFs - confirm.
TFsEnsemblG$names = AllTFs
TFsEnsemblG$names2 = tf_stems
TFsEnsemblG$names3 = paste0( tf_stems, "_HG38.bed" )
save(TFsEnsemblG,file=paste0(objects_directory,"TFsEnsemblG.RData"))
Load objects.
# Table read without a header row (columns will be V1, V2, ...).
human_stripe_factors = read.delim(paste0(outputs_directory,"human_stripe_factors.txt"),header=FALSE,as.is=TRUE)
# load() restores the objects stored in each .RData file into the workspace,
# overwriting any existing objects with the same names.
load(paste0(objects_directory,"TFsEnsemblG.RData"))
load( paste0(objects_directory,"enhancers_functional_groups.RData"))
load( paste0(objects_directory,"conserved_enhancers.RData" ))
# Write conserved_enhancers (presumably restored by the load() above - confirm)
# to BED so it can be lifted over alongside the other enhancer groups.
export.bed( conserved_enhancers, con=paste0(outputs_directory,"conserved_enhancers.bed" ))
genuine_gained_enhancers_gr = import.bed(paste0(outputs_directory,"genuine_gained_enhancers_ucsc.bed"))
load(paste0(objects_directory,"lost_enhancers_functional_groups.RData"))
# Lost enhancers read back from BED: linked / not linked with activation.
genuine_lost_enhancers_that_do_something = import.bed(paste0(outputs_directory,"lost_enhancers_linked_with_activation.bed"))
genuine_lost_enhancers_that_do_nothing = import.bed(paste0(outputs_directory,"lost_enhancers_not_linked_with_activation.bed"))
# TAD-based variants of the linked / not-linked enhancer sets.
enhancers_linked_with_activation_TADs = import.bed(paste0(outputs_directory,"enhancers_linked_with_activation_TADs.bed"))
enhancers_not_linked_with_activation_TADs = import.bed(paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs.bed"))
# Name the ranges by their BED "name" column so they can be matched by name later.
names(enhancers_linked_with_activation_TADs) = enhancers_linked_with_activation_TADs$name
names(enhancers_not_linked_with_activation_TADs) = enhancers_not_linked_with_activation_TADs$name
Align the chosen enhancer groups to chimp.
cd ~/Documents/Tools/
## human to chimp
# Lift every enhancer BED file from hg38 to panTro6 with identical options.
# For each set, <set>.bed is read and <set>_PanTro.bed plus
# <set>_PanTro.unmapped.file are written next to it.
bed_dir=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs
chain_file=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToPanTro6.over.chain
for enh_set in enhancers_linked_with_activation \
               enhancers_not_linked_with_activation \
               enhancers_linked_with_activation_TADs \
               enhancers_not_linked_with_activation_TADs \
               conserved_enhancers \
               lost_enhancers_linked_with_activation \
               lost_enhancers_not_linked_with_activation; do
  ./liftOver -minMatch=0.5 -bedPlus=6 -tab "${bed_dir}/${enh_set}.bed" "${chain_file}" "${bed_dir}/${enh_set}_PanTro.bed" "${bed_dir}/${enh_set}_PanTro.unmapped.file"
done
Get sequences for the enhancers
# Read one lifted-over "<stem>_PanTro.bed" file from outputs_directory, keeping
# only chimp chromosomes chr1, chr2A, chr2B, chr3-chr22 and chrX. The trailing
# 4 is passed through to readBed_filterChroms() unchanged.
read_panTro_bed = function(stem){
  readBed_filterChroms(paste0(outputs_directory,stem,'_PanTro.bed'),
                       chroms=paste0('chr',c(1,'2A','2B', 3:22,'X')),4)
}
enhancers_linked_with_activation_pt = read_panTro_bed('enhancers_linked_with_activation')
enhancers_not_linked_with_activation_pt = read_panTro_bed('enhancers_not_linked_with_activation')
enhancers_linked_with_activation_TADs_pt = read_panTro_bed('enhancers_linked_with_activation_TADs')
enhancers_not_linked_with_activation_TADs_pt = read_panTro_bed('enhancers_not_linked_with_activation_TADs')
conserved_enhancers_pt = read_panTro_bed('conserved_enhancers')
lost_linked_pt = read_panTro_bed('lost_enhancers_linked_with_activation')
lost_not_linked_pt = read_panTro_bed('lost_enhancers_not_linked_with_activation')
## checks
# Subset and reorder each human set so that it follows the order of its
# lifted-over chimp counterpart, matching by name; each all(...) line then
# confirms that the names agree element by element.
enhancers_linked_with_activation = enhancers_linked_with_activation[match(names(enhancers_linked_with_activation_pt),names(enhancers_linked_with_activation))]
all(names(enhancers_linked_with_activation_pt)==names(enhancers_linked_with_activation))## [1] TRUE
# NOTE(review): the not-linked set is only compared here, not re-ordered, and
# no equivalent step for conserved_enhancers is visible in this section -
# confirm that is intended.
all(names(enhancers_not_linked_with_activation_pt)==names(enhancers_not_linked_with_activation))## [1] TRUE
# The lost-enhancer sets are matched on their BED "name" column.
genuine_lost_enhancers_that_do_something = genuine_lost_enhancers_that_do_something[match(names(lost_linked_pt),genuine_lost_enhancers_that_do_something$name)]
genuine_lost_enhancers_that_do_nothing = genuine_lost_enhancers_that_do_nothing[match(names(lost_not_linked_pt),genuine_lost_enhancers_that_do_nothing$name)]
all(genuine_lost_enhancers_that_do_something$name==names(lost_linked_pt))## [1] TRUE
all(genuine_lost_enhancers_that_do_nothing$name==names(lost_not_linked_pt))## [1] TRUE
# Copy the "name" column into names() so the ranges can be addressed by name.
names(genuine_lost_enhancers_that_do_something) = genuine_lost_enhancers_that_do_something$name
names(genuine_lost_enhancers_that_do_nothing) = genuine_lost_enhancers_that_do_nothing$name
# Same alignment for the TAD-based sets.
enhancers_linked_with_activation_TADs = enhancers_linked_with_activation_TADs[match(names(enhancers_linked_with_activation_TADs_pt),names(enhancers_linked_with_activation_TADs))]
enhancers_not_linked_with_activation_TADs = enhancers_not_linked_with_activation_TADs[match(names(enhancers_not_linked_with_activation_TADs_pt),names(enhancers_not_linked_with_activation_TADs))]
all(names(enhancers_linked_with_activation_TADs)==names(enhancers_linked_with_activation_TADs_pt))## [1] TRUE
all(names(enhancers_not_linked_with_activation_TADs)==names(enhancers_not_linked_with_activation_TADs_pt))## [1] TRUE
# Short aliases for the two genome packages.
hs_genome = BSgenome.Hsapiens.UCSC.hg38
pt_genome = BSgenome.Ptroglodytes.UCSC.panTro6
# Human (hg38) and chimp (panTro6) sequences for every enhancer group.
enhancers_linked_with_activation_seq_Hs = Biostrings::getSeq(hs_genome, enhancers_linked_with_activation)
enhancers_linked_with_activation_seq_Pt = Biostrings::getSeq(pt_genome, enhancers_linked_with_activation_pt)
enhancers_not_linked_with_activation_seq_Hs = Biostrings::getSeq(hs_genome, enhancers_not_linked_with_activation)
enhancers_not_linked_with_activation_seq_Pt = Biostrings::getSeq(pt_genome, enhancers_not_linked_with_activation_pt)
# TAD-based sets
enhancers_linked_with_activation_TADs_seq_Hs = Biostrings::getSeq(hs_genome, enhancers_linked_with_activation_TADs)
enhancers_linked_with_activation_TADs_seq_Pt = Biostrings::getSeq(pt_genome, enhancers_linked_with_activation_TADs_pt)
enhancers_not_linked_with_activation_TADs_seq_Hs = Biostrings::getSeq(hs_genome, enhancers_not_linked_with_activation_TADs)
enhancers_not_linked_with_activation_TADs_seq_Pt = Biostrings::getSeq(pt_genome, enhancers_not_linked_with_activation_TADs_pt)
# conserved enhancers
conserved_enhancers_seq_Hs = Biostrings::getSeq(hs_genome, conserved_enhancers)
conserved_enhancers_seq_Pt = Biostrings::getSeq(pt_genome, conserved_enhancers_pt)
# lost enhancers
lost_enhancers_linked_seq_Hs = Biostrings::getSeq(hs_genome, genuine_lost_enhancers_that_do_something)
lost_enhancers_linked_seq_Pt = Biostrings::getSeq(pt_genome, lost_linked_pt)
lost_enhancers_not_linked_seq_Hs = Biostrings::getSeq(hs_genome, genuine_lost_enhancers_that_do_nothing)
lost_enhancers_not_linked_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,lost_not_linked_pt)
Now let’s find the evolutionary mismatches between the sequences. We compare human to chimp sequences.
# For every linked enhancer (looked up by name), compare the human and chimp
# range and sequence with Figure_out_mismatching_sequences(), on 4 cores.
test = mclapply( as.list(names(enhancers_linked_with_activation)),
                 function(enh){ Figure_out_mismatching_sequences( enhancers_linked_with_activation[which(names(enhancers_linked_with_activation)==enh)],
                                                                  enhancers_linked_with_activation_seq_Hs[which(names(enhancers_linked_with_activation_seq_Hs)==enh)],
                                                                  enhancers_linked_with_activation_pt[which(names(enhancers_linked_with_activation_pt)==enh)],
                                                                  enhancers_linked_with_activation_seq_Pt[which(names(enhancers_linked_with_activation_seq_Pt)==enh)]) },
                 mc.cores = 4L )
# mclapply() does not stop on errors: failed jobs come back as "try-error"
# objects, which rbind() would silently mix into the result. Fail loudly instead.
if( any(vapply(test, inherits, logical(1), what="try-error")) ){
  stop("Figure_out_mismatching_sequences() failed for ",
       sum(vapply(test, inherits, logical(1), what="try-error")),
       " linked enhancer(s)")
}
enhancers_linked_with_activation_hs_vs_Pt = do.call("rbind",test)
save(enhancers_linked_with_activation_hs_vs_Pt,
     file=paste0(objects_directory,"enhancers_linked_with_activation_hs_vs_Pt.RData") )
# Mismatch table as GRanges; "kind" carries the mismatch type.
enhancers_linked_with_activation_hs_vs_Pt_gr = GRanges( seqnames=Rle(enhancers_linked_with_activation_hs_vs_Pt$seqnames),
                                                        ranges = IRanges( enhancers_linked_with_activation_hs_vs_Pt$start,
                                                                          end=enhancers_linked_with_activation_hs_vs_Pt$end ),
                                                        kind=enhancers_linked_with_activation_hs_vs_Pt$type)
names(enhancers_linked_with_activation_hs_vs_Pt_gr) = enhancers_linked_with_activation_hs_vs_Pt$names
# Export once before the sequence style is switched ("_ucsc.bed"), then switch
# to NCBI-style names for the remaining exports.
export.bed( enhancers_linked_with_activation_hs_vs_Pt_gr, con=paste0(outputs_directory,"enhancers_linked_with_activation_hs_vs_Pt_ucsc.bed"))
seqlevelsStyle(enhancers_linked_with_activation_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_hs_vs_Pt.gtf"))
export.bed( enhancers_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
# Intersect the linked-enhancer mismatch BED with every human motif BED file,
# one run per motif directory. Directory A writes *_A.bed, A-kopia writes
# *_A1.bed, and A-kopia2..A-kopia6 write *_A2.bed..*_A6.bed.
query_bed=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_hs_vs_Pt.bed
motif_root=/Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds
result_dir=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation
for part in "" 1 2 3 4 5 6; do
  case "${part}" in
    "") motif_dir=A ;;
    1)  motif_dir=A-kopia ;;
    *)  motif_dir=A-kopia${part} ;;
  esac
  ./bin/intersectBed -a "${query_bed}" -b "${motif_root}/${motif_dir}"/*.bed -C -filenames > "${result_dir}/hs_specieis_enhancers_A${part}.bed"
done
Intersect with chimp TFBS lifted over to the Hg38 genome assembly
cd ~/Documents/Tools/bedtools2/
## ------------
# Intersect the UCSC-style linked-enhancer mismatch BED with the chimp motif
# BED files lifted over to hg38, one run per motif directory. Directory A
# writes *_A.bed, A-kopia writes *_A1.bed, and A-kopia2..A-kopia6 write
# *_A2.bed..*_A6.bed. The A-kopia7 directory is handled by the stand-alone
# command that follows this loop.
query_bed=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_hs_vs_Pt_ucsc.bed
motif_root=/Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds
result_dir=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TFBS_chimp
for part in "" 1 2 3 4 5 6; do
  case "${part}" in
    "") motif_dir=A ;;
    1)  motif_dir=A-kopia ;;
    *)  motif_dir=A-kopia${part} ;;
  esac
  ./bin/intersectBed -a "${query_bed}" -b "${motif_root}/${motif_dir}"/*.bed -C -filenames > "${result_dir}/hs_specieis_enhancers_A${part}.bed"
done
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TFBS_chimp/hs_specieis_enhancers_A7.bed
# The R statement below was fused onto the end of the shell command above; it
# now starts on its own line.
# For every not-linked enhancer (looked up by name), compare the human and chimp
# range and sequence with Figure_out_mismatching_sequences(), on 4 cores.
norole = mclapply( as.list(names(enhancers_not_linked_with_activation)),
                   function(enh){ Figure_out_mismatching_sequences( enhancers_not_linked_with_activation[which(names(enhancers_not_linked_with_activation)==enh)],
                                                                    enhancers_not_linked_with_activation_seq_Hs[which(names(enhancers_not_linked_with_activation_seq_Hs)==enh)],
                                                                    enhancers_not_linked_with_activation_pt[which(names(enhancers_not_linked_with_activation_pt)==enh)],
                                                                    enhancers_not_linked_with_activation_seq_Pt[which(names(enhancers_not_linked_with_activation_seq_Pt)==enh)]) },
                   mc.cores = 4L )
# mclapply() does not stop on errors: failed jobs come back as "try-error"
# objects, which rbind() would silently mix into the result. Fail loudly instead.
if( any(vapply(norole, inherits, logical(1), what="try-error")) ){
  stop("Figure_out_mismatching_sequences() failed for ",
       sum(vapply(norole, inherits, logical(1), what="try-error")),
       " not-linked enhancer(s)")
}
enhancers_not_linked_with_activation_hs_vs_Pt = do.call("rbind",norole)
save(enhancers_not_linked_with_activation_hs_vs_Pt,
     file=paste0(objects_directory,"enhancers_not_linked_with_activation_hs_vs_Pt.RData"))
# Mismatch table as GRanges; "kind" carries the mismatch type.
enhancers_not_linked_with_activation_hs_vs_Pt_gr = GRanges( seqnames=Rle(enhancers_not_linked_with_activation_hs_vs_Pt$seqnames),
                                                            ranges = IRanges( enhancers_not_linked_with_activation_hs_vs_Pt$start,
                                                                              end=enhancers_not_linked_with_activation_hs_vs_Pt$end ),
                                                            kind=enhancers_not_linked_with_activation_hs_vs_Pt$type)
names(enhancers_not_linked_with_activation_hs_vs_Pt_gr) = enhancers_not_linked_with_activation_hs_vs_Pt$names
# Switch to NCBI-style sequence names before exporting.
seqlevelsStyle(enhancers_not_linked_with_activation_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_not_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_hs_vs_Pt.gtf"))
export.bed( enhancers_not_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A6.bedNow let’s find the evolutionary mismatches between sequences. We compare human to chimp sequences.
TADs_enh_linked = mclapply( as.list(names(enhancers_linked_with_activation_TADs)),
function(enh){ Figure_out_mismatching_sequences( enhancers_linked_with_activation_TADs[which(names(enhancers_linked_with_activation_TADs)==enh)],
enhancers_linked_with_activation_TADs_seq_Hs[which(names(enhancers_linked_with_activation_TADs_seq_Hs)==enh)],
enhancers_linked_with_activation_TADs_pt[which(names(enhancers_linked_with_activation_TADs_pt)==enh)],
enhancers_linked_with_activation_TADs_seq_Pt[which(names(enhancers_linked_with_activation_TADs_seq_Pt)==enh)]) },
mc.cores = 4L )
# Stack the per-enhancer results of the parallel Figure_out_mismatching_sequences run into one table.
enhancers_linked_with_activation_TADs_hs_vs_Pt = do.call("rbind",TADs_enh_linked)
# Cache the combined table on disk.
# NOTE(review): the file name below starts with "eenhancers" (double "e"), unlike the sibling
# saves in this section — looks like a typo; check whether any later load() relies on this
# spelling before renaming.
save(enhancers_linked_with_activation_TADs_hs_vs_Pt,
file=paste0(objects_directory,"eenhancers_linked_with_activation_TADs_hs_vs_Pt.RData") )
enhancers_linked_with_activation_TADs_hs_vs_Pt_gr = GRanges( seqnames=Rle(enhancers_linked_with_activation_TADs_hs_vs_Pt$seqnames),
ranges = IRanges( enhancers_linked_with_activation_TADs_hs_vs_Pt$start,
end=enhancers_linked_with_activation_TADs_hs_vs_Pt$end ),
kind=enhancers_linked_with_activation_TADs_hs_vs_Pt$type)
names(enhancers_linked_with_activation_TADs_hs_vs_Pt_gr) = enhancers_linked_with_activation_TADs_hs_vs_Pt$names
export.bed( enhancers_linked_with_activation_TADs_hs_vs_Pt_gr, con=paste0(outputs_directory,"enhancers_linked_with_activation_TADs_hs_vs_Pt_ucsc.bed"))
seqlevelsStyle(enhancers_linked_with_activation_TADs_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_TADs_hs_vs_Pt.gtf"))
export.bed( enhancers_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_TADs_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A6.bedTADs_enh__not_linked = mclapply( as.list(names(enhancers_not_linked_with_activation_TADs)),
function(enh){ Figure_out_mismatching_sequences( enhancers_not_linked_with_activation_TADs[which(names(enhancers_not_linked_with_activation_TADs)==enh)],
enhancers_not_linked_with_activation_TADs_seq_Hs[which(names(enhancers_not_linked_with_activation_TADs_seq_Hs)==enh)],
enhancers_not_linked_with_activation_TADs_pt[which(names(enhancers_not_linked_with_activation_TADs_pt)==enh)],
enhancers_not_linked_with_activation_TADs_seq_Pt[which(names(enhancers_not_linked_with_activation_TADs_seq_Pt)==enh)]) },
mc.cores = 4L )
enhancers_not_linked_with_activation_TADs_hs_vs_Pt = do.call("rbind",TADs_enh__not_linked)
save(enhancers_not_linked_with_activation_TADs_hs_vs_Pt,
file=paste0(objects_directory,"enhancers_not_linked_with_activation_TADs_hs_vs_Pt.RData"))
enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr = GRanges( seqnames=Rle(enhancers_not_linked_with_activation_TADs_hs_vs_Pt$seqnames),
ranges = IRanges( enhancers_not_linked_with_activation_TADs_hs_vs_Pt$start,
end=enhancers_not_linked_with_activation_TADs_hs_vs_Pt$end ),
kind=enhancers_not_linked_with_activation_TADs_hs_vs_Pt$type)
names(enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr) = enhancers_not_linked_with_activation_TADs_hs_vs_Pt$names
seqlevelsStyle(enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs_hs_vs_Pt.gtf"))
export.bed( enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A6.bedcons = mclapply( as.list(names(conserved_enhancers)),
function(enh){ Figure_out_mismatching_sequences( conserved_enhancers[which(names(conserved_enhancers)==enh)],
conserved_enhancers_seq_Hs[which(names(conserved_enhancers_seq_Hs)==enh)],
conserved_enhancers_pt[which(names(conserved_enhancers_pt)==enh)],
conserved_enhancers_seq_Pt[which(names(conserved_enhancers_seq_Pt)==enh)]) },
mc.cores = 4L )
conserved_enhancers_hs_vs_Pt = do.call("rbind",cons)
save(conserved_enhancers_hs_vs_Pt,
file=paste0(objects_directory,"conserved_enhancers_hs_vs_Pt.RData"))
conserved_enhancers_hs_vs_Pt_gr = GRanges( seqnames=Rle(conserved_enhancers_hs_vs_Pt$seqnames),
ranges = IRanges( conserved_enhancers_hs_vs_Pt$start,
end=conserved_enhancers_hs_vs_Pt$end ),
kind=conserved_enhancers_hs_vs_Pt$type)
names(conserved_enhancers_hs_vs_Pt_gr) = conserved_enhancers_hs_vs_Pt$names
export.bed( conserved_enhancers_hs_vs_Pt_gr, con=paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt_ucsc.bed"))
seqlevelsStyle(conserved_enhancers_hs_vs_Pt_gr) = "ncbi"
export.gff( conserved_enhancers_hs_vs_Pt_gr, con=paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt.gtf"))
export.bed( conserved_enhancers_hs_vs_Pt_gr, con=paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A6.bedIntersect with chimp TFBS lifted over to the Hg38 genome assembly
## NOTE(review): every other bedtools step in this section runs from ~/Documents/Tools/bedtools2/ — confirm this path is intended.
cd ~/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A6.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A7.bedlostE = mclapply( as.list(names(genuine_lost_enhancers_that_do_something)),
function(enh){ Figure_out_mismatching_sequences( genuine_lost_enhancers_that_do_something[which(names(genuine_lost_enhancers_that_do_something)==enh)],
lost_enhancers_linked_seq_Hs[which(names(lost_enhancers_linked_seq_Hs)==enh)],
lost_linked_pt[which(names(lost_linked_pt)==enh)],
lost_enhancers_linked_seq_Pt[which(names(lost_enhancers_linked_seq_Pt)==enh)]) },
mc.cores = 4L )
lost_enhancers_linked_hs_vs_Pt = do.call("rbind",lostE)
save(lost_enhancers_linked_hs_vs_Pt,
file=paste0(objects_directory,"lost_enhancers_linked_hs_vs_Pt.RData"))
# Turn the human-vs-chimp mismatch table of the lost, linked enhancers into a GRanges object;
# the mismatch type is carried along in the `kind` metadata column and the ranges are named
# after the `names` column of the table.
lost_enhancers_linked_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(lost_enhancers_linked_hs_vs_Pt$seqnames),
  ranges = IRanges( lost_enhancers_linked_hs_vs_Pt$start,
                    end = lost_enhancers_linked_hs_vs_Pt$end ),
  kind = lost_enhancers_linked_hs_vs_Pt$type)
names(lost_enhancers_linked_hs_vs_Pt_gr) = lost_enhancers_linked_hs_vs_Pt$names
# First export happens before the seqlevels style is switched to "ncbi" below.
# The suffix is spelled `_UCSC.bed` so that it matches, byte for byte, the path the bedtools
# commands for the lost linked enhancers read (and the sibling export for the not-linked lost
# enhancers); a lowercase `_ucsc.bed` is only found on a case-insensitive filesystem.
export.bed( lost_enhancers_linked_hs_vs_Pt_gr,
            con=paste0(outputs_directory,"lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed"))
# Switch the seqlevels style to "ncbi" and export the same ranges as GTF.
seqlevelsStyle(lost_enhancers_linked_hs_vs_Pt_gr) = "ncbi"
export.gff( lost_enhancers_linked_hs_vs_Pt_gr,
            con=paste0(outputs_directory,"lost_enhancers_linked_hs_vs_Pt_gr.gtf"))
export.bed( lost_enhancers_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_linked_hs_vs_Pt_gr.bed"))
Intersect the positions of mismatches with TFBS inferred for chimp and lifted over to human.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A6.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A7.bedlostI = mclapply( as.list(names(genuine_lost_enhancers_that_do_nothing)),
function(enh){ Figure_out_mismatching_sequences( genuine_lost_enhancers_that_do_nothing[which(names(genuine_lost_enhancers_that_do_nothing)==enh)],
lost_enhancers_not_linked_seq_Hs[which(names(lost_enhancers_not_linked_seq_Hs)==enh)],
lost_not_linked_pt[which(names(lost_not_linked_pt)==enh)],
lost_enhancers_not_linked_seq_Pt[which(names(lost_enhancers_not_linked_seq_Pt)==enh)]) },
mc.cores = 4L )
# Stack the per-enhancer results of the parallel Figure_out_mismatching_sequences run into one table.
lost_enhancers_not_linked_hs_vs_Pt = do.call("rbind",lostI)
# Cache the combined table under objects_directory, the location used by every other
# *_hs_vs_Pt.RData cache in this section; outputs_directory is where the exported
# BED/GTF files go.
save(lost_enhancers_not_linked_hs_vs_Pt,
     file=paste0(objects_directory,"lost_enhancers_not_linked_hs_vs_Pt.RData"))
lost_enhancers_not_linked_hs_vs_Pt_gr = GRanges( seqnames=Rle(lost_enhancers_not_linked_hs_vs_Pt$seqnames),
ranges = IRanges( lost_enhancers_not_linked_hs_vs_Pt$start,
end=lost_enhancers_not_linked_hs_vs_Pt$end ),
kind=lost_enhancers_not_linked_hs_vs_Pt$type)
names(lost_enhancers_not_linked_hs_vs_Pt_gr) = lost_enhancers_not_linked_hs_vs_Pt$names
export.bed( lost_enhancers_not_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed"))
seqlevelsStyle(lost_enhancers_not_linked_hs_vs_Pt_gr) = "ncbi"
export.gff( lost_enhancers_not_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt_gr.gtf"))
export.bed( lost_enhancers_not_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt_gr.bed"))
Again, intersect the positions of mismatches with TFBS inferred for chimp and lifted over to human.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A6.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A7.bedconserved_enhancers_hs_vs_Pt = import.bed(paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt_ucsc.bed"))
conserved_enhancers = import.bed(paste0(outputs_directory,"conserved_enhancers.bed"))
names(conserved_enhancers) = conserved_enhancers$name
conserved_enhancers = conserved_enhancers[order(width(conserved_enhancers))]
conserved_enhancers = conserved_enhancers[which(!duplicated(names(conserved_enhancers))) ]
# Read the bedtools intersect results back into R, one call per TFBS_analysis output directory.
# Directories produced by intersecting a UCSC-style BED with the chimp motifs lifted over to
# human are read with readBedtools_UCSC; the remaining ones are read with readBedtools_res.
# NOTE(review): the trailing positional arguments 4 and 7 are interpreted by the reader
# functions, which are defined elsewhere — presumably column indices; confirm.
linked_with_activation_TFBSchange = readBedtools_res( filePath=paste0(outputs_directory,"TFBS_analysis/enhancers_linked_with_activation/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
linked_with_activation_TFBSchange_chimp = readBedtools_UCSC( filePath=paste0(outputs_directory,"TFBS_analysis/enhancers_linked_with_activation_TFBS_chimp/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
not_linked_with_activation_TFBSchange = readBedtools_res( filePath=paste0(outputs_directory,"TFBS_analysis/enhancers_not_linked_with_activation/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
# Conserved enhancers: the same mismatch positions intersected with human (Hs) and chimp (Pt) motif sets.
conserved_TFBSchange_Hs = readBedtools_res( filePath=paste0(outputs_directory,"TFBS_analysis/conserved_enhancers_Hs/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
conserved_TFBSchange_Pt = readBedtools_UCSC( filePath=paste0(outputs_directory,"TFBS_analysis/conserved_enhancers_Pt/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
# Lost enhancers (linked / not linked): only chimp-motif intersections are read here.
lost_linked = readBedtools_UCSC( filePath=paste0(outputs_directory,"TFBS_analysis/lost_enhancers_linked_pt/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
lost_not_linked = readBedtools_UCSC( filePath=paste0(outputs_directory,"TFBS_analysis/lost_enhancers_not_linked_pt/"),
chroms = paste0("chr",c(1:22,'X','Y')),4,7)
#### --------------------------------------------------------------
# Post-process every raw intersect table with processTFBSresult, using the TFsEnsemblG annotation.
# Tables read with readBedtools_res are processed with nameColumn "names"; tables read with
# readBedtools_UCSC are processed with nameColumn "names3".
# The first three results overwrite the raw tables they were computed from.
linked_with_activation_TFBSchange = processTFBSresult(linked_with_activation_TFBSchange,
tfanno=TFsEnsemblG,
nameColumn="names")
linked_with_activation_TFBSchange_chimp = processTFBSresult(linked_with_activation_TFBSchange_chimp,
tfanno=TFsEnsemblG,
nameColumn = "names3")
not_linked_with_activation_TFBSchange = processTFBSresult(not_linked_with_activation_TFBSchange,
tfanno=TFsEnsemblG,
nameColumn="names")
# The conserved and lost sets are stored under new names; the raw tables are left untouched.
conserved_TFBSchange = processTFBSresult(conserved_TFBSchange_Hs,
tfanno=TFsEnsemblG,
nameColumn = "names")
conserved_TFBSchange_chimp = processTFBSresult(conserved_TFBSchange_Pt,
tfanno=TFsEnsemblG,
nameColumn = "names3")
lost_linked_TFBSchange = processTFBSresult(lost_linked,
tfanno=TFsEnsemblG,
nameColumn = "names3")
lost_not_linked_TFBSchange = processTFBSresult(lost_not_linked,
tfanno=TFsEnsemblG,
nameColumn = "names3")
save( linked_with_activation_TFBSchange, not_linked_with_activation_TFBSchange,linked_with_activation_TFBSchange_chimp,
conserved_TFBSchange,conserved_TFBSchange_chimp,
lost_linked_TFBSchange,lost_not_linked_TFBSchange,
file=paste0(objects_directory,"evolutionary_changes_in_TFBS.RData" ) )
save(linked_with_activation_TFBSchange,file=paste0(objects_directory,"linked_with_activation_TFBSchange.RData"))
save(linked_with_activation_TFBSchange_chimp,file=paste0(objects_directory,"linked_with_activation_TFBSchange_chimp.Rdata"))load(paste0(objects_directory,"evolutionary_changes_in_TFBS.RData" ))
conserved_enhancers_hs_vs_Pt = import.bed(paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt_ucsc.bed"))
conserved_enhancers = import.bed(paste0(outputs_directory,"conserved_enhancers.bed"))
names(conserved_enhancers) = conserved_enhancers$name
conserved_enhancers = conserved_enhancers[order(width(conserved_enhancers))]
conserved_enhancers = conserved_enhancers[which(!duplicated(names(conserved_enhancers))) ]
Overall conservation of TFBS - take conserved enhancers
conserved_TFBSchange_table = table( conserved_TFBSchange$TF )
conserved_TFBSchange_chimp_table = table(conserved_TFBSchange_chimp$TF)
conserved_TFBSchange_human_table = conserved_TFBSchange_table[ match(names(conserved_TFBSchange_chimp_table),names(conserved_TFBSchange_table))]
all(names(conserved_TFBSchange_human_table)==names(conserved_TFBSchange_chimp_table))## [1] TRUE
# Count TFBS changes per TF for the enhancers linked with activation, once for the human
# motif intersections and once for the chimp motif intersections.
linked_TFBSchange_table = table( linked_with_activation_TFBSchange$TF )
linked_with_activation_TFBSchange_chimp_table = table(linked_with_activation_TFBSchange_chimp$TF)
# Reorder the human counts to the TF order of the chimp table so the two tables can be
# divided element by element further down.
linked_with_activation_TFBSchange_human_table = linked_TFBSchange_table[ match(names(linked_with_activation_TFBSchange_chimp_table),names(linked_TFBSchange_table))]
# Sanity check: the human names must line up with the chimp names. Comparing the human table
# with itself can never fail, so compare against the chimp table; useNA = "ifany" makes TFs
# that are missing from the human table (NA after match()) visible instead of silently dropped.
table(names(linked_with_activation_TFBSchange_human_table)==names(linked_with_activation_TFBSchange_chimp_table),
      useNA = "ifany")
##
## TRUE
## 674
# Per-TF log2(human/chimp) ratio of counts, conserved vs linked enhancers.
# The original call passed `ylab` twice (ylab="" and the real label); R kept
# the empty one and discarded the label with a warning, so the y axis was
# unlabeled. Only the intended label is passed now.
boxplot( log2(conserved_TFBSchange_human_table/conserved_TFBSchange_chimp_table),
         log2(linked_with_activation_TFBSchange_human_table/linked_with_activation_TFBSchange_chimp_table),
         col="white", border=c("green4","turquoise4"),
         notch=TRUE, outline=FALSE, ylim=c(-1.5,1.5),
         ylab="Change in TFBS [log2(human/Chimp)]")
axis(1,at=c(1,2),lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
# Welch t-test on the same two sets of ratios (this was fused onto the box() line).
t.test( log2(conserved_TFBSchange_human_table/conserved_TFBSchange_chimp_table),
        log2(linked_with_activation_TFBSchange_human_table/linked_with_activation_TFBSchange_chimp_table) )
##
## Welch Two Sample t-test
##
## data: log2(conserved_TFBSchange_human_table/conserved_TFBSchange_chimp_table) and log2(linked_with_activation_TFBSchange_human_table/linked_with_activation_TFBSchange_chimp_table)
## t = -8.9693, df = 1006.3, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2721183 -0.1744232
## sample estimates:
## mean of x mean of y
## 0.06966084 0.29293161
Conserved and species-specific peaks frequently feature TFBS changes
# Count, for every peak, how many distinct start positions occur in `tfbsobj`.
#   tfbsobj: object supporting start() and carrying a `peak` column
#            (presumably a GRanges of TFBS changes - confirm with callers).
#   returns: integer vector named by peak.
howManyChangesPerPeak = function(tfbsobj){
  starts_by_peak = split(start(tfbsobj), tfbsobj$peak)
  distinct_starts = lapply(starts_by_peak, function(pos){ sum(!duplicated(pos)) })
  unlist(distinct_starts)
}
# Distribution of the number of distinct TFBS change positions per element
# for the linked, not linked and conserved sets.
boxplot( howManyChangesPerPeak( linked_with_activation_TFBSchange ),
         howManyChangesPerPeak( not_linked_with_activation_TFBSchange ),
         howManyChangesPerPeak( conserved_TFBSchange ),
         col="white",outline=FALSE,
         names=c('linked','not linked','conserved'),las=2,
         border=c('turquoise4','gray80','green4'),
         ylab="changes in TFBS per element",lwd=2 )
axis(2,lwd=2,las=2)
axis(1,at=c(1,2,3),lwd=2,c('linked','not linked','conserved'),las=2)
box(col="black",lwd=2)
# linked vs not linked (this call was fused onto the box() line, a syntax error)
t.test( howManyChangesPerPeak( linked_with_activation_TFBSchange ),
        howManyChangesPerPeak( not_linked_with_activation_TFBSchange ) )
##
## Welch Two Sample t-test
##
## data: howManyChangesPerPeak(linked_with_activation_TFBSchange) and howManyChangesPerPeak(not_linked_with_activation_TFBSchange)
## t = 7.0175, df = 1931.9, p-value = 0.000000000003114
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.7881839 1.3996136
## sample estimates:
## mean of x mean of y
## 6.746499 5.652600
# conserved vs not linked
t.test( howManyChangesPerPeak( conserved_TFBSchange ),
        howManyChangesPerPeak( not_linked_with_activation_TFBSchange ) )
##
## Welch Two Sample t-test
##
## data: howManyChangesPerPeak(conserved_TFBSchange) and howManyChangesPerPeak(not_linked_with_activation_TFBSchange)
## t = -6.2941, df = 6123.5, p-value = 0.0000000003306
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6712331 -0.3524108
## sample estimates:
## mean of x mean of y
## 5.140778 5.652600
# Share of changes of width 1 ("MM") versus width 2..1000 in each set.
# Widths above 1000 fall outside the cut() breaks (NA): table() drops them,
# but they still count in the denominator.
A=table(cut(width(conserved_TFBSchange),c(0,1,1000)))/length(conserved_TFBSchange)
B=table(cut(width(linked_with_activation_TFBSchange),c(0,1,1000)))/length(linked_with_activation_TFBSchange)
C=table(cut(width(not_linked_with_activation_TFBSchange),c(0,1,1000)))/length(not_linked_with_activation_TFBSchange)
ABC=rbind(A,B,C)
# `names.arg` is spelled out; the original relied on partial matching of `names=`.
barplot( 100*ABC, beside=TRUE,col=c("green4","turquoise4","gray80"),ylim=c(0,100),
         names.arg=c("MM","Changes>1bp"),ylab="%" )
axis(2,lwd=2)
legend(x=5,y=90,c("Conserved","Linked","Not linked"),cex=1,
       pch=15,col=c("green4","turquoise4","gray80"),bty="n")

# Any particular TFs? Preparations
## ------------------------------------------------------------
# matrix for each peak
# One makeMatrixTFBS4peaks() call per (TFBS change set, enhancer set) pair;
# every call uses the same TF list, unique(TFsEnsemblG$Fixed).
TFmat_linked_with_activation = makeMatrixTFBS4peaks( tfmut=linked_with_activation_TFBSchange,
                                                     theTFs=unique(TFsEnsemblG$Fixed),
                                                     allPeaks=enhancers_linked_with_activation )
TFmat_not_linked_with_activation = makeMatrixTFBS4peaks( tfmut=not_linked_with_activation_TFBSchange,
                                                         theTFs=unique(TFsEnsemblG$Fixed),
                                                         allPeaks=enhancers_not_linked_with_activation )
TFmat_linked_with_activation_chimp = makeMatrixTFBS4peaks( tfmut=linked_with_activation_TFBSchange_chimp,
                                                           theTFs=unique(TFsEnsemblG$Fixed),
                                                           allPeaks=enhancers_linked_with_activation )
TFmat_conserved = makeMatrixTFBS4peaks( tfmut=conserved_TFBSchange,
                                        theTFs=unique(TFsEnsemblG$Fixed),
                                        allPeaks=conserved_enhancers )
TFmat_conserved_chimp = makeMatrixTFBS4peaks( tfmut=conserved_TFBSchange_chimp,
                                              theTFs=unique(TFsEnsemblG$Fixed),
                                              allPeaks=conserved_enhancers )
TFmat_lost_linked = makeMatrixTFBS4peaks( tfmut=lost_linked_TFBSchange,
                                          theTFs=unique(TFsEnsemblG$Fixed),
                                          allPeaks=genuine_lost_enhancers_that_do_something )
TFmat_lost_not_linked = makeMatrixTFBS4peaks( tfmut=lost_not_linked_TFBSchange,
                                              theTFs=unique(TFsEnsemblG$Fixed),
                                              allPeaks=genuine_lost_enhancers_that_do_nothing )
# Cache all matrices together.
save(TFmat_linked_with_activation,TFmat_not_linked_with_activation,TFmat_linked_with_activation_chimp,
     TFmat_conserved, TFmat_conserved_chimp,TFmat_lost_linked,TFmat_lost_not_linked,
     file=paste0(objects_directory,"TFmatrices_linked_not_linked.RData"))

# Assess the significance of the observed differences in frequency
load(paste0(objects_directory,"TFmatrices_linked_not_linked.RData"))
# Per TF (matrix column): number of rows with a value > 0 (Motif) and number
# of rows equal to 0 (noMotif), for linked (RES) and not linked (SER) enhancers.
RES = do.call("rbind",
              apply(TFmat_linked_with_activation,2,function(x){
                data.frame( Motif=sum(x>0), noMotif=sum(x==0) ) } ) )
SER = do.call("rbind",
              apply(TFmat_not_linked_with_activation,2,function(x){
                data.frame( Motif=sum(x>0), noMotif=sum(x==0) ) } ) )
# Fisher's exact test per TF on the 2x2 table (linked / not linked) x (Motif / noMotif).
TFs_FT = data.frame()
for( i in colnames(TFmat_linked_with_activation) ){
  m=rbind(linked=RES[rownames(RES)==i,],
          notLinked=SER[rownames(SER)==i,])
  tp = fisher.test(m)
  tp = data.frame(p_val=tp$p.value,
                  odds=tp$estimate,
                  number_in_linked = RES[rownames(RES)==i,1],
                  number_in_not_linked = SER[rownames(SER)==i,1],
                  fraction_in_linked = RES[rownames(RES)==i,1]/rowSums(RES[rownames(RES)==i,]),
                  fraction_in_not_linked = SER[rownames(SER)==i,1]/rowSums(SER[rownames(SER)==i,]),
                  tf = i)
  # New rows are prepended, so the final table is in reverse column order.
  TFs_FT=rbind(tp,TFs_FT) }
# p.adjust() with its default method ("holm").
TFs_FT$p_adjust = p.adjust(TFs_FT$p_val)
TFs_FT$p_adjust_bin = cut(-log10(TFs_FT$p_adjust), c(-1,0,1, seq(2,10,length.out=252),45) )
# Fraction of enhancers carrying each TF: linked (x) vs not linked (y);
# TFs with adjusted p < 0.01 are drawn in blue.
par(pty="s")
plot( x=TFs_FT$fraction_in_linked,
      y=TFs_FT$fraction_in_not_linked,
      pch=19, cex=0.5,
      xlab="Linked with activation",
      ylab="Not linked with activation",
      xlim=c(0,0.3), ylim=c(0,0.3),
      col=ifelse(TFs_FT$p_adjust<0.01,"blue3","wheat2"))
abline(a=0,b=1,col='black')
axis(1,lwd=2)
axis(2,lwd=2)
box(col='black',lwd=2)
# Label significant TFs present in more than 10% of the linked enhancers.
text(x=TFs_FT$fraction_in_linked[TFs_FT$p_adjust<0.01 & TFs_FT$fraction_in_linked>0.1]+0.005,
     y=TFs_FT$fraction_in_not_linked[TFs_FT$p_adjust<0.01 & TFs_FT$fraction_in_linked>0.1]+0.005,
     TFs_FT$tf[TFs_FT$p_adjust<0.01 & TFs_FT$fraction_in_linked>0.1],
     cex=1)

# Odds of seeing that many stripe TFs
# TFs whose frequency differs between linked and not linked enhancers.
TFs_FT_filt = TFs_FT[TFs_FT$p_adjust<0.01 ,]
sum( TFs_FT_filt$tf %in% human_stripe_factors$V1 )/nrow(TFs_FT_filt)
## [1] 0.8651685
# 2x2 table for Fisher's exact test: affected (p_adjust < 0.01) vs non-affected
# TFs, stripe vs non-stripe. The two rows must be disjoint. The original
# filled the "non_affected" row from ALL TFs (affected ones included), which
# double-counts the affected TFs; it is now restricted to p_adjust >= 0.01.
m = rbind( affected = c(stripe=sum( TFs_FT[TFs_FT$p_adjust<0.01,]$tf %in% human_stripe_factors$V1 ),
                        non_stripe = sum( ! TFs_FT[ TFs_FT$p_adjust<0.01,]$tf %in% human_stripe_factors$V1 )),
           non_affected = c(stripe=sum( TFs_FT[TFs_FT$p_adjust>=0.01,]$tf %in% human_stripe_factors$V1 ),
                            non_stripe = sum( ! TFs_FT[TFs_FT$p_adjust>=0.01,]$tf %in% human_stripe_factors$V1)) )
m
## Output recorded with the original (overlapping) table; with these counts
## the corrected "non_affected" row is 199-77 = 122 and 476-12 = 464.
## stripe non_stripe
## affected 77 12
## non_affected 199 476
fisher.test(m)
## Output recorded with the original (overlapping) table; re-run to refresh.
##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value < 0.00000000000000022
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 8.046439 31.566055
## sample estimates:
## odds ratio
## 15.28785
par(pty="m",mar=c(5,3,3,1))
# The y range stays at 0..4 unless the log2 odds ratio exceeds it.
barplot( log2(fisher.test(m)$estimate), col="blue3",
         ylim=c(0, max(4, ceiling(log2(fisher.test(m)$estimate)))) )
axis(2,lwd=2)
# Key TFs: significant TFs that are also stripe factors.
keyTFs = TFs_FT_filt$tf
keyTFs = keyTFs[keyTFs %in% human_stripe_factors$V1]
# Fraction of lost & linked enhancers with each TF, with a pseudo-count of 1.
l=(1+colSums(TFmat_lost_linked>0))/nrow(TFmat_lost_linked)
# Fraction of lost & not linked enhancers with each TF, with a pseudo-count of 1.
L=(1+colSums(TFmat_lost_not_linked>0))/nrow(TFmat_lost_not_linked)
# Per-TF Motif / noMotif counts for the lost enhancer sets; 1 is added to
# every cell.
LOS = do.call("rbind",
              apply(TFmat_lost_linked,2,function(x){
                data.frame( Motif=1+sum(x>0), noMotif=1+sum(x==0) ) } ) )
SOL = do.call("rbind",
              apply(TFmat_lost_not_linked,2,function(x){
                data.frame( Motif=1+sum(x>0), noMotif=1+sum(x==0) ) } ) )
# Fisher's exact test per TF, lost linked vs lost not linked.
# NOTE(review): the loop iterates over the columns of
# TFmat_linked_with_activation, not of TFmat_lost_linked; this only works
# if both matrices have the same column names - confirm.
lost_TFs_FT_chimp = data.frame()
for( i in colnames(TFmat_linked_with_activation) ){
  m=rbind(linked=LOS[rownames(LOS)==i,],
          notLinked=SOL[rownames(SOL)==i,])
  tp = fisher.test(m)
  tp = data.frame(p_val=tp$p.value,
                  odds=tp$estimate,
                  number_in_linked = LOS[rownames(LOS)==i,1],
                  number_in_not_linked = SOL[rownames(SOL)==i,1],
                  fraction_in_linked = LOS[rownames(LOS)==i,1]/rowSums(LOS[rownames(LOS)==i,]),
                  fraction_in_not_linked = SOL[rownames(SOL)==i,1]/rowSums(SOL[rownames(SOL)==i,]),
                  tf = i)
  # rows are prepended, so the table ends up in reverse column order
  lost_TFs_FT_chimp=rbind(tp,lost_TFs_FT_chimp) }
# Odds ratios for key TFs, all stripe TFs and non-stripe TFs.
par(mfrow=c(1,1),mar=c(10,4,4,4), pty="m")
boxplot( lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% keyTFs],
         lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1],
         lost_TFs_FT_chimp$odds[! lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1],
         col="white",border=c("blue3","steelblue","coral3"),
         ylim=c(0,3),ylab="Odds",
         outline=FALSE,axes=FALSE)
abline(h=1)
axis(1,lwd=2,at=c(1,2,3),c("77 stripe TFs","All stripe TFs","non-stripe TFs"),las=2)
axis(2,lwd=2)
# key TFs vs non-stripe TFs (this call was fused onto the axis() line)
t.test(lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% keyTFs],
       lost_TFs_FT_chimp$odds[! lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1])
##
## Welch Two Sample t-test
##
## data: lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% keyTFs] and lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1]
## t = 4.2022, df = 332.24, p-value = 0.00003402
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1255168 0.3464608
## sample estimates:
## mean of x mean of y
## 1.374035 1.138047
# all stripe TFs vs non-stripe TFs
t.test(lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1],
       lost_TFs_FT_chimp$odds[! lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1])
##
## Welch Two Sample t-test
##
## data: lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1] and lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1]
## t = 2.4189, df = 346.62, p-value = 0.01608
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.03773178 0.36604516
## sample estimates:
## mean of x mean of y
## 1.339935 1.138047
# Fraction of enhancers by summed key-TF value per enhancer, binned as
# 0, 1, 2, 3-4 and >4, for linked vs not linked enhancers.
# drop=FALSE keeps the selection two-dimensional even with a single TF.
par(mfrow=c(1,1), pty="m")
m = rbind( linked=table(cut(rowSums(TFmat_linked_with_activation[,keyTFs,drop=FALSE]),
                            c(-Inf,0,1,2,4,Inf) ) )/nrow(TFmat_linked_with_activation),
           not_linked=table(cut(rowSums(TFmat_not_linked_with_activation[,keyTFs,drop=FALSE]),
                                c(-Inf,0,1,2,4,Inf) ) )/nrow(TFmat_not_linked_with_activation))
par(mfrow=c(2,1),mar=c(4,4,1,1),pty="m")
# Fixes: the bin (2,4] was labelled "4" and is now "3-4"; the axis title typo
# "Francion" is corrected; `names.arg` is spelled out instead of `names=`.
barplot(m,beside=TRUE,col=c("turquoise4","gray70"),ylim=c(0,0.5),
        names.arg=c("0","1","2","3-4",">4"),ylab="Fraction of sequences with TFs")
axis(2,lwd=2)
# Same plot for a hand-picked TF list; names missing from the matrix are dropped.
# NOTE(review): "AFT3" looks like a typo for "ATF3" and, if so, is silently
# removed by the filter below - confirm before changing.
astroTFs = c("SOX9","SOX2","NFIA","NFIB","AFT3","RUNX2","NR1F2","DBX2","LHX2","STAT3")
astroTFs = astroTFs[astroTFs %in% colnames(TFmat_linked_with_activation)]
# par(mfrow=c(1,1))
M = rbind( linked=table(cut(rowSums(TFmat_linked_with_activation[,astroTFs,drop=FALSE]),
                            c(-Inf,0,1,2,4,Inf) ) )/nrow(TFmat_linked_with_activation),
           not_linked=table(cut(rowSums(TFmat_not_linked_with_activation[,astroTFs,drop=FALSE]),
                                c(-Inf,0,1,2,4,Inf) ) )/nrow(TFmat_not_linked_with_activation))
barplot(M,beside=TRUE,col=c("turquoise4","gray70"),ylim=c(0,1),
        names.arg=c("0","1","2","3-4",">4"),ylab="Fraction of sequences with TFs")
axis(2,lwd=2)
# Absolute counts per bin for the linked enhancers (this call was fused onto
# the axis() line).
table(cut(rowSums(TFmat_linked_with_activation[,keyTFs,drop=FALSE]),
          c(-Inf,0,1,2,4,Inf) ) )
##
## (-Inf,0] (0,1] (1,2] (2,4] (4, Inf]
## 325 185 102 147 684
# Linked enhancers with a non-zero key-TF sum. nrow() replaces the
# hard-coded 1443, which is the total of the counts recorded above.
nrow(TFmat_linked_with_activation) - sum(rowSums(TFmat_linked_with_activation[,keyTFs,drop=FALSE])==0)
## [1] 1118
How many changes affect TFs that are not stripe factors?
# Keep only the columns whose TF name is absent from human_stripe_factors$V1,
# then tabulate, per row, how many of those columns are > 0.
not_stripeTFs_changes = TFmat_linked_with_activation[, is.na(match(colnames(TFmat_linked_with_activation),
                                                                   human_stripe_factors$V1))]
table(rowSums(not_stripeTFs_changes > 0))
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 25 27 51 64 67 68 67 66 77 59 82 73 71 65 47 61 60 42 37 35 33 25 29 23 24 19
## 26 27 28 29 30 31 32 33 34 35 36 37 38 40 41 42 43 44 45 46 47 48 50 53 54 56
## 20 19 16 9 11 15 10 7 3 8 2 3 2 2 1 2 1 2 3 1 1 1 1 1 1 1
## 66 68 70
## 1 1 1
Export these enhancers
# Split the linked enhancers by whether the key-TF columns sum to more than
# zero (with) or to exactly zero (wo), and store both matrices.
enhancers__stripe_factors = TFmat_linked_with_activation[,keyTFs]
enhancers_with_stripe_factors = TFmat_linked_with_activation[rowSums(enhancers__stripe_factors)>0,]
enhancers_wo_stripe_factors = TFmat_linked_with_activation[rowSums(enhancers__stripe_factors)==0,]
save(enhancers_with_stripe_factors,
     enhancers_wo_stripe_factors,
     file=paste0(objects_directory,"enhancers_stripeTFs_no_stripeTFs.RData"))

# Count, for every peak and every TF, how many entries of `tfmut` there are.
# (The definition was fused onto the end of the save() call above, which is a
# syntax error; it now starts on its own line.)
#
#   tfmut:    object with `$peak` (peak name) and `$TF` (TF name) per entry
#   theTFs:   TF names; duplicates are ignored, one output column per TF
#   allPeaks: named object; one output row per element, named by names(allPeaks)
#   returns:  data.frame of integer counts (rows = peaks, columns = TFs);
#             peaks or TFs without any entry keep 0
numberTFperPeak = function(tfmut, theTFs, allPeaks){
  tfs = unique(theTFs)
  res = as.data.frame(matrix(0L, nrow=length(allPeaks), ncol=length(tfs)))
  rownames(res) = names(allPeaks)
  colnames(res) = tfs
  # peak names of all entries, grouped by TF
  peaks_by_tf = split(tfmut$peak, tfmut$TF)
  for( tf in tfs ){
    col_idx = which(colnames(res) == tf)
    row_idx = which(rownames(res) %in% peaks_by_tf[[tf]])
    if( length(row_idx) > 0 ){
      # entries per peak for this TF, aligned to the matching rows
      hits = table(peaks_by_tf[[tf]])
      counts = hits[match(rownames(res)[row_idx], names(hits))]
      res[ cbind(row_idx, rep(col_idx, length(row_idx))) ] = as.vector(counts)
    }
  }
  res
}
## -----------------------
# Entries of one set that do not overlap any entry of the other set.
# subsetByOverlaps(..., invert=TRUE) replaces x[-queryHits(findOverlaps(x, y))]:
# when there is no overlap at all, the negative index is empty and the old
# form returned an EMPTY object instead of all of x.
linked_with_activation_TFBSchange_Hs_spe = subsetByOverlaps(linked_with_activation_TFBSchange,
                                                            linked_with_activation_TFBSchange_chimp,
                                                            invert=TRUE)
linked_with_activation_TFBSchange_Pt_spe = subsetByOverlaps(linked_with_activation_TFBSchange_chimp,
                                                            linked_with_activation_TFBSchange,
                                                            invert=TRUE)
TFmat_linked_with_activation_Hs = numberTFperPeak( tfmut=linked_with_activation_TFBSchange_Hs_spe,
                                                   theTFs=unique(TFsEnsemblG$Fixed),
                                                   allPeaks=enhancers_linked_with_activation )
TFmat_linked_with_activation_Pt = numberTFperPeak( tfmut=linked_with_activation_TFBSchange_Pt_spe,
                                                   theTFs=unique(TFsEnsemblG$Fixed),
                                                   allPeaks=enhancers_linked_with_activation )
conserved_TFBSchange_Hs_spe = subsetByOverlaps(conserved_TFBSchange, conserved_TFBSchange_chimp, invert=TRUE)
conserved_TFBSchange_Pt_spe = subsetByOverlaps(conserved_TFBSchange_chimp, conserved_TFBSchange, invert=TRUE)
TFmat_conserved_Hs = numberTFperPeak( tfmut=conserved_TFBSchange_Hs_spe,
                                      theTFs=unique(TFsEnsemblG$Fixed),
                                      allPeaks=conserved_enhancers )
TFmat_conserved_Pt = numberTFperPeak( tfmut=conserved_TFBSchange_Pt_spe,
                                      theTFs=unique(TFsEnsemblG$Fixed),
                                      allPeaks=conserved_enhancers )
all(rownames(TFmat_conserved_Hs)==rownames(TFmat_conserved_Pt))
## [1] TRUE
# Per peak and TF: TRUE where the human-only count exceeds the chimp-only
# count (gain), or the reverse (loss).
net_TFBS_gain_conserved = (TFmat_conserved_Hs>TFmat_conserved_Pt)
net_TFBS_loss_conserved = (TFmat_conserved_Hs<TFmat_conserved_Pt)
net_TFBS_gain_linked = (TFmat_linked_with_activation_Hs>TFmat_linked_with_activation_Pt)
net_TFBS_loss_linked = (TFmat_linked_with_activation_Hs<TFmat_linked_with_activation_Pt)
## -----------------------
# Number of peaks with a gain vs a loss, one point per TF; key TFs in blue.
par(mfrow=c(1,2),pty="s",mar=c(4,4,3,3))
plot(colSums(net_TFBS_gain_linked),
     colSums(net_TFBS_loss_linked),pch=19, cex=0.5,
     main="Linked",ylab="TFBS loss",xlab="Gain in TFBS",
     ylim=c(0,60),xlim=c(0,60),
     col=ifelse(names(colSums(net_TFBS_gain_linked)) %in% keyTFs,"blue","gray"))
abline(a=0,b=1)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
plot(colSums(net_TFBS_gain_conserved),colSums(net_TFBS_loss_conserved), pch=19, cex=0.5,
     main="Conserved",ylab="TFBS loss",xlab="Gain in TFBS" ,ylim=c(0,60),xlim=c(0,60),
     col=ifelse(names(colSums(net_TFBS_gain_conserved)) %in% keyTFs,"blue","gray"))
abline(a=0,b=1)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
# Ensembl ids from HS_UP_Genes (this assignment was fused onto the box() line).
up_set = HS_UP_Genes$ensembl_id
# Promoters of the genes in `up_set`.
promoters_HITS_UP = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% up_set ) ]
# NOTE: despite the "_500" suffix, each range is resized to 1,000,000 bp
# around its centre.
promoters_HITS_UP_500 = resize(promoters_HITS_UP,1000000,fix="center")
# Per window: number of overlapping linked enhancers with / without stripe
# factors, and number of overlapping ranges of genuine_gained_enhancers_gr.
promoters_HITS_UP_500_counting = data.frame( with_stripeTF = countOverlaps(promoters_HITS_UP_500,enhancers_linked_with_activation[which(names(enhancers_linked_with_activation) %in% rownames(enhancers_with_stripe_factors))]),
                                             wo_stripeTF = countOverlaps(promoters_HITS_UP_500,enhancers_linked_with_activation[which(names(enhancers_linked_with_activation) %in% rownames(enhancers_wo_stripe_factors))]),
                                             any = countOverlaps(promoters_HITS_UP_500,genuine_gained_enhancers_gr) )
## stripe no stripe
prom_with_with = rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF>0& promoters_HITS_UP_500_counting$wo_stripeTF>0,])
prom_with_wo = rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF>0 & promoters_HITS_UP_500_counting$wo_stripeTF==0,])
prom_wo_with = rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF==0 & promoters_HITS_UP_500_counting$wo_stripeTF>0,])
prom_wo_wo = rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF==0 & promoters_HITS_UP_500_counting$wo_stripeTF==0,])
# Windows overlapping at least one linked enhancer, as a presence/absence
# matrix (columns 1:2) ordered by both columns for the heat map.
promoters_HITS_UP_500_counting_enh = promoters_HITS_UP_500_counting[rowSums(promoters_HITS_UP_500_counting[,1:2])>0,]
m = promoters_HITS_UP_500_counting_enh>0
m = m[order(m[,1],m[,2]),1:2]
par(mar=c(1,1,1,1))
image(t(m),col=c("white","coral2"),axes=FALSE)
box(col="black",lwd=2)
abline(v=0.5,lwd=2)
# Counts per category (the first sum() was fused onto the abline() line).
sum(m[,1]==0 & m[,2]>0)
## [1] 29
sum(m[,1]>0 & m[,2]>0)
## [1] 258
sum(m[,1]>0 & m[,2]==0)
## [1] 299
# Collect the scores (5th column) of all footprint BED files under `TFdir`.
#
# Every entry of `TFdir` is expected to be named "<prefix>_FootPrints" and to
# contain the tab-separated, header-less file "<prefix>.bed".
#   TFdir:   directory holding the "<prefix>_FootPrints" sub-directories
#   SPECIES: label copied into the `species` column of every row
#   returns: data.frame(score, TF, species); TF is the part of <prefix> before
#            the first "_". NULL when `TFdir` has no entries.
readFootprintAnalysis_bed = function(TFdir,SPECIES){
  prefixes = unlist(strsplit(list.files(TFdir), "_FootPrints"))
  per_prefix = vector("list", length(prefixes))
  for( k in seq_along(prefixes) ){
    prefix = prefixes[[k]]
    bed_file = paste0(TFdir, "/", prefix, "_FootPrints/", prefix, ".bed")
    bed = read.delim(bed_file, sep="\t", header=FALSE)
    per_prefix[[k]] = data.frame(score=bed$V5,
                                 TF=strsplit(prefix, "_")[[1]][1],
                                 species=SPECIES)
  }
  do.call("rbind", per_prefix)
}
# Footprint scores per TF, read for both species.
footprintHg = readFootprintAnalysis_bed( paste0(outputs_directory, "footprint_analysis/Stripe_TF_HG38_Footprints_10bp/"),
                                         "Human" )
footprintPt = readFootprintAnalysis_bed( paste0(outputs_directory, "footprint_analysis/Stripe_TF_PT06_Footprints_10bp/"),
                                         "Chimpanzee" )
# Rows whose TF appears in the first column of TFsEnsemblG for a key TF.
# NOTE(review): these two filtered tables are not used by the plot below,
# which takes the unfiltered tables - confirm this is intended.
footprintHg_keyTFs = footprintHg[ footprintHg$TF %in% TFsEnsemblG[ TFsEnsemblG$Fixed %in% keyTFs, 1 ], ]
footprintPt_keyTFs = footprintPt[ footprintPt$TF %in% TFsEnsemblG[ TFsEnsemblG$Fixed %in% keyTFs, 1 ], ]
# Stack both species; the factor levels fix the order Human, Chimpanzee.
footprint_scores = rbind( footprintHg, footprintPt )
footprint_scores$species = factor( footprint_scores$species, levels = c("Human", "Chimpanzee") )
p1 = ggboxplot( footprint_scores,
                x = "TF", y = "score",
                color = "species", palette = c("black", "red"),
                outlier.shape = NA, rotate = TRUE )
ggpar( p1, ylim = c(0, 500) ) + rotate_x_text(90)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
# Enhancer sets of the TAD-based analysis, named by their BED `name` column.
enhancers_linked_with_activation_TADs = import.bed(paste0(outputs_directory,"enhancers_linked_with_activation_TADs.bed"))
names(enhancers_linked_with_activation_TADs) = enhancers_linked_with_activation_TADs$name
enhancers_not_linked_with_activation_TADs = import.bed(paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs.bed"))
names(enhancers_not_linked_with_activation_TADs) = enhancers_not_linked_with_activation_TADs$name
# Read the per-chromosome results (chr1..22, X, Y) and post-process them.
linked_with_activation_TADs_TFBSchange = readBedtools_res( filePath=paste0(outputs_directory,"/TFBS_analysis/enhancers_linked_with_activation_TADs/"),
                                                           chroms = paste0("chr",c(1:22,'X','Y')),4,7)
not_linked_with_activation_TADs_TFBSchange = readBedtools_res( filePath=paste0(outputs_directory,"/TFBS_analysis/enhancers_not_linked_with_activation_TADs/"),
                                                               chroms = paste0("chr",c(1:22,'X','Y')),4,7)
linked_with_activation_TADs_TFBSchange = processTFBSresult(linked_with_activation_TADs_TFBSchange,
                                                           tfanno=TFsEnsemblG,
                                                           nameColumn="names")
not_linked_with_activation_TADs_TFBSchange = processTFBSresult(not_linked_with_activation_TADs_TFBSchange,
                                                               tfanno=TFsEnsemblG,
                                                               nameColumn="names")
save(linked_with_activation_TADs_TFBSchange,
     not_linked_with_activation_TADs_TFBSchange,
     file=paste0(objects_directory,"linked_or_not_with_activation_TADs_TFBSchange_chimp.RData"))
TFmat_linked_with_activation_TADs = makeMatrixTFBS4peaks( tfmut=linked_with_activation_TADs_TFBSchange,
                                                          theTFs=unique(TFsEnsemblG$Fixed),
                                                          allPeaks=enhancers_linked_with_activation_TADs )
TFmat_not_linked_with_activation_TADs = makeMatrixTFBS4peaks( tfmut=not_linked_with_activation_TADs_TFBSchange,
                                                              theTFs=unique(TFsEnsemblG$Fixed),
                                                              allPeaks=enhancers_not_linked_with_activation_TADs )
save(TFmat_linked_with_activation_TADs,TFmat_not_linked_with_activation_TADs,
     file=paste0(objects_directory,"TFmatrices_linked_not_linked_TADs.RData"))
# Reload the cached matrices (this was fused onto the save() call, a syntax error).
load(paste0(objects_directory,"TFmatrices_linked_not_linked_TADs.RData"))
# Per TF (matrix column): number of rows > 0 (Motif) and == 0 (noMotif) for
# the TAD-based linked (TAL) and not linked (LAT) enhancer matrices.
TAL = do.call("rbind",
              apply(TFmat_linked_with_activation_TADs,2,function(x){
                data.frame( Motif=sum(x>0), noMotif=sum(x==0) ) } ) )
LAT = do.call("rbind",
              apply(TFmat_not_linked_with_activation_TADs,2,function(x){
                data.frame( Motif=sum(x>0), noMotif=sum(x==0) ) } ) )
# Fisher's exact test per TF, linked vs not linked.
TFs_TAD_FT = data.frame()
for( i in colnames(TFmat_linked_with_activation_TADs) ){
  m=rbind(linked=TAL[rownames(TAL)==i,],
          notLinked=LAT[rownames(LAT)==i,])
  tp = fisher.test(m)
  tp = data.frame(p_val=tp$p.value,
                  odds=tp$estimate,
                  number_in_linked = TAL[rownames(TAL)==i,1],
                  number_in_not_linked = LAT[rownames(LAT)==i,1],
                  fraction_in_linked = TAL[rownames(TAL)==i,1]/rowSums(TAL[rownames(TAL)==i,]),
                  fraction_in_not_linked = LAT[rownames(LAT)==i,1]/rowSums(LAT[rownames(LAT)==i,]),
                  tf = i)
  # rows are prepended, so the table ends up in reverse column order
  TFs_TAD_FT=rbind(tp,TFs_TAD_FT) }
# p.adjust() with its default method ("holm").
TFs_TAD_FT$p_adjust = p.adjust(TFs_TAD_FT$p_val)
par(pty="s",mfrow=c(1,1))
plot( x=TFs_TAD_FT$fraction_in_linked,
      y=TFs_TAD_FT$fraction_in_not_linked,
      pch=19, cex=0.5,
      xlab="Linked with activation",
      ylab="Not linked with activation",
      xlim=c(0,0.3), ylim=c(0,0.3),
      col=ifelse(TFs_TAD_FT$p_adjust<0.05 ,"blue3","wheat2"))
abline(a=0,b=1,col='black')
axis(1,lwd=2)
axis(2,lwd=2)
box(col='black',lwd=2)
text(x=TFs_TAD_FT$fraction_in_linked[TFs_TAD_FT$p_adjust<0.05 ]+0.005,
     y=TFs_TAD_FT$fraction_in_not_linked[TFs_TAD_FT$p_adjust<0.05 ]+0.005,
     TFs_TAD_FT$tf[TFs_TAD_FT$p_adjust<0.05 ],
     cex=1)
# Print TFs with raw p < 0.01 present in more than 10% of the linked
# enhancers (this was fused onto the text() call, a syntax error).
TFs_TAD_FT[TFs_TAD_FT$p_val<0.01 & TFs_TAD_FT$fraction_in_linked>0.1,]
## p_val odds number_in_linked
## odds ratio673 0.00150988966405420457 1.608891 64
## odds ratio669 0.00061743780893750864 1.683180 63
## odds ratio664 0.00000001553881099418 1.845451 143
## odds ratio645 0.00002249720073059971 1.586955 135
## odds ratio639 0.00547001523869697359 1.401620 100
## odds ratio636 0.00001844604778766271 1.738707 91
## odds ratio635 0.00000177955637888710 1.693353 133
## odds ratio626 0.00051003649052152309 1.607932 79
## odds ratio623 0.00004148693474305783 1.610882 117
## odds ratio616 0.00020763714135284235 1.702754 72
## odds ratio603 0.00000000100972795250 2.345061 86
## odds ratio587 0.00044599744467552140 1.518042 109
## odds ratio584 0.00005385690669902456 1.799449 72
## odds ratio582 0.00000037117379643772 1.768726 130
## odds ratio579 0.00003393160944478522 1.537540 153
## odds ratio566 0.00000000003216698365 2.880740 68
## odds ratio538 0.00000410606699485287 1.786143 98
## odds ratio537 0.00000000049265940835 2.234227 101
## odds ratio514 0.00000028707333831881 1.828129 117
## odds ratio513 0.00000196034299164705 1.876367 89
## odds ratio512 0.00000000000004617375 2.398890 131
## odds ratio511 0.00000000010021448003 2.256986 106
## odds ratio478 0.00003385372076594007 1.704218 93
## odds ratio473 0.00003225347827537949 1.725282 90
## odds ratio458 0.00000033506023894474 1.969773 91
## odds ratio457 0.00000010818835810204 2.112920 80
## odds ratio408 0.00000000256319916115 1.926115 141
## odds ratio308 0.00000138201327477164 1.652499 155
## odds ratio287 0.00000002345307931912 2.008273 105
## odds ratio286 0.00000025777319341452 2.094659 77
## odds ratio284 0.00000000811899658359 2.205526 87
## odds ratio283 0.00000000007428138142 2.293758 103
## odds ratio282 0.00001458947286974133 1.698132 104
## odds ratio96 0.00000578088929590625 1.670073 124
## odds ratio95 0.00000000624475659928 2.205966 88
## odds ratio57 0.00001776370364941432 1.818494 78
## number_in_not_linked fraction_in_linked fraction_in_not_linked
## odds ratio673 316 0.1024 0.06620574
## odds ratio669 298 0.1008 0.06243453
## odds ratio664 661 0.2288 0.13848732
## odds ratio645 706 0.2160 0.14791536
## odds ratio639 571 0.1600 0.11963126
## odds ratio636 426 0.1456 0.08925204
## odds ratio635 657 0.2128 0.13764928
## odds ratio626 394 0.1264 0.08254766
## odds ratio623 597 0.1872 0.12507857
## odds ratio616 339 0.1152 0.07102451
## odds ratio603 304 0.1376 0.06369160
## odds ratio587 583 0.1744 0.12214540
## odds ratio584 322 0.1152 0.06746281
## odds ratio582 617 0.2080 0.12926880
## odds ratio579 831 0.2448 0.17410434
## odds ratio566 194 0.1088 0.04064530
## odds ratio538 450 0.1568 0.09428033
## odds ratio537 379 0.1616 0.07940499
## odds ratio514 534 0.1872 0.11187932
## odds ratio513 388 0.1424 0.08129059
## odds ratio512 475 0.2096 0.09951812
## odds ratio511 396 0.1696 0.08296669
## odds ratio478 444 0.1488 0.09302326
## odds ratio473 424 0.1440 0.08883302
## odds ratio458 380 0.1456 0.07961450
## odds ratio457 310 0.1280 0.06494867
## odds ratio408 627 0.2256 0.13136392
## odds ratio308 794 0.2480 0.16635240
## odds ratio287 436 0.1680 0.09134716
## odds ratio286 300 0.1232 0.06285355
## odds ratio284 326 0.1392 0.06830086
## odds ratio283 378 0.1648 0.07919547
## odds ratio282 502 0.1664 0.10517494
## odds ratio96 616 0.1984 0.12905929
## odds ratio95 330 0.1408 0.06913891
## odds ratio57 347 0.1248 0.07270061
## tf p_adjust
## odds ratio673 ZSCAN22 0.92254258473711903
## odds ratio669 ZNF76 0.38281144154125535
## odds ratio664 ZNF770 0.00001034884812212
## odds ratio645 ZNF467 0.01444320286904501
## odds ratio639 ZNF394 1.00000000000000000
## odds ratio636 ZNF350 0.01189770082304245
## odds ratio635 ZNF341 0.00116738898454994
## odds ratio626 ZNF281 0.31724269710438735
## odds ratio623 ZNF263 0.02638569049658478
## odds ratio616 ZNF148 0.13060376191093784
## odds ratio603 ZFX 0.00000067651772818
## odds ratio587 ZBTB17 0.27830240547752533
## odds ratio584 ZNF324 0.03419913575388060
## odds ratio582 WT1 0.00024460353185246
## odds ratio579 VEZF1 0.02168229843521776
## odds ratio566 THAP1 0.00000002168054698
## odds ratio538 TBX15 0.00267715568064407
## odds ratio537 TBX1 0.00000033057446300
## odds ratio514 SP4 0.00018975547662873
## odds ratio513 SP3 0.00128402465952881
## odds ratio512 SP2 0.00000000003116728
## odds ratio511 SP1 0.00000006734413058
## odds ratio478 RXRA 0.02166638129020165
## odds ratio473 RREB1 0.02067447957451825
## odds ratio458 RARA 0.00022113975770353
## odds ratio457 PURA 0.00007183706977975
## odds ratio408 PATZ1 0.00000171478023881
## odds ratio308 MAZ 0.00090798272152497
## odds ratio287 KLF6 0.00001559629774721
## odds ratio286 KLF5 0.00017064585404041
## odds ratio284 KLF3 0.00000541537072126
## odds ratio283 KLF16 0.00000004999136969
## odds ratio282 KLF15 0.00943938894672264
## odds ratio96 EGR2 0.00376335893163497
## odds ratio95 EGR1 0.00000417149740832
## odds ratio57 NR2F1 0.01147535255752165
# Set the `future.globals.maxSize` option to 64000 * 1024^2
# (the future package documents this limit as a size in bytes).
ms = 1024^2 * 64000
options(list(future.globals.maxSize = ms))
# Directories of the raw 10x feature-barcode matrices, one per sample.
# Human samples (project PRJNA899373):
human1 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_667_S13_SingleCell/raw_feature_bc_matrix/")
human2 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_698_S15_SingleCell/raw_feature_bc_matrix/")
human3 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_700_S14_SingleCell/raw_feature_bc_matrix/")
human4 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_702_9C_SingleCell/raw_feature_bc_matrix/")
human5 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_667_60C_SingleCell/raw_feature_bc_matrix/")
human6 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_669_3C_SingleCell/raw_feature_bc_matrix/")
human7 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_671_64C_SingleCell/raw_feature_bc_matrix/")
human8 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_673_62C_SingleCell/raw_feature_bc_matrix/")
human9 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_675_23C_SingleCell/raw_feature_bc_matrix/")
human10 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_677_63C_SingleCell/raw_feature_bc_matrix/")
human11 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_679_30C_SingleCell/raw_feature_bc_matrix/")
human12 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_681_34C_SingleCell/raw_feature_bc_matrix/")
human13 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_686_56C_SingleCell/raw_feature_bc_matrix/")
human14 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_690_26C_SingleCell/raw_feature_bc_matrix/")
human15 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_F_688_11C_SingleCell/raw_feature_bc_matrix/")
human16 <- paste0(outputs_directory, "/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/HS_Prenatal_M_692_24C_SingleCell/raw_feature_bc_matrix/")
# Macaque samples. The first two paths start with "/" after outputs_directory,
# the remaining ones do not; both spellings are kept exactly as they were.
macaque1 <- paste0(outputs_directory, "/scRNA_published_data/RhMacaque/scRNA_syn17093056_RMB683_DFC/multi/count/raw_feature_bc_matrix/")
macaque2 <- paste0(outputs_directory, "/scRNA_published_data/RhMacaque/scRNA_syn17093056_RMB691_DFC/multi/count/raw_feature_bc_matrix/")
macaque_Ch_78_1 <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/scRNA_SRR23687004_macaque/raw_feature_bc_matrix/")
macaque_Ch_110_DFC <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E110_SRR23687017_M_DFC_scRNA/raw_feature_bc_matrix/")
macaque_Ch_110_OFC <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E110_SRR23686999_M_OFC_scRNA/raw_feature_bc_matrix/")
macaque_Ch_93_DFC <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E93_SRR23687065_M_DFC_scRNA/raw_feature_bc_matrix/")
macaque_Ch_110_DFC_S2 <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E110_SRR23687057_M_DFC_scRNA/raw_feature_bc_matrix/")
macaque_Ch_110_VFC <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E110_SRR23687060_M_VFC_scRNA/raw_feature_bc_matrix/")
macaque_Ch_110_VFC_S2 <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E110_SRR23687012_M_VFC_scRNA/raw_feature_bc_matrix/")
macaque_Ch_77_Frontal <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E77_F_Frontal/raw_feature_bc_matrix/")
macaque_Ch_64_Frontal <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E64_F_Frontal/raw_feature_bc_matrix/")
macaque_Ch_62_Frontal <- paste0(outputs_directory, "scRNA_published_data/RhMacaque/E62_F_frontal_scRNA/raw_feature_bc_matrix/")
# Published metadata table of the human data set.
human_metadata = read.csv( paste0(outputs_directory,'/scRNA_published_data/GSE217511_CorticalPlate_Seuratmetadata.csv' ))
# Path of the sample annotation table (only the path is stored here).
human_sample_anno = paste0(outputs_directory,'/scRNA_published_data/MetaTable.txt' )
# UMI = the part of column X before the first "_". vapply() guarantees one
# string per row, unlike unlist(lapply()).
human_metadata$UMI = vapply(strsplit(human_metadata$X,"_"), function(x){ x[[1]] }, character(1))

# Let's consider the data from the foetal-like cells
# Load the raw 10x matrix of every sample directory defined above.
human1_expression <- Read10X(data.dir = human1)
human2_expression <- Read10X(data.dir = human2)
human3_expression <- Read10X(data.dir = human3)
human4_expression <- Read10X(data.dir = human4)
human5_expression <- Read10X(data.dir = human5)
human6_expression <- Read10X(data.dir = human6)
human7_expression <- Read10X(data.dir = human7)
human8_expression <- Read10X(data.dir = human8)
human9_expression <- Read10X(data.dir = human9)
human10_expression <- Read10X(data.dir = human10)
human11_expression <- Read10X(data.dir = human11)
human12_expression <- Read10X(data.dir = human12)
human13_expression <- Read10X(data.dir = human13)
human14_expression <- Read10X(data.dir = human14)
human15_expression <- Read10X(data.dir = human15)
human16_expression <- Read10X(data.dir = human16)
macaque1_expression <- Read10X(data.dir = macaque1)
macaque2_expression <- Read10X(data.dir = macaque2)
# Note: the object names below do not always mirror the path variable names
# (e.g. "CN_78_1" is read from macaque_Ch_78_1); both are kept as they were.
macaque_CN_78_1_expression <- Read10X(data.dir = macaque_Ch_78_1)
macaque_DFC_110_1_expression <- Read10X(data.dir = macaque_Ch_110_DFC)
macaque_OFC_110_1_expression <- Read10X(data.dir = macaque_Ch_110_OFC)
macaque_93_DFC_expression <- Read10X(data.dir = macaque_Ch_93_DFC)
macaque_110_DFC_S2_expression <- Read10X(data.dir = macaque_Ch_110_DFC_S2)
macaque_110_VFC_expression <- Read10X(data.dir = macaque_Ch_110_VFC)
macaque_110_VFC_S2_expression <- Read10X(data.dir = macaque_Ch_110_VFC_S2)
macaque_77_Frontal_expression <- Read10X(data.dir = macaque_Ch_77_Frontal)
macaque_64_Frontal_expression <- Read10X(data.dir = macaque_Ch_64_Frontal)
macaque_62_Frontal_expression <- Read10X(data.dir = macaque_Ch_62_Frontal)
## ---------------------------
# Wrap every count matrix in a Seurat object. All samples share the same
# project label and the same min.cells / min.features thresholds.
new_sample_object <- function(count_matrix) {
  CreateSeuratObject(count_matrix, project = "AstroEvo_old", min.cells = 3, min.features = 200)
}
human1_expression <- new_sample_object(human1_expression)
human2_expression <- new_sample_object(human2_expression)
human3_expression <- new_sample_object(human3_expression)
human4_expression <- new_sample_object(human4_expression)
human5_expression <- new_sample_object(human5_expression)
human6_expression <- new_sample_object(human6_expression)
human7_expression <- new_sample_object(human7_expression)
human8_expression <- new_sample_object(human8_expression)
human9_expression <- new_sample_object(human9_expression)
human10_expression <- new_sample_object(human10_expression)
human11_expression <- new_sample_object(human11_expression)
human12_expression <- new_sample_object(human12_expression)
human13_expression <- new_sample_object(human13_expression)
human14_expression <- new_sample_object(human14_expression)
human15_expression <- new_sample_object(human15_expression)
human16_expression <- new_sample_object(human16_expression)
macaque1_expression <- new_sample_object(macaque1_expression)
macaque2_expression <- new_sample_object(macaque2_expression)
macaque_CN_78_1_expression <- new_sample_object(macaque_CN_78_1_expression)
macaque_DFC_110_1_expression <- new_sample_object(macaque_DFC_110_1_expression)
macaque_OFC_110_1_expression <- new_sample_object(macaque_OFC_110_1_expression)
macaque_93_DFC_expression <- new_sample_object(macaque_93_DFC_expression)
macaque_110_DFC_S2_expression <- new_sample_object(macaque_110_DFC_S2_expression)
macaque_110_VFC_expression <- new_sample_object(macaque_110_VFC_expression)
macaque_110_VFC_S2_expression <- new_sample_object(macaque_110_VFC_S2_expression)
macaque_77_Frontal_expression <- new_sample_object(macaque_77_Frontal_expression)
macaque_64_Frontal_expression <- new_sample_object(macaque_64_Frontal_expression)
macaque_62_Frontal_expression <- new_sample_object(macaque_62_Frontal_expression)
## ----------------
# Store, per cell, the percentage of counts coming from features whose name
# starts with "MT-" in the "percent.mt" metadata column.
# NOTE(review): the same "^MT-" pattern is applied to the macaque objects;
# confirm the macaque annotation names its mitochondrial genes with this
# prefix, otherwise percent.mt is 0 for every macaque cell.
add_percent_mt <- function(seurat_obj) {
  seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")
  seurat_obj
}
human1_expression <- add_percent_mt(human1_expression)
human2_expression <- add_percent_mt(human2_expression)
human3_expression <- add_percent_mt(human3_expression)
human4_expression <- add_percent_mt(human4_expression)
human5_expression <- add_percent_mt(human5_expression)
human6_expression <- add_percent_mt(human6_expression)
human7_expression <- add_percent_mt(human7_expression)
human8_expression <- add_percent_mt(human8_expression)
human9_expression <- add_percent_mt(human9_expression)
human10_expression <- add_percent_mt(human10_expression)
human11_expression <- add_percent_mt(human11_expression)
human12_expression <- add_percent_mt(human12_expression)
human13_expression <- add_percent_mt(human13_expression)
human14_expression <- add_percent_mt(human14_expression)
human15_expression <- add_percent_mt(human15_expression)
human16_expression <- add_percent_mt(human16_expression)
macaque1_expression <- add_percent_mt(macaque1_expression)
macaque2_expression <- add_percent_mt(macaque2_expression)
macaque_CN_78_1_expression <- add_percent_mt(macaque_CN_78_1_expression)
macaque_DFC_110_1_expression <- add_percent_mt(macaque_DFC_110_1_expression)
macaque_OFC_110_1_expression <- add_percent_mt(macaque_OFC_110_1_expression)
macaque_93_DFC_expression <- add_percent_mt(macaque_93_DFC_expression)
macaque_110_DFC_S2_expression <- add_percent_mt(macaque_110_DFC_S2_expression)
macaque_110_VFC_expression <- add_percent_mt(macaque_110_VFC_expression)
macaque_110_VFC_S2_expression <- add_percent_mt(macaque_110_VFC_S2_expression)
macaque_77_Frontal_expression <- add_percent_mt(macaque_77_Frontal_expression)
macaque_64_Frontal_expression <- add_percent_mt(macaque_64_Frontal_expression)
macaque_62_Frontal_expression <- add_percent_mt(macaque_62_Frontal_expression)
## --------------------------------------------
# Label every cell with the sample it came from. These orig.ident values are
# the keys later used to split the merged object back into samples.
human1_expression$orig.ident <- "human1"
human2_expression$orig.ident <- "human2"
human3_expression$orig.ident <- "human3"
human4_expression$orig.ident <- "human4"
human5_expression$orig.ident <- "human5"
human6_expression$orig.ident <- "human6"
human7_expression$orig.ident <- "human7"
human8_expression$orig.ident <- "human8"
human9_expression$orig.ident <- "human9"
human10_expression$orig.ident <- "human10"
human11_expression$orig.ident <- "human11"
human12_expression$orig.ident <- "human12"
human13_expression$orig.ident <- "human13"
human14_expression$orig.ident <- "human14"
human15_expression$orig.ident <- "human15"
human16_expression$orig.ident <- "human16"
macaque1_expression$orig.ident <- "Macaque1"
macaque2_expression$orig.ident <- "Macaque2"
macaque_CN_78_1_expression$orig.ident <- "Macaque3_78_1"
macaque_DFC_110_1_expression$orig.ident <- "Macaque3_110_DFC"
macaque_OFC_110_1_expression$orig.ident <- "Macaque3_110_OFC"
macaque_93_DFC_expression$orig.ident <- "Macaque3_93_DFC"
macaque_110_DFC_S2_expression$orig.ident <- "Macaque3_110_DFC_S2"
macaque_110_VFC_expression$orig.ident <- "Macaque3_110_VFC"
macaque_110_VFC_S2_expression$orig.ident <- "Macaque3_110_VFC_S2"
macaque_77_Frontal_expression$orig.ident <- "Macaque3_77_Frontal"
macaque_64_Frontal_expression$orig.ident <- "Macaque3_64_Frontal"
macaque_62_Frontal_expression$orig.ident <- "Macaque3_62_Frontal"
## ------------------------------------------
# Cell-cycle scoring of every sample with Seurat's built-in cc.genes lists.
#
# The marker lists used to be restricted to the genes of human1 for ALL
# samples (including the macaque ones). Each object is now scored with the
# cell-cycle genes present in that object itself, so no sample is handed
# genes it lacks and no gene is dropped just because human1 lacks it.
# NOTE(review): scoring runs before any normalisation step visible here;
# confirm that this is intended.
all_genes <- rownames(human1_expression) # kept: may still be used further down

# Score one Seurat object using only the cc.genes features it contains.
# Returns the object with the cell-cycle metadata added (identities untouched).
score_cell_cycle <- function(seurat_obj) {
  genes_present <- rownames(seurat_obj)
  CellCycleScoring(
    seurat_obj,
    g2m.features = cc.genes$g2m.genes[cc.genes$g2m.genes %in% genes_present],
    s.features = cc.genes$s.genes[cc.genes$s.genes %in% genes_present],
    set.ident = FALSE
  )
}

human1_expression <- score_cell_cycle(human1_expression)
human2_expression <- score_cell_cycle(human2_expression)
human3_expression <- score_cell_cycle(human3_expression)
human4_expression <- score_cell_cycle(human4_expression)
human5_expression <- score_cell_cycle(human5_expression)
human6_expression <- score_cell_cycle(human6_expression)
human7_expression <- score_cell_cycle(human7_expression)
human8_expression <- score_cell_cycle(human8_expression)
human9_expression <- score_cell_cycle(human9_expression)
human10_expression <- score_cell_cycle(human10_expression)
human11_expression <- score_cell_cycle(human11_expression)
human12_expression <- score_cell_cycle(human12_expression)
human13_expression <- score_cell_cycle(human13_expression)
human14_expression <- score_cell_cycle(human14_expression)
human15_expression <- score_cell_cycle(human15_expression)
human16_expression <- score_cell_cycle(human16_expression)
macaque1_expression <- score_cell_cycle(macaque1_expression)
macaque2_expression <- score_cell_cycle(macaque2_expression)
macaque_CN_78_1_expression <- score_cell_cycle(macaque_CN_78_1_expression)
macaque_DFC_110_1_expression <- score_cell_cycle(macaque_DFC_110_1_expression)
macaque_OFC_110_1_expression <- score_cell_cycle(macaque_OFC_110_1_expression)
macaque_93_DFC_expression <- score_cell_cycle(macaque_93_DFC_expression)
macaque_110_DFC_S2_expression <- score_cell_cycle(macaque_110_DFC_S2_expression)
macaque_110_VFC_expression <- score_cell_cycle(macaque_110_VFC_expression)
macaque_110_VFC_S2_expression <- score_cell_cycle(macaque_110_VFC_S2_expression)
macaque_77_Frontal_expression <- score_cell_cycle(macaque_77_Frontal_expression)
macaque_64_Frontal_expression <- score_cell_cycle(macaque_64_Frontal_expression)
macaque_62_Frontal_expression <- score_cell_cycle(macaque_62_Frontal_expression)
# Quality control: refresh percent.mt ("^MT-" features) and keep only cells
# with more than 200 and fewer than 10000 detected features and less than
# 1 % "MT-" counts. The samples are independent, so each one is processed in
# a single step.
filter_low_quality_cells <- function(seurat_obj) {
  seurat_obj[["percent.mt"]] <- PercentageFeatureSet(seurat_obj, pattern = "^MT-")
  subset(seurat_obj, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
}
human1_expression <- filter_low_quality_cells(human1_expression)
human2_expression <- filter_low_quality_cells(human2_expression)
human3_expression <- filter_low_quality_cells(human3_expression)
human4_expression <- filter_low_quality_cells(human4_expression)
human5_expression <- filter_low_quality_cells(human5_expression)
human6_expression <- filter_low_quality_cells(human6_expression)
human7_expression <- filter_low_quality_cells(human7_expression)
human8_expression <- filter_low_quality_cells(human8_expression)
human9_expression <- filter_low_quality_cells(human9_expression)
human10_expression <- filter_low_quality_cells(human10_expression)
human11_expression <- filter_low_quality_cells(human11_expression)
human12_expression <- filter_low_quality_cells(human12_expression)
human13_expression <- filter_low_quality_cells(human13_expression)
human14_expression <- filter_low_quality_cells(human14_expression)
human15_expression <- filter_low_quality_cells(human15_expression)
human16_expression <- filter_low_quality_cells(human16_expression)
macaque1_expression <- filter_low_quality_cells(macaque1_expression)
macaque2_expression <- filter_low_quality_cells(macaque2_expression)
macaque_CN_78_1_expression <- filter_low_quality_cells(macaque_CN_78_1_expression)
macaque_DFC_110_1_expression <- filter_low_quality_cells(macaque_DFC_110_1_expression)
macaque_OFC_110_1_expression <- filter_low_quality_cells(macaque_OFC_110_1_expression)
macaque_93_DFC_expression <- filter_low_quality_cells(macaque_93_DFC_expression)
macaque_110_DFC_S2_expression <- filter_low_quality_cells(macaque_110_DFC_S2_expression)
macaque_110_VFC_expression <- filter_low_quality_cells(macaque_110_VFC_expression)
macaque_110_VFC_S2_expression <- filter_low_quality_cells(macaque_110_VFC_S2_expression)
macaque_77_Frontal_expression <- filter_low_quality_cells(macaque_77_Frontal_expression)
macaque_64_Frontal_expression <- filter_low_quality_cells(macaque_64_Frontal_expression)
macaque_62_Frontal_expression <- filter_low_quality_cells(macaque_62_Frontal_expression)
# Merge all filtered samples into one Seurat object. Each prefix below is
# prepended to the cell names of the sample at the same position
# (human1 first, then the objects passed as `y`, in order).
# NOTE(review): the macaque prefixes do not follow the orig.ident labels
# (e.g. "macaque_78_1" vs "Macaque3_78_1"); they are kept unchanged because
# later code may rely on the resulting cell names.
cell_id_prefixes <- c(
  paste0("human", seq_len(16)),
  "macaque1", "macaque2", "macaque_78_1",
  "Macaque3_110_DFC", "Macaque3_110_OFC",
  "macaque_93_DFC_expression",
  "macaque_110_DFC_S2_expression",
  "macaque_110_VFC_expression",
  "macaque_110_VFC_S2_expression",
  "macaque_77_Frontal_expression",
  "macaque_64_Frontal_expression",
  "macaque_62_Frontal_expression"
)
alldata <- merge(
  human1_expression,
  y = c(
    human2_expression, human3_expression, human4_expression,
    human5_expression, human6_expression, human7_expression,
    human8_expression, human9_expression, human10_expression,
    human11_expression, human12_expression, human13_expression,
    human14_expression, human15_expression, human16_expression,
    macaque1_expression, macaque2_expression,
    macaque_CN_78_1_expression,
    macaque_DFC_110_1_expression,
    macaque_OFC_110_1_expression,
    macaque_93_DFC_expression,
    macaque_110_DFC_S2_expression,
    macaque_110_VFC_expression,
    macaque_110_VFC_S2_expression,
    macaque_77_Frontal_expression,
    macaque_64_Frontal_expression,
    macaque_62_Frontal_expression
  ),
  add.cell.ids = cell_id_prefixes
)
# Checkpoint the merged object, then reload it. The save() and load() calls
# had been fused onto one line, which does not parse.
save(alldata, file = paste0(objects_directory, "scRNA_published_foetal_samples.RData"))
load(paste0(objects_directory, "scRNA_published_foetal_samples.RData"))

# One Seurat object per sample, named by its orig.ident label.
split_seurat <- SplitObject(alldata, split.by = "orig.ident")

# Cluster every human sample. Samples are looked up by name rather than by
# position, matching the macaque section, so a change in sample order cannot
# silently assign the wrong data to a variable.
human1 <- perform_clustering_to_find_astrocytes(split_seurat[["human1"]])
human2 <- perform_clustering_to_find_astrocytes(split_seurat[["human2"]])
human3 <- perform_clustering_to_find_astrocytes(split_seurat[["human3"]])
human5 <- perform_clustering_to_find_astrocytes(split_seurat[["human5"]])
human6 <- perform_clustering_to_find_astrocytes(split_seurat[["human6"]])
human7 <- perform_clustering_to_find_astrocytes(split_seurat[["human7"]])
human9 <- perform_clustering_to_find_astrocytes(split_seurat[["human9"]])
human10 <- perform_clustering_to_find_astrocytes(split_seurat[["human10"]])
human13 <- perform_clustering_to_find_astrocytes(split_seurat[["human13"]])
human15 <- perform_clustering_to_find_astrocytes(split_seurat[["human15"]])
human16 <- perform_clustering_to_find_astrocytes(split_seurat[["human16"]])
## takes longer
human4 <- perform_clustering_to_find_astrocytes(split_seurat[["human4"]]) # long
human8 <- perform_clustering_to_find_astrocytes(split_seurat[["human8"]]) # long
human11 <- perform_clustering_to_find_astrocytes(split_seurat[["human11"]]) # long
human12 <- perform_clustering_to_find_astrocytes(split_seurat[["human12"]]) # long
human14 <- perform_clustering_to_find_astrocytes(split_seurat[["human14"]]) # long
# Write each clustered human sample to its own file,
# <objects_directory>human<i>_scRNA.RData, holding the object `human<i>`.
for (sample_id in paste0("human", seq_len(16))) {
  save(list = sample_id, file = paste0(objects_directory, sample_id, "_scRNA.RData"))
}
# NOTE(review): objects_directory is re-pointed at a hard-coded local path
# here, after the human objects were saved with its previous value. Confirm
# both values refer to the same folder, otherwise the loads of the human
# objects further down will not find their files.
objects_directory <- "~/Desktop/Ciuba_et_al_SM/data/objects/"

# Cluster each macaque sample (looked up by its orig.ident label) and save
# the result straight away.
macaque1 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque1"]])
save(macaque1, file = paste0(objects_directory, "macaque1_scRNA.RData"))
macaque2 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque2"]])
save(macaque2, file = paste0(objects_directory, "macaque2_scRNA.RData"))
macaque3 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_78_1"]])
save(macaque3, file = paste0(objects_directory, "macaque3_scRNA.RData"))
macaque4 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_DFC"]])
save(macaque4, file = paste0(objects_directory, "macaque4_scRNA.RData"))
macaque5 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_OFC"]])
save(macaque5, file = paste0(objects_directory, "macaque5_scRNA.RData"))
macaque6 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_93_DFC"]])
save(macaque6, file = paste0(objects_directory, "macaque6_scRNA.RData"))
macaque7 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_DFC_S2"]])
save(macaque7, file = paste0(objects_directory, "macaque7_scRNA.RData"))
macaque8 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_VFC"]])
save(macaque8, file = paste0(objects_directory, "macaque8_scRNA.RData"))
macaque9 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_VFC_S2"]])
save(macaque9, file = paste0(objects_directory, "macaque9_scRNA.RData"))
macaque10 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_77_Frontal"]])
save(macaque10, file = paste0(objects_directory, "macaque10_scRNA.RData"))
macaque11 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_64_Frontal"]])
save(macaque11, file = paste0(objects_directory, "macaque11_scRNA.RData"))
macaque12 <- perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_62_Frontal"]])
save(macaque12, file = paste0(objects_directory, "macaque12_scRNA.RData"))

# Reload the clustered human samples, starting with human1. This load() had
# been fused onto the end of the previous save() line, which does not parse.
load(paste0(objects_directory, "human1_scRNA.RData"))
# Reload the remaining clustered human samples (human2 ... human16) from
# <objects_directory>human<i>_scRNA.RData.
for (sample_id in paste0("human", 2:16)) {
  load(paste0(objects_directory, sample_id, "_scRNA.RData"))
}
# For every human sample, run findClusterCorrespondingToAstrocytes() on the
# "RNA_snn_res.2" clustering with one shared astrocyte marker panel, then
# save all sixteen results together.
astrocyte_markers <- c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
cluster_resolution <- "RNA_snn_res.2"

human1_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human1, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human2_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human2, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human3_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human3, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human4_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human4, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human5_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human5, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human6_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human6, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human7_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human7, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human8_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human8, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human9_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human9, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human10_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human10, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human11_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human11, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human12_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human12, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human13_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human13, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human14_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human14, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human15_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human15, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)
human16_astrocyte_counts <- findClusterCorrespondingToAstrocytes(human16, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers)

# All sixteen result objects go into a single .RData file.
save(
  list = paste0("human", seq_len(16), "_astrocyte_counts"),
  file = paste0(objects_directory, "human_scRNA_pseudobulk_data.RData")
)
# Run getAstrocytes() on every human sample, using the "RNA_snn_res.2"
# clustering and one shared astrocyte marker panel.
# The trailing numbers are the author's recorded astrocyte counts.
# NOTE(review): the 153 repeated for human10-human16 looks copy-pasted from
# human1 - TODO confirm against a fresh run.
astrocyte_markers <- c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
cluster_resolution <- "RNA_snn_res.2"

human1_astrocyte <- getAstrocytes(human1, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human2_astrocyte <- getAstrocytes(human2, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 276
human3_astrocyte <- getAstrocytes(human3, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 252
human4_astrocyte <- getAstrocytes(human4, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 280
human5_astrocyte <- getAstrocytes(human5, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human6_astrocyte <- getAstrocytes(human6, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 85
human7_astrocyte <- getAstrocytes(human7, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 41
human8_astrocyte <- getAstrocytes(human8, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 205
human9_astrocyte <- getAstrocytes(human9, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 40
human10_astrocyte <- getAstrocytes(human10, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human11_astrocyte <- getAstrocytes(human11, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human12_astrocyte <- getAstrocytes(human12, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human13_astrocyte <- getAstrocytes(human13, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human14_astrocyte <- getAstrocytes(human14, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human15_astrocyte <- getAstrocytes(human15, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
human16_astrocyte <- getAstrocytes(human16, chosenClusterSet = cluster_resolution, astrocyticMarkers = astrocyte_markers) # 153
# Console transcript of the calls above (it continues on the lines below):
# > human1_astrocyte = getAstrocytes(human1,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 26 found 153 astrocytes"
> human2_astrocyte = getAstrocytes(human2,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 9 found 276 astrocytes"
> human3_astrocyte = getAstrocytes(human3,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 10 found 252 astrocytes"
> human4_astrocyte = getAstrocytes(human4,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 22 found 280 astrocytes"
> human5_astrocyte = getAstrocytes(human5,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 26 found 153 astrocytes"
> human6_astrocyte = getAstrocytes(human6,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 20 found 85 astrocytes"
> human7_astrocyte = getAstrocytes(human7,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 23 found 41 astrocytes"
> human8_astrocyte = getAstrocytes(human8,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 21 found 205 astrocytes"
> human9_astrocyte = getAstrocytes(human9,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 22 found 40 astrocytes"
> human10_astrocyte = getAstrocytes(human10,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 21 found 82 astrocytes"
> human11_astrocyte = getAstrocytes(human11,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 31 found 259 astrocytes"
> human12_astrocyte = getAstrocytes(human12,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 27 found 96 astrocytes"
> human13_astrocyte = getAstrocytes(human13,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 12 found 112 astrocytes"load(paste0(objects_directory,"human_scRNA_pseudobulk_data.RData"))
# Assemble one table with a column per human sample; rows are named after
# the entries of the human1 count vector.
human_astrocyte_counts <- data.frame(
  human1 = human1_astrocyte_counts,
  human2 = human2_astrocyte_counts,
  human3 = human3_astrocyte_counts,
  human4 = human4_astrocyte_counts,
  human5 = human5_astrocyte_counts,
  human6 = human6_astrocyte_counts,
  human7 = human7_astrocyte_counts,
  human8 = human8_astrocyte_counts,
  human9 = human9_astrocyte_counts,
  human10 = human10_astrocyte_counts,
  human11 = human11_astrocyte_counts,
  human12 = human12_astrocyte_counts,
  human13 = human13_astrocyte_counts,
  human14 = human14_astrocyte_counts,
  human15 = human15_astrocyte_counts,
  human16 = human16_astrocyte_counts,
  row.names = names(human1_astrocyte_counts)
)

# Sample annotation: one row per column of the count table, same order.
human_astrocyte_counts_metadata <- data.frame(
  Species = rep("Human", ncol(human_astrocyte_counts)),
  Human_NHP = rep("Human", ncol(human_astrocyte_counts)),
  study = rep("Mixed", ncol(human_astrocyte_counts)),
  stage = rep("Foetal", ncol(human_astrocyte_counts)),
  row.names = colnames(human_astrocyte_counts)
)

save(human_astrocyte_counts, human_astrocyte_counts_metadata,
     file = paste0(objects_directory, "human_astrocyte_counts.RData"))

# First of the macaque single-cell objects (the original text had this load()
# fused onto the end of the save() call, which does not parse).
load(paste0(objects_directory,"macaque1_scRNA.RData"))
# Load the remaining macaque single-cell objects (macaque2 .. macaque12).
load(paste0(objects_directory, "macaque2_scRNA.RData"))
load(paste0(objects_directory, "macaque3_scRNA.RData"))
load(paste0(objects_directory, "macaque4_scRNA.RData"))
load(paste0(objects_directory, "macaque5_scRNA.RData"))
load(paste0(objects_directory, "macaque6_scRNA.RData"))
load(paste0(objects_directory, "macaque7_scRNA.RData"))
load(paste0(objects_directory, "macaque8_scRNA.RData"))
load(paste0(objects_directory, "macaque9_scRNA.RData"))
load(paste0(objects_directory, "macaque10_scRNA.RData"))
load(paste0(objects_directory, "macaque11_scRNA.RData"))
load(paste0(objects_directory, "macaque12_scRNA.RData"))

# The samples are processed in small batches: compute the astrocyte counts,
# save them, then drop the source objects and run gc() before the next batch.

# Batch 1: macaque1-3
macaque1_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque1, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
macaque2_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque2, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
macaque3_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque3, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
save(macaque1_astrocyte_counts, macaque2_astrocyte_counts, macaque3_astrocyte_counts,
     file = "~/Desktop/macaques123.RData")
rm(macaque1, macaque2, macaque3)
gc()

# Batch 2: macaque4-5
macaque4_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque4, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
macaque5_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque5, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
save(macaque4_astrocyte_counts, macaque5_astrocyte_counts,
     file = "~/Desktop/macaques45.RData")
rm(macaque4, macaque5)
gc()

# Batch 3: macaque6-7
macaque6_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque6, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
macaque7_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque7, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
save(macaque6_astrocyte_counts, macaque7_astrocyte_counts,
     file = "~/Desktop/macaques67.RData")
rm(macaque6, macaque7)
gc()

# Batch 4: macaque8-9
macaque8_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque8, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
macaque9_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque9, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
save(macaque8_astrocyte_counts, macaque9_astrocyte_counts,
     file = "~/Desktop/macaques89.RData")
rm(macaque8, macaque9)
gc()

# Batch 5: macaque10-11
macaque10_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque10, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
macaque11_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque11, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
save(macaque10_astrocyte_counts, macaque11_astrocyte_counts,
     file = "~/Desktop/macaques10_11.RData")
rm(macaque10, macaque11)
gc()

# Batch 6: macaque12
macaque12_astrocyte_counts <- findClusterCorrespondingToAstrocytes(
  macaque12, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3", "SOX9", "GFAP", "AQP4", "ALDH1A1", "ID4", "APOE", "S100A9")
)
save(macaque12_astrocyte_counts,
     file = "~/Desktop/macaques_12.RData")
rm(macaque12)
gc()
# Extract the astrocyte cluster of every macaque sample.
# The macaque1..macaque12 objects are removed with rm() after their counts are
# computed earlier in this script, so when the script runs top to bottom they
# no longer exist here. Each object is therefore reloaded from its RData file
# when it is missing; an object that is still present is used as-is.
# NOTE(review): reloaded objects are not freed again here, so all twelve may be
# held in memory at once -- confirm this fits the machine.
if (!exists("macaque1")) load(paste0(objects_directory, "macaque1_scRNA.RData"))
macaque1_astrocyte <- getAstrocytes(macaque1, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque2")) load(paste0(objects_directory, "macaque2_scRNA.RData"))
macaque2_astrocyte <- getAstrocytes(macaque2, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque3")) load(paste0(objects_directory, "macaque3_scRNA.RData"))
macaque3_astrocyte <- getAstrocytes(macaque3, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque4")) load(paste0(objects_directory, "macaque4_scRNA.RData"))
macaque4_astrocyte <- getAstrocytes(macaque4, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque5")) load(paste0(objects_directory, "macaque5_scRNA.RData"))
macaque5_astrocyte <- getAstrocytes(macaque5, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque6")) load(paste0(objects_directory, "macaque6_scRNA.RData"))
macaque6_astrocyte <- getAstrocytes(macaque6, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque7")) load(paste0(objects_directory, "macaque7_scRNA.RData"))
macaque7_astrocyte <- getAstrocytes(macaque7, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque8")) load(paste0(objects_directory, "macaque8_scRNA.RData"))
macaque8_astrocyte <- getAstrocytes(macaque8, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque9")) load(paste0(objects_directory, "macaque9_scRNA.RData"))
macaque9_astrocyte <- getAstrocytes(macaque9, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque10")) load(paste0(objects_directory, "macaque10_scRNA.RData"))
macaque10_astrocyte <- getAstrocytes(macaque10, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque11")) load(paste0(objects_directory, "macaque11_scRNA.RData"))
macaque11_astrocyte <- getAstrocytes(macaque11, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
if (!exists("macaque12")) load(paste0(objects_directory, "macaque12_scRNA.RData"))
macaque12_astrocyte <- getAstrocytes(macaque12, chosenClusterSet = "RNA_snn_res.2",
  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
# Assemble one table with a column per macaque sample; rows are named after
# the entries of the macaque1 count vector. The trailing numbers are the
# author's per-sample notes, kept unchanged.
macaque_astrocyte_counts <- data.frame(
  macaque1_late = macaque1_astrocyte_counts, # 807
  macaque2_late = macaque2_astrocyte_counts, # 36
  Macaque3_78_1 = macaque3_astrocyte_counts, # 687
  Macaque3_110_DFC = macaque4_astrocyte_counts, # 783
  Macaque3_110_OFC = macaque5_astrocyte_counts, # 114
  Macaque3_93_DFC = macaque6_astrocyte_counts, # 613
  Macaque3_110_DFC_S2 = macaque7_astrocyte_counts, # 37
  Macaque3_110_VFC = macaque8_astrocyte_counts, # 713
  Macaque3_110_VFC_S2 = macaque9_astrocyte_counts, # 265
  Macaque3_77_Frontal = macaque10_astrocyte_counts, # 441
  Macaque3_64_Frontal = macaque11_astrocyte_counts, # 314
  Macaque3_62_Frontal = macaque12_astrocyte_counts, # 246
  row.names = names(macaque1_astrocyte_counts)
)

# Sample annotation: one row per column of the count table, same order.
macaque_astrocyte_counts_metadata <- data.frame(
  Species = rep("Macaque", ncol(macaque_astrocyte_counts)),
  Human_NHP = rep("NHP", ncol(macaque_astrocyte_counts)),
  study = rep("Mixed", ncol(macaque_astrocyte_counts)),
  stage = rep("Foetal", ncol(macaque_astrocyte_counts)),
  row.names = colnames(macaque_astrocyte_counts)
)

save(macaque_astrocyte_counts, macaque_astrocyte_counts_metadata,
     file = paste0(objects_directory, "macaque_astrocyte_counts.RData"))

# "human14", "human7", "human13": only a few astrocytes were found in these
# samples, so they are removed from the analysis.
# Human vs NHP differential expression on the stitched pseudobulk tables.
load(paste0(objects_directory,"macaque_astrocyte_counts.RData"))
load(paste0(objects_directory,"human_astrocyte_counts.RData"))

# Both tables must list the same rows in the same order before column-binding.
all(rownames(macaque_astrocyte_counts)==rownames(human_astrocyte_counts))
## [1] TRUE
stitched_counts <- data.frame( macaque_astrocyte_counts, human_astrocyte_counts )
st_metadata <- rbind( macaque_astrocyte_counts_metadata,human_astrocyte_counts_metadata )
all(colnames(stitched_counts)==rownames(st_metadata))
## [1] TRUE

# human14, human7 and human13 are excluded from both the counts and the
# sample annotation.
data <- DESeqDataSetFromMatrix( countData = stitched_counts[,! colnames(stitched_counts) %in% c("human14","human7","human13")],
  colData = st_metadata[! rownames(st_metadata) %in% c("human14","human7","human13"),],
  design = ~ Human_NHP )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
data <- estimateSizeFactors(data)
data <- estimateDispersions(data, fitType = "local")
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
# DESeq() re-estimates the dispersions (see its log below); the explicit
# estimateDispersions() call above is kept as in the original analysis.
data <- DESeq(data, fitType = 'local')
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 89 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

# Positive log2FoldChange means higher in Human than in NHP.
degs <- results(data, contrast = c("Human_NHP","Human", "NHP") )
human_macaque_fetal_norm_counts <- counts(data,normalized=TRUE)
human_macaque_fetal_unnorm_counts <- counts(data,normalized=FALSE)

# The saved `degs` still contains the rows with NA padj; filtering happens
# only after the save.
save(data,degs,
     human_macaque_fetal_norm_counts,
     human_macaque_fetal_unnorm_counts,
     file=paste0(objects_directory,"pseudobulk_published_scRNA_Foetal.RData"))

# (The original text had the next statement fused onto the save() call.)
degs <- degs[! is.na(degs$padj), ]
degs_01 <- degs[ degs$padj < 0.1, ]
sum( degs_01$log2FoldChange>0 )
## [1] 5212
sum( degs_01$log2FoldChange<0 )
## [1] 6219
We confirm about 37% (87/238) of the up-regulated genes and about 35% (104/301) of the down-regulated genes.
# Read the Ensembl gene IDs of the up and down hit lists (one ID per line,
# no header), translate them to HGNC symbols through `genemap`, and keep
# the symbols that are present among the row names of `degs`.
up.hits.ensids <- read.delim(paste0(outputs_directory, "up_engs.txt"),
                             as.is = TRUE, header = FALSE)
up_in_map <- is.element(genemap$ensembl_gene_id, up.hits.ensids$V1)
up.hits.geneN <- unique(genemap$hgnc_symbol[up_in_map])
up.hits.geneN_filt <- up.hits.geneN[is.element(up.hits.geneN, rownames(degs))]

dn.hits.ensids <- read.delim(paste0(outputs_directory, "dn_engs.txt"),
                             as.is = TRUE, header = FALSE)
dn_in_map <- is.element(genemap$ensembl_gene_id, dn.hits.ensids$V1)
dn.hits.geneN <- unique(genemap$hgnc_symbol[dn_in_map])
dn.hits.geneN_filt <- dn.hits.geneN[is.element(dn.hits.geneN, rownames(degs))]

length(up.hits.geneN_filt)
## [1] 238
length(dn.hits.geneN_filt)
## [1] 301

# DESeq2 results restricted to the hit-list genes.
degs_us_up <- degs[is.element(rownames(degs), up.hits.geneN_filt), ]
degs_us_dn <- degs[is.element(rownames(degs), dn.hits.geneN_filt), ]

# Hit-list up genes that are also significantly higher in Human.
degs_us_up[degs_us_up$padj < 0.1 & degs_us_up$log2FoldChange > 0, ]
## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 87 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## SCNN1D 6.61796 5.252182 0.580160 9.05299 1.39105e-19 9.86892e-19
## CDK11A 35.76199 2.444782 0.184597 13.24392 4.89269e-40 9.54270e-39
## SLC35E2A 58.70908 2.730201 0.189547 14.40384 4.89453e-47 1.27041e-45
## H6PD 20.64759 1.287565 0.170613 7.54670 4.46417e-14 2.23169e-13
## DFFA 17.20583 0.636637 0.168038 3.78865 1.51468e-04 3.34767e-04
## ... ... ... ... ... ... ...
## ADA2 8.39128 2.439363 0.293316 8.31651 9.05931e-17 5.33378e-16
## LZTR1 25.92802 0.882482 0.168153 5.24808 1.53688e-07 4.68215e-07
## C1QTNF6 7.36232 1.673785 0.369422 4.53082 5.87541e-06 1.52153e-05
## MT-ATP8 24.80009 8.290334 0.666553 12.43761 1.63309e-35 2.54872e-34
## MT-ATP6 15.51741 7.188377 0.712925 10.08294 6.57311e-24 5.92171e-23
# Hit-list down genes that are also significantly lower in Human (padj < 0.1, log2FoldChange < 0).
degs_us_dn[degs_us_dn$padj<0.1 & degs_us_dn$log2FoldChange<0,]## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 104 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## SRSF10 104.4759 -0.932483 0.197947 -4.71077 0.000002467874236896
## RCAN3 15.2877 -2.830897 0.421129 -6.72216 0.000000000017904770
## PDIK1L 14.7172 -1.528401 0.215272 -7.09987 0.000000000001248702
## SRSF4 109.6152 -0.319722 0.169547 -1.88574 0.059329607370328094
## PRPF38A 32.0105 -1.528840 0.200637 -7.61992 0.000000000000025384
## ... ... ... ... ... ...
## ZNF776 24.28439 -2.44103 0.202366 -12.06248 1.66682e-33
## OLIG2 23.58954 -2.73026 0.728066 -3.75002 1.76820e-04
## DONSON 10.98414 -1.23557 0.311389 -3.96792 7.25017e-05
## MT-CO3 138.76363 -4.75420 0.436041 -10.90310 1.11399e-27
## C1GALT1C1L 1.77877 -3.83977 0.630962 -6.08558 1.16069e-09
## padj
## <numeric>
## SRSF10 0.000006656184798295
## RCAN3 0.000000000075424177
## PDIK1L 0.000000000005675825
## SRSF4 0.089430349211630344
## PRPF38A 0.000000000000128673
## ... ...
## ZNF776 2.37697e-32
## OLIG2 3.88128e-04
## DONSON 1.66126e-04
## MT-CO3 1.20738e-26
## C1GALT1C1L 4.24191e-09
# Names of the hit-list genes confirmed in the foetal pseudobulk comparison:
# same direction of change and padj < 0.1.
conf_up <- rownames(degs_us_up[degs_us_up$padj<0.1 & degs_us_up$log2FoldChange>0,])
conf_down <- rownames(degs_us_dn[degs_us_dn$padj<0.1 & degs_us_dn$log2FoldChange<0,])

# Fraction of confirmed genes per direction. `names.arg` is spelled out in
# full; the original `names=` only worked through partial argument matching.
barplot( c(length(conf_up)/length(up.hits.geneN_filt),
           length(conf_down)/length(dn.hits.geneN_filt)),
         col=c("green4","wheat3"), ylim=c(0,0.5), ylab="Fraction",
         names.arg=c("Up","Down"), xlab="EAGs")
axis(2,lwd=1)

# Boxplots of chosen genes
# Sample annotation without the three excluded human samples.
sa <- st_metadata[!is.element(rownames(st_metadata), c("human14","human7","human13")), ]
# Draw side-by-side boxplots of one gene's values for Human and Macaque samples.
#
# Args:
#   ct:   matrix of values with genes in rows (row names are gene identifiers)
#         and samples in columns.
#   gene: a single row name of `ct`; also used as the plot title.
#   sa:   sample annotation with a `Species` column holding one entry per
#         column of `ct`. The entries are assumed to be in the same order as
#         the columns of `ct`; this is not checked.
#   cols: border colours of the "Human" and "Macaque" boxes, in that order.
#
# Returns (invisibly) the value of boxplot().
# Stops with an informative error when `gene` does not match exactly one row
# of `ct`, or when `sa$Species` and the columns of `ct` differ in number.
plotAGene = function( ct, gene, sa, cols ){
  # ct = human_macaque_fetal_norm_counts; gene="CTCF"
  # sa = st_metadata[! rownames(st_metadata) %in% c("human14","human7","human13"),]
  # cols = c("black","blue")
  gene_row = which(rownames(ct) == gene)
  if (length(gene_row) != 1L) {
    stop("expected exactly one row named '", gene, "' in `ct`, found ",
         length(gene_row), call. = FALSE)
  }
  if (length(sa$Species) != ncol(ct)) {
    stop("`sa$Species` has ", length(sa$Species), " entries but `ct` has ",
         ncol(ct), " columns", call. = FALSE)
  }
  # Split the gene's values by species; fix the box order to Human, Macaque.
  x = split( ct[gene_row, ], sa$Species )[c("Human","Macaque")]
  boxplot(x, border=cols, main=gene, col="white")
}
# Print the DESeq2 result row for CTCF (Human vs NHP contrast).
degs["CTCF",]## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## CTCF 73.2475 -0.458664 0.186933 -2.45363 0.0141422 0.0240199
plotAGene( human_macaque_fetal_norm_counts, "CTCF", sa, c("black","blue")) # P=0.0147813
# Print the DESeq2 result row for TEAD3 (this statement had been swallowed by
# the trailing comment of the line above).
degs["TEAD3",]## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## TEAD3 11.8658 1.36589 0.24607 5.55085 0.0000000284291
## padj
## <numeric>
## TEAD3 0.0000000923959
plotAGene( human_macaque_fetal_norm_counts, "TEAD3", sa, c("black","blue")) # P=3.20079e-08

# Kanton table (Supplementary Table 15): split the genes by whether the
# average expression is higher in human or in chimp, then keep only symbols.
# In the original text the read.delim() call was hidden inside the comment on
# the plotAGene() line, so `kanton_hits` was never created.
kanton_hits <- read.delim(paste0(outputs_directory,"Supplementary_Table_15_human_DE.txt"))
kanton_hits_up <- kanton_hits[kanton_hits$Average.expression..human.>kanton_hits$Average.expression..chimp.,]
kanton_hits_dn <- kanton_hits[kanton_hits$Average.expression..human.<kanton_hits$Average.expression..chimp.,]
kanton_hits_up <- kanton_hits_up$Symbol
kanton_hits_dn <- kanton_hits_dn$Symbol

# Jorstad astrocyte table, human vs chimp (comma-separated). This assignment
# had been fused onto `$Symbol` above, so `hs_pt` was never created.
hs_pt <- read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_chimp_sig_genes.txt'),sep=",")
# Jorstad astrocyte tables for the remaining comparisons (comma-separated).
hs_pp <- read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_gorilla_sig_genes.txt'),sep=",")
hs_rm <- read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_rhesus_sig_genes.txt'), sep=",")
hs_cj <- read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_marmoset_sig_genes.txt'),sep=",")

# Drop rows without an adjusted p-value.
hs_pt <- hs_pt[!is.na(hs_pt$padj),]
hs_pp <- hs_pp[!is.na(hs_pp$padj),]
hs_rm <- hs_rm[!is.na(hs_rm$padj),]
hs_cj <- hs_cj[!is.na(hs_cj$padj),]

# identify downregulated and up regulated genes
# Suffixes: 1 = chimp table, 2 = gorilla, 3 = rhesus, 4 = marmoset.
# A gene counts as down/up when padj < 0.1 and log2FoldChange is below -thr /
# above thr.
thr <- 0
down1 <- hs_pt$gene[hs_pt$log2FoldChange<(-1*thr) & hs_pt$padj<0.1]
down2 <- hs_pp$gene[hs_pp$log2FoldChange<(-1*thr) & hs_pp$padj<0.1]
down3 <- hs_rm$gene[hs_rm$log2FoldChange<(-1*thr) & hs_rm$padj<0.1]
down4 <- hs_cj$gene[hs_cj$log2FoldChange<(-1*thr) & hs_cj$padj<0.1]
up1 <- hs_pt$gene[hs_pt$log2FoldChange>thr & hs_pt$padj<0.1]
up2 <- hs_pp$gene[hs_pp$log2FoldChange>thr & hs_pp$padj<0.1]
up3 <- hs_rm$gene[hs_rm$log2FoldChange>thr & hs_rm$padj<0.1]
up4 <- hs_cj$gene[hs_cj$log2FoldChange>thr & hs_cj$padj<0.1]

# Genes shared across comparisons: the digits in the name list the tables
# that must all contain the gene.
down1234 <- down1[down1 %in% down2[down2 %in% down3[down3 %in% down4]]]
up1234 <- up1[up1 %in% up2[up2 %in% up3[up3 %in% up4]]]
down12 <- down1[down1 %in% down2]
up12 <- up1[up1 %in% up2]
down123 <- down1[down1 %in% down2[down2 %in% down3]]
up123 <- up1[up1 %in% up2[up2 %in% up3]]

# These are the tables I obtained from Shaojie Ma directly.
# Wilcoxon DEG results: keep astrocyte rows with adjusted p-value < 0.01.
load(paste0(outputs_directory, '/Wilcox_DEG_results_raw.Rdata'))
deg_species_filt <- deg_species[
  deg_species$cluster == "Astro" & deg_species$p_val_adj < 0.01,
]

# ---- higher in human: log2(ratio_fc) > 0 ----
# Restrict to Human (species1) against Chimpanzee or Rhesus (species2) and
# keep genes listed more than once (presumably once per comparison).
deg_species_filt_astro_hs <- deg_species_filt[log2(deg_species_filt$ratio_fc) > 0, ]
deg_species_filt_astro_hs <- deg_species_filt_astro_hs[
  deg_species_filt_astro_hs$species1 == "Human" &
    deg_species_filt_astro_hs$species2 %in% c("Chimpanzee", "Rhesus"),
]
up_genes <- table(deg_species_filt_astro_hs$gene)
up_genes <- names(up_genes[up_genes > 1])

# ---- lower in human: log2(ratio_fc) < 0 ----
deg_species_filt_astro_hs <- deg_species_filt[log2(deg_species_filt$ratio_fc) < (0), ]
deg_species_filt_astro_hs <- deg_species_filt_astro_hs[
  deg_species_filt_astro_hs$species1 == "Human" &
    deg_species_filt_astro_hs$species2 %in% c("Chimpanzee", "Rhesus"),
]
dn_genes <- table(deg_species_filt_astro_hs$gene)
dn_genes <- names(dn_genes[dn_genes > 1])

# No gene may be in both lists.
any(up_genes %in% dn_genes)
## [1] FALSE
length(up_genes)
## [1] 1429
length(dn_genes)
## [1] 1123
# Membership table of up-regulated genes across the five gene lists. A gene is
# kept when it is in our own list (Ciuba) and in at least one of the others.
all_up_all <- local({
  candidate_genes <- unique(c(kanton_hits_up, up123, up_genes, up.hits.geneN, conf_up))
  membership <- data.frame(
    Kanton = candidate_genes %in% kanton_hits_up,
    Jorstad = candidate_genes %in% up123,
    Ma = candidate_genes %in% up_genes,
    Foetal = candidate_genes %in% conf_up,
    Ciuba = candidate_genes %in% up.hits.geneN,
    row.names = candidate_genes
  )
  # Same condition as "row sum over the first four columns > 0".
  in_other_list <- membership$Kanton | membership$Jorstad |
    membership$Ma | membership$Foetal
  membership[membership$Ciuba & in_other_list, ]
})
all_up_all## Kanton Jorstad Ma Foetal Ciuba
## NBPF11 TRUE FALSE FALSE TRUE TRUE
## NBPF14 TRUE FALSE FALSE TRUE TRUE
## PABPC1L TRUE FALSE FALSE TRUE TRUE
## PALLD TRUE TRUE TRUE TRUE TRUE
## PCAT6 TRUE FALSE FALSE FALSE TRUE
## PIGZ TRUE TRUE FALSE TRUE TRUE
## SCNN1D TRUE FALSE FALSE TRUE TRUE
## SCRG1 TRUE TRUE TRUE FALSE TRUE
## THBS4 TRUE FALSE TRUE TRUE TRUE
## PAGR1 FALSE TRUE FALSE TRUE TRUE
## STK33 FALSE TRUE TRUE TRUE TRUE
## VKORC1 FALSE TRUE FALSE FALSE TRUE
## AQP1 FALSE TRUE TRUE FALSE TRUE
## MTCH1 FALSE TRUE TRUE FALSE TRUE
## PRDX6 FALSE TRUE TRUE FALSE TRUE
## RMDN1 FALSE TRUE FALSE FALSE TRUE
## RANGRF FALSE TRUE FALSE FALSE TRUE
## GUK1 FALSE TRUE TRUE FALSE TRUE
## ATP6V1E2 FALSE TRUE TRUE TRUE TRUE
## S100A13 FALSE TRUE FALSE FALSE TRUE
## FAM228B FALSE TRUE FALSE FALSE TRUE
## LIN7A FALSE TRUE FALSE TRUE TRUE
## ACACA FALSE FALSE TRUE TRUE TRUE
## BAIAP3 FALSE FALSE TRUE TRUE TRUE
## C1orf54 FALSE FALSE TRUE TRUE TRUE
## C1QTNF6 FALSE FALSE TRUE TRUE TRUE
## C22orf46 FALSE FALSE TRUE FALSE TRUE
## CPS1 FALSE FALSE TRUE FALSE TRUE
## DGCR6L FALSE FALSE TRUE FALSE TRUE
## EFHD1 FALSE FALSE TRUE TRUE TRUE
## GFPT2 FALSE FALSE TRUE TRUE TRUE
## GTF3C5 FALSE FALSE TRUE TRUE TRUE
## HSPB1 FALSE FALSE TRUE FALSE TRUE
## MLH1 FALSE FALSE TRUE TRUE TRUE
## MMP19 FALSE FALSE TRUE FALSE TRUE
## MOV10 FALSE FALSE TRUE TRUE TRUE
## NDUFV1 FALSE FALSE TRUE FALSE TRUE
## NR1H3 FALSE FALSE TRUE TRUE TRUE
## PDLIM7 FALSE FALSE TRUE FALSE TRUE
## RHOBTB3 FALSE FALSE TRUE FALSE TRUE
## SIRT3 FALSE FALSE TRUE TRUE TRUE
## STYXL1 FALSE FALSE TRUE TRUE TRUE
## TCF25 FALSE FALSE TRUE TRUE TRUE
## TCTN3 FALSE FALSE TRUE TRUE TRUE
## TMEM9B-AS1 FALSE FALSE TRUE FALSE TRUE
## TRIP6 FALSE FALSE TRUE FALSE TRUE
## TSR3 FALSE FALSE TRUE FALSE TRUE
## VIM FALSE FALSE TRUE FALSE TRUE
## ZNF266 FALSE FALSE TRUE TRUE TRUE
## ZNHIT3 FALSE FALSE TRUE FALSE TRUE
## CDK11A FALSE FALSE FALSE TRUE TRUE
## H6PD FALSE FALSE FALSE TRUE TRUE
## DFFA FALSE FALSE FALSE TRUE TRUE
## PAQR7 FALSE FALSE FALSE TRUE TRUE
## SRGAP2B FALSE FALSE FALSE TRUE TRUE
## HHLA3 FALSE FALSE FALSE TRUE TRUE
## SLC35E2A FALSE FALSE FALSE TRUE TRUE
## NBPF1 FALSE FALSE FALSE TRUE TRUE
## NBPF15 FALSE FALSE FALSE TRUE TRUE
## NBPF9 FALSE FALSE FALSE TRUE TRUE
## NBPF19 FALSE FALSE FALSE TRUE TRUE
## NBPF26 FALSE FALSE FALSE TRUE TRUE
## ACOX3 FALSE FALSE FALSE TRUE TRUE
## CBR4 FALSE FALSE FALSE TRUE TRUE
## TMEM129 FALSE FALSE FALSE TRUE TRUE
## SULT1C4 FALSE FALSE FALSE TRUE TRUE
## TEAD3 FALSE FALSE FALSE TRUE TRUE
## MAN2B2 FALSE FALSE FALSE TRUE TRUE
## RIPK1 FALSE FALSE FALSE TRUE TRUE
## SRD5A1 FALSE FALSE FALSE TRUE TRUE
## WDR27 FALSE FALSE FALSE TRUE TRUE
## INSYN2B FALSE FALSE FALSE TRUE TRUE
## C1QTNF3-AMACR FALSE FALSE FALSE TRUE TRUE
## ABCB4 FALSE FALSE FALSE TRUE TRUE
## CCND3 FALSE FALSE FALSE TRUE TRUE
## MAPKAP1 FALSE FALSE FALSE TRUE TRUE
## NUP43 FALSE FALSE FALSE TRUE TRUE
## PDE1C FALSE FALSE FALSE TRUE TRUE
## POLR2J3 FALSE FALSE FALSE TRUE TRUE
## ADAM9 FALSE FALSE FALSE TRUE TRUE
## EPHB4 FALSE FALSE FALSE TRUE TRUE
## COL27A1 FALSE FALSE FALSE TRUE TRUE
## SPDYE3 FALSE FALSE FALSE TRUE TRUE
## OSBPL5 FALSE FALSE FALSE TRUE TRUE
## CUBN FALSE FALSE FALSE TRUE TRUE
## ELMOD1 FALSE FALSE FALSE TRUE TRUE
## SHLD2 FALSE FALSE FALSE TRUE TRUE
## EML3 FALSE FALSE FALSE TRUE TRUE
## TIMM23B-AGAP6 FALSE FALSE FALSE TRUE TRUE
## AGAP4 FALSE FALSE FALSE TRUE TRUE
## FAM111B FALSE FALSE FALSE TRUE TRUE
## TIMM23B FALSE FALSE FALSE TRUE TRUE
## AGAP9 FALSE FALSE FALSE TRUE TRUE
## DGKA FALSE FALSE FALSE TRUE TRUE
## DHRS12 FALSE FALSE FALSE TRUE TRUE
## RFLNA FALSE FALSE FALSE TRUE TRUE
## LTB4R FALSE FALSE FALSE TRUE TRUE
## LPCAT2 FALSE FALSE FALSE TRUE TRUE
## CNTNAP1 FALSE FALSE FALSE TRUE TRUE
## ADCY9 FALSE FALSE FALSE TRUE TRUE
## SLCO3A1 FALSE FALSE FALSE TRUE TRUE
## NPIPA1 FALSE FALSE FALSE TRUE TRUE
## ADA2 FALSE FALSE FALSE TRUE TRUE
## MAN2B1 FALSE FALSE FALSE TRUE TRUE
## CARD8 FALSE FALSE FALSE TRUE TRUE
## SLC66A2 FALSE FALSE FALSE TRUE TRUE
## ZNF486 FALSE FALSE FALSE TRUE TRUE
## GYG2 FALSE FALSE FALSE TRUE TRUE
## LZTR1 FALSE FALSE FALSE TRUE TRUE
## MT-ATP6 FALSE FALSE FALSE TRUE TRUE
## MT-ATP8 FALSE FALSE FALSE TRUE TRUE
# Membership table of down-regulated genes across the five gene lists. A gene
# is kept when it is in our own list (Ciuba) and in at least one of the others.
all_dn_all <- local({
  candidate_genes <- unique(c(kanton_hits_dn, down123, dn_genes, dn.hits.geneN, conf_down))
  membership <- data.frame(
    Kanton = candidate_genes %in% kanton_hits_dn,
    Jorstad = candidate_genes %in% down123,
    Ma = candidate_genes %in% dn_genes,
    foetal = candidate_genes %in% conf_down,
    Ciuba = candidate_genes %in% dn.hits.geneN,
    row.names = candidate_genes
  )
  # Same condition as "row sum over the first four columns > 0".
  in_other_list <- membership$Kanton | membership$Jorstad |
    membership$Ma | membership$foetal
  membership[membership$Ciuba & in_other_list, ]
})
all_dn_all## Kanton Jorstad Ma foetal Ciuba
## CELF4 TRUE FALSE FALSE FALSE TRUE
## FGF13 TRUE FALSE FALSE FALSE TRUE
## SYN1 TRUE FALSE TRUE TRUE TRUE
## PDZRN4 FALSE TRUE TRUE TRUE TRUE
## PLCL2 FALSE TRUE TRUE FALSE TRUE
## SRSF4 FALSE TRUE FALSE TRUE TRUE
## PBLD FALSE TRUE TRUE TRUE TRUE
## GABPB1 FALSE TRUE TRUE FALSE TRUE
## UNC5D FALSE TRUE TRUE TRUE TRUE
## RAPGEF5 FALSE TRUE FALSE FALSE TRUE
## DCC FALSE TRUE TRUE FALSE TRUE
## ATP8A2 FALSE TRUE TRUE FALSE TRUE
## PANK3 FALSE TRUE FALSE TRUE TRUE
## RCAN3 FALSE TRUE TRUE TRUE TRUE
## MAP3K2 FALSE TRUE FALSE FALSE TRUE
## NUDT4 FALSE TRUE FALSE TRUE TRUE
## RND3 FALSE TRUE TRUE TRUE TRUE
## SPAST FALSE TRUE TRUE FALSE TRUE
## FBXO11 FALSE TRUE FALSE FALSE TRUE
## ACIN1 FALSE FALSE TRUE FALSE TRUE
## AHCTF1 FALSE FALSE TRUE FALSE TRUE
## ATAD2B FALSE FALSE TRUE FALSE TRUE
## CECR2 FALSE FALSE TRUE FALSE TRUE
## CEP104 FALSE FALSE TRUE FALSE TRUE
## CREBRF FALSE FALSE TRUE FALSE TRUE
## CSPP1 FALSE FALSE TRUE FALSE TRUE
## DONSON FALSE FALSE TRUE TRUE TRUE
## DYRK2 FALSE FALSE TRUE TRUE TRUE
## EED FALSE FALSE TRUE TRUE TRUE
## EFL1 FALSE FALSE TRUE TRUE TRUE
## ERCC6L2 FALSE FALSE TRUE FALSE TRUE
## FBXW7 FALSE FALSE TRUE FALSE TRUE
## GRK4 FALSE FALSE TRUE FALSE TRUE
## INA FALSE FALSE TRUE TRUE TRUE
## INSR FALSE FALSE TRUE FALSE TRUE
## KAT6A FALSE FALSE TRUE FALSE TRUE
## KLHL24 FALSE FALSE TRUE FALSE TRUE
## MBTD1 FALSE FALSE TRUE FALSE TRUE
## MIB1 FALSE FALSE TRUE FALSE TRUE
## MLLT10 FALSE FALSE TRUE FALSE TRUE
## PGBD2 FALSE FALSE TRUE FALSE TRUE
## POLR1B FALSE FALSE TRUE TRUE TRUE
## PPM1A FALSE FALSE TRUE TRUE TRUE
## PPP4R3B FALSE FALSE TRUE TRUE TRUE
## PTPN4 FALSE FALSE TRUE FALSE TRUE
## RAB3A FALSE FALSE TRUE TRUE TRUE
## RPRD2 FALSE FALSE TRUE FALSE TRUE
## STRN3 FALSE FALSE TRUE FALSE TRUE
## STXBP1 FALSE FALSE TRUE FALSE TRUE
## SYT16 FALSE FALSE TRUE FALSE TRUE
## TERF1 FALSE FALSE TRUE TRUE TRUE
## TFDP2 FALSE FALSE TRUE FALSE TRUE
## TRIM2 FALSE FALSE TRUE TRUE TRUE
## TRIM23 FALSE FALSE TRUE TRUE TRUE
## TTC33 FALSE FALSE TRUE TRUE TRUE
## TUBB4A FALSE FALSE TRUE TRUE TRUE
## UBN2 FALSE FALSE TRUE FALSE TRUE
## ZNF148 FALSE FALSE TRUE FALSE TRUE
## ZNF595 FALSE FALSE TRUE FALSE TRUE
## ZRANB3 FALSE FALSE TRUE FALSE TRUE
## NUP133 FALSE FALSE FALSE TRUE TRUE
## RSBN1 FALSE FALSE FALSE TRUE TRUE
## CCDC181 FALSE FALSE FALSE TRUE TRUE
## PRPF38A FALSE FALSE FALSE TRUE TRUE
## ETAA1 FALSE FALSE FALSE TRUE TRUE
## PDIK1L FALSE FALSE FALSE TRUE TRUE
## SOX11 FALSE FALSE FALSE TRUE TRUE
## AIDA FALSE FALSE FALSE TRUE TRUE
## SRSF10 FALSE FALSE FALSE TRUE TRUE
## GDAP2 FALSE FALSE FALSE TRUE TRUE
## ARL6 FALSE FALSE FALSE TRUE TRUE
## PHOSPHO2 FALSE FALSE FALSE TRUE TRUE
## SMARCA5 FALSE FALSE FALSE TRUE TRUE
## KCNH7 FALSE FALSE FALSE TRUE TRUE
## C1GALT1C1L FALSE FALSE FALSE TRUE TRUE
## KIF2A FALSE FALSE FALSE TRUE TRUE
## CDC5L FALSE FALSE FALSE TRUE TRUE
## PRPF4B FALSE FALSE FALSE TRUE TRUE
## CLK4 FALSE FALSE FALSE TRUE TRUE
## OARD1 FALSE FALSE FALSE TRUE TRUE
## KIF3A FALSE FALSE FALSE TRUE TRUE
## CEP162 FALSE FALSE FALSE TRUE TRUE
## EIF4E FALSE FALSE FALSE TRUE TRUE
## ZUP1 FALSE FALSE FALSE TRUE TRUE
## ZCCHC10 FALSE FALSE FALSE TRUE TRUE
## PGM2 FALSE FALSE FALSE TRUE TRUE
## HDAC2 FALSE FALSE FALSE TRUE TRUE
## ZKSCAN8 FALSE FALSE FALSE TRUE TRUE
## BRD2 FALSE FALSE FALSE TRUE TRUE
## CFAP69 FALSE FALSE FALSE TRUE TRUE
## CBLL1 FALSE FALSE FALSE TRUE TRUE
## RANBP6 FALSE FALSE FALSE TRUE TRUE
## ZNF92 FALSE FALSE FALSE TRUE TRUE
## C9orf72 FALSE FALSE FALSE TRUE TRUE
## TMEM196 FALSE FALSE FALSE TRUE TRUE
## ZBTB10 FALSE FALSE FALSE TRUE TRUE
## UBXN2B FALSE FALSE FALSE TRUE TRUE
## RPAP3 FALSE FALSE FALSE TRUE TRUE
## FAM76B FALSE FALSE FALSE TRUE TRUE
## FOLH1 FALSE FALSE FALSE TRUE TRUE
## IKZF5 FALSE FALSE FALSE TRUE TRUE
## SMC3 FALSE FALSE FALSE TRUE TRUE
## KMT5B FALSE FALSE FALSE TRUE TRUE
## DPF2 FALSE FALSE FALSE TRUE TRUE
## LIN7C FALSE FALSE FALSE TRUE TRUE
## DCDC1 FALSE FALSE FALSE TRUE TRUE
## GVQW3 FALSE FALSE FALSE TRUE TRUE
## HSPA14 FALSE FALSE FALSE TRUE TRUE
## C10orf143 FALSE FALSE FALSE TRUE TRUE
## YAF2 FALSE FALSE FALSE TRUE TRUE
## PKP2 FALSE FALSE FALSE TRUE TRUE
## ATP2B1 FALSE FALSE FALSE TRUE TRUE
## VCPKMT FALSE FALSE FALSE TRUE TRUE
## CAND1 FALSE FALSE FALSE TRUE TRUE
## ZC2HC1C FALSE FALSE FALSE TRUE TRUE
## RBM26 FALSE FALSE FALSE TRUE TRUE
## THTPA FALSE FALSE FALSE TRUE TRUE
## ZNF200 FALSE FALSE FALSE TRUE TRUE
## CTCF FALSE FALSE FALSE TRUE TRUE
## AKTIP FALSE FALSE FALSE TRUE TRUE
## NRG4 FALSE FALSE FALSE TRUE TRUE
## ADAP2 FALSE FALSE FALSE TRUE TRUE
## DLL3 FALSE FALSE FALSE TRUE TRUE
## ZNF175 FALSE FALSE FALSE TRUE TRUE
## APOE FALSE FALSE FALSE TRUE TRUE
## OSBPL2 FALSE FALSE FALSE TRUE TRUE
## ZNF304 FALSE FALSE FALSE TRUE TRUE
## ZNF776 FALSE FALSE FALSE TRUE TRUE
## EID2B FALSE FALSE FALSE TRUE TRUE
## MEX3C FALSE FALSE FALSE TRUE TRUE
## ZNF17 FALSE FALSE FALSE TRUE TRUE
## ZNF600 FALSE FALSE FALSE TRUE TRUE
## ZNF181 FALSE FALSE FALSE TRUE TRUE
## PEG3 FALSE FALSE FALSE TRUE TRUE
## OLIG2 FALSE FALSE FALSE TRUE TRUE
## ZNF134 FALSE FALSE FALSE TRUE TRUE
## RBMX FALSE FALSE FALSE TRUE TRUE
## PHF6 FALSE FALSE FALSE TRUE TRUE
## MT-CO3 FALSE FALSE FALSE TRUE TRUE
## PGAM4 FALSE FALSE FALSE TRUE TRUE
## RTL5 FALSE FALSE FALSE TRUE TRUE
# Print the R version, platform, locale, and attached/loaded package versions
# for this session; the captured output is echoed in the `##` lines below.
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] pl_PL.UTF-8/pl_PL.UTF-8/pl_PL.UTF-8/C/pl_PL.UTF-8/pl_PL.UTF-8
##
## attached base packages:
## [1] tools grid stats4 parallel stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.6.0
## [2] readr_2.1.4
## [3] SeuratObject_4.1.3
## [4] Seurat_4.3.0
## [5] ggVennDiagram_1.2.2
## [6] rBLAST_0.99.2
## [7] Rsubread_2.6.4
## [8] BSgenome.Ptroglodytes.UCSC.panTro6_1.4.2
## [9] beeswarm_0.4.0
## [10] VennDiagram_1.7.3
## [11] futile.logger_1.4.3
## [12] scuttle_1.2.1
## [13] SingleCellExperiment_1.14.1
## [14] forcats_1.0.0
## [15] RColorBrewer_1.1-3
## [16] glmGamPoi_1.4.0
## [17] reshape2_1.4.4
## [18] kableExtra_1.3.4
## [19] plotly_4.10.1
## [20] dplyr_1.1.2
## [21] ggrepel_0.9.3
## [22] data.table_1.14.8
## [23] pheatmap_1.0.12
## [24] LSD_4.1-0
## [25] BSgenome.Hsapiens.UCSC.hg38_1.4.3
## [26] BSgenome_1.60.0
## [27] colorspace_2.1-0
## [28] rtracklayer_1.52.1
## [29] Rsamtools_2.8.0
## [30] Biostrings_2.60.2
## [31] XVector_0.32.0
## [32] GenomicFeatures_1.44.2
## [33] biomaRt_2.48.3
## [34] Gviz_1.36.2
## [35] st_1.2.7
## [36] sda_1.3.8
## [37] fdrtool_1.2.17
## [38] corpcor_1.6.10
## [39] entropy_1.3.1
## [40] smoothmest_0.1-3
## [41] MASS_7.3-58.3
## [42] genefilter_1.74.1
## [43] edgeR_3.34.1
## [44] limma_3.48.3
## [45] DESeq2_1.32.0
## [46] SummarizedExperiment_1.22.0
## [47] MatrixGenerics_1.4.3
## [48] matrixStats_0.63.0
## [49] GenomicRanges_1.44.0
## [50] GenomeInfoDb_1.28.4
## [51] geneplotter_1.70.0
## [52] annotate_1.70.0
## [53] XML_3.99-0.14
## [54] AnnotationDbi_1.54.1
## [55] IRanges_2.26.0
## [56] S4Vectors_0.30.2
## [57] lattice_0.21-8
## [58] locfit_1.5-9.7
## [59] Biobase_2.52.0
## [60] BiocGenerics_0.38.0
## [61] plyr_1.8.8
## [62] ggplot2_3.4.2
## [63] Matrix_1.5-4
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 scattermore_0.8
## [3] tidyr_1.3.0 bit64_4.0.5
## [5] knitr_1.42 irlba_2.3.5.1
## [7] DelayedArray_0.18.0 rpart_4.1.19
## [9] KEGGREST_1.32.0 RCurl_1.98-1.12
## [11] AnnotationFilter_1.16.0 generics_0.1.3
## [13] cowplot_1.1.1 lambda.r_1.2.4
## [15] RSQLite_2.3.1 RANN_2.6.1
## [17] proxy_0.4-27 future_1.32.0
## [19] tzdb_0.3.0 bit_4.0.5
## [21] spatstat.data_3.0-1 webshot_0.5.4
## [23] xml2_1.3.3 httpuv_1.6.9
## [25] xfun_0.38 hms_1.1.3
## [27] jquerylib_0.1.4 evaluate_0.20
## [29] promises_1.2.0.1 fansi_1.0.4
## [31] restfulr_0.0.15 progress_1.2.2
## [33] dbplyr_2.3.2 igraph_1.4.2
## [35] DBI_1.1.3 htmlwidgets_1.6.2
## [37] spatstat.geom_3.1-0 purrr_1.0.1
## [39] ellipsis_0.3.2 backports_1.4.1
## [41] deldir_1.0-6 sparseMatrixStats_1.4.2
## [43] vctrs_0.6.1 ensembldb_2.16.4
## [45] ROCR_1.0-11 abind_1.4-5
## [47] cachem_1.0.7 withr_2.5.0
## [49] RVenn_1.1.0 progressr_0.13.0
## [51] checkmate_2.1.0 sctransform_0.3.5
## [53] GenomicAlignments_1.28.0 prettyunits_1.1.1
## [55] goftest_1.2-3 svglite_2.1.1
## [57] cluster_2.1.4 lazyeval_0.2.2
## [59] crayon_1.5.2 spatstat.explore_3.1-0
## [61] units_0.8-1 labeling_0.4.2
## [63] pkgconfig_2.0.3 nlme_3.1-162
## [65] ProtGenerics_1.24.0 nnet_7.3-18
## [67] rlang_1.1.0 globals_0.16.2
## [69] lifecycle_1.0.3 miniUI_0.1.1.1
## [71] filelock_1.0.2 BiocFileCache_2.0.0
## [73] dichromat_2.0-0.1 invgamma_1.1
## [75] polyclip_1.10-4 lmtest_0.9-40
## [77] ashr_2.2-54 carData_3.0-5
## [79] zoo_1.8-11 base64enc_0.1-3
## [81] ggridges_0.5.4 png_0.1-8
## [83] viridisLite_0.4.1 rjson_0.2.21
## [85] bitops_1.0-7 KernSmooth_2.23-20
## [87] blob_1.2.4 DelayedMatrixStats_1.14.3
## [89] classInt_0.4-9 mixsqp_0.3-48
## [91] SQUAREM_2021.1 stringr_1.5.0
## [93] spatstat.random_3.1-4 parallelly_1.35.0
## [95] rstatix_0.7.2 jpeg_0.1-10
## [97] ggsignif_0.6.4 beachmat_2.8.1
## [99] scales_1.2.1 memoise_2.0.1
## [101] magrittr_2.0.3 ica_1.0-3
## [103] zlibbioc_1.38.0 compiler_4.1.0
## [105] BiocIO_1.2.0 fitdistrplus_1.1-8
## [107] cli_3.6.1 listenv_0.9.0
## [109] patchwork_1.1.2 pbapply_1.7-0
## [111] htmlTable_2.4.1 formatR_1.14
## [113] Formula_1.2-5 tidyselect_1.2.0
## [115] stringi_1.7.12 highr_0.10
## [117] yaml_2.3.7 latticeExtra_0.6-30
## [119] sass_0.4.5 VariantAnnotation_1.38.0
## [121] future.apply_1.10.0 rstudioapi_0.14
## [123] foreign_0.8-84 gridExtra_2.3
## [125] farver_2.1.1 Rtsne_0.16
## [127] digest_0.6.31 shiny_1.7.4
## [129] Rcpp_1.0.10 car_3.1-2
## [131] broom_1.0.4 later_1.3.0
## [133] RcppAnnoy_0.0.20 httr_1.4.5
## [135] biovizBase_1.40.0 sf_1.0-12
## [137] tensor_1.5 rvest_1.0.3
## [139] reticulate_1.28 truncnorm_1.0-9
## [141] splines_4.1.0 uwot_0.1.14
## [143] spatstat.utils_3.0-2 sp_1.6-0
## [145] systemfonts_1.0.4 xtable_1.8-4
## [147] jsonlite_1.8.4 futile.options_1.0.1
## [149] R6_2.5.1 Hmisc_5.0-1
## [151] pillar_1.9.0 htmltools_0.5.5
## [153] mime_0.12 glue_1.6.2
## [155] fastmap_1.1.1 BiocParallel_1.26.2
## [157] class_7.3-21 codetools_0.2-19
## [159] utf8_1.2.3 spatstat.sparse_3.0-1
## [161] bslib_0.4.2 tibble_3.2.1
## [163] curl_5.0.0 leiden_0.4.3
## [165] interp_1.1-4 survival_3.5-5
## [167] rmarkdown_2.21 munsell_0.5.0
## [169] e1071_1.7-13 GenomeInfoDbData_1.2.6
## [171] gtable_0.3.3