Gene mapping as of February 2022.
# Query Ensembl BioMart for annotation of the genes listed in
# `human_count$Geneid` and store the result on disk.
# Requires `human_count` and `objects_directory` to exist in the session.
ensembl_hg38 <- useEnsembl(biomart = "ENSEMBL_MART_ENSEMBL",
                           dataset = "hsapiens_gene_ensembl",
                           mirror = "useast")
genemap <- getBM(
  attributes = c("ensembl_gene_id", "chromosome_name", "start_position",
                 "end_position", "transcript_start", "transcript_end",
                 "transcript_length", "strand", "percentage_gene_gc_content",
                 "transcription_start_site", "external_gene_name",
                 "go_id", "gene_biotype", "hgnc_symbol", "arrayexpress"),
  filters = "ensembl_gene_id",
  values = human_count$Geneid,
  mart = ensembl_hg38)
# NOTE(review): the object saved here is `ensembl_hg38_genemap`, but the query
# result above is called `genemap`, and later code loads this file and then
# uses `genemap`. Confirm which object is meant to be stored.
save(ensembl_hg38_genemap, file = paste0(objects_directory, "ensembl_hg38_genemap.RData"))
# Build promoter windows (TSS - 500 .. TSS + 500) from the GTF annotation:
# first one per transcript, then one representative window per gene.
# Requires `outputs_directory`, `objects_directory` and `hs_me3`.
gtf <- import(paste0(outputs_directory, 'hg38_ensembl.gtf'))
promoters_ap <- data.frame( chr = as.character(chrom(gtf)),
                            start = as.numeric(start(gtf)),
                            end = as.numeric(end(gtf)),
                            strand = as.character(strand(gtf)),
                            transcript_id = as.character(gtf$transcript_id),
                            gene_id = as.character(gtf$gene_id),
                            gene_name = as.character(gtf$gene_name),
                            gene_biotype = as.character(gtf$gene_biotype),
                            type = gtf$type,
                            stringsAsFactors = FALSE )
length(unique(promoters_ap$gene_id))
# keep only the records of type "transcript"
promoters_ap <- promoters_ap[promoters_ap$type == "transcript", ]
promoters_sp <- split(promoters_ap, promoters_ap$transcript_id)

## for each transcript find the TSS
promoters_tss <- do.call('rbind', lapply(promoters_sp, function(x) {
  # "+" strand: the TSS is the smallest start; any other strand: the largest end
  if (as.character(unique(x$strand)) == "+") {
    tp <- x[which.min(x$start), ]
    tp$tss <- tp$start
  } else {
    tp <- x[which.max(x$end), ]
    tp$tss <- tp$end
  }
  tp
}))
##
promoters_tss_gr <- GRanges(seqnames = promoters_tss$chr,
                            ranges = IRanges(as.numeric(promoters_tss$tss) - 500,
                                             end = as.numeric(promoters_tss$tss) + 500,
                                             names = promoters_tss$transcript_id),
                            strand = promoters_tss$strand,
                            gene_id = promoters_tss$gene_id,
                            gene_name = promoters_tss$gene_name,
                            gene_biotype = promoters_tss$gene_biotype,
                            tss = promoters_tss$tss)
seqlevelsStyle(promoters_tss_gr) <- 'ucsc'

# me3_peak: 0 when the window overlaps nothing in `hs_me3`, otherwise the index
# of an overlapping `hs_me3` range. The overlap is computed once and reused.
promoters_tss$me3_peak <- 0
me3_hits <- findOverlaps(promoters_tss_gr, hs_me3)
promoters_tss$me3_peak[queryHits(me3_hits)] <- subjectHits(me3_hits)
length(unique(promoters_tss$gene_id))
all(names(promoters_tss_gr) == promoters_tss$transcript_id)
promoters_tss_split <- split(promoters_tss, promoters_tss$gene_id)
length(promoters_tss_split)

# One transcript per gene: restrict to transcripts with an overlap when any
# exist, then take the most upstream TSS for the gene's strand.
promoters_filtered <- do.call('rbind', lapply(promoters_tss_split, function(p) {
  if (sum(p$me3_peak) > 0) {
    PS <- p[p$me3_peak > 0, ]
    if (unique(PS$strand) == '-') res <- PS[which.max(PS$tss), ] else res <- PS[which.min(PS$tss), ]
  } else {
    if (unique(p$strand) == '+') res <- p[which.min(p$tss), ] else res <- p[which.max(p$tss), ]
  }
  res
}))
length(unique(promoters_filtered$gene_id))
promoters_filtered_gr <- GRanges(seqnames = promoters_filtered$chr,
                                 ranges = IRanges(as.numeric(promoters_filtered$tss) - 500,
                                                  end = as.numeric(promoters_filtered$tss) + 500,
                                                  names = promoters_filtered$gene_id),
                                 strand = promoters_filtered$strand,
                                 gene_id = promoters_filtered$gene_id,
                                 gene_name = promoters_filtered$gene_name,
                                 gene_biotype = promoters_filtered$gene_biotype,
                                 tss = promoters_filtered$tss)
seqlevelsStyle(promoters_filtered_gr) <- 'ucsc'
save( promoters_filtered, promoters_filtered_gr, promoters_tss, promoters_tss_gr,
      file = paste0(objects_directory, 'tss_objects.RData') )
# Load stored objects and derive expression classes from the Zhang count table.
# `Fetal_Markers`, `Adult_Markers` and `genemap` are not created in this
# section; presumably they come from the loaded .RData files (verify).
load(paste0(objects_directory, "Zhang_DataBundle.RData"))
load(paste0(objects_directory, "GTF_Annotation.RData"))
load(paste0(objects_directory, "ensembl_hg38_genemap.RData"))
# one row per Ensembl gene id (first occurrence kept)
genemapu <- genemap[!duplicated(genemap$ensembl_gene_id), ]
species.colors <- c( 'MF' = '#66CCFF', 'HS' = '#000000', 'PT' = '#FF3300', 'MM' = '#0033FF')

countdata_zhang <- read.table(file = paste0(outputs_directory, "Zhang_gene_counts_redownloaded.txt"), header = TRUE)

# Pool counts into a "fetal" and an "adult" column; Length is kept for GetTPM.
zhang_countdata_4tpm <- data.frame(
  fetal = rowSums(countdata_zhang[, c(36, 28, 29, 23, 24, 11)]),
  adult = rowSums(countdata_zhang[, colnames(countdata_zhang) %like% "YO_ATL_Astro|YO_HPC_Astro"]),
  Length = countdata_zhang$Length,
  row.names = countdata_zhang$Geneid,
  stringsAsFactors = FALSE)
zhang_countdata_tpm <- as.data.frame(GetTPM(zhang_countdata_4tpm, 1:2,
                                            rownames(zhang_countdata_4tpm)))

# Expression classes: > 1 in either pool = expressed; < 0.1 in both = not expressed
expressed <- zhang_countdata_tpm[zhang_countdata_tpm$fetal > 1 | zhang_countdata_tpm$adult > 1, ]
not_expressed <- zhang_countdata_tpm[zhang_countdata_tpm$fetal < 0.1 & zhang_countdata_tpm$adult < 0.1, ]

expressed_fetal <- zhang_countdata_tpm[zhang_countdata_tpm$fetal > 1, ]
expressed_adult <- zhang_countdata_tpm[zhang_countdata_tpm$adult > 1, ]

expressed_only_fetal <- rownames(expressed_fetal)[! rownames(expressed_fetal) %in% rownames(expressed_adult) ]
expressed_only_adult <- rownames(expressed_adult)[! rownames(expressed_adult) %in% rownames(expressed_fetal) ]
expressed_fetal_adult <- rownames(expressed_fetal)[rownames(expressed_fetal) %in% rownames(expressed_adult) ]

# HGNC symbols of the marker gene sets
fetal_markers_geneName <- unique(genemap$hgnc_symbol[genemap$ensembl_gene_id %in% Fetal_Markers])
adult_markers_geneName <- unique(genemap$hgnc_symbol[genemap$ensembl_gene_id %in% Adult_Markers])
Based on data quality and previous analyses, the selected samples are processed and filtered. We perform a differential expression analysis of genes between 4 species - Humans, Chimps, Rhesus Macaques and Crab Eating Macaques - based upon their expression profile on the Consensus Genome. This sheet details the steps of the differential analysis, with relevant graphs for an overview of the data, and finally lists the significant hits based on the canonical workflow using DESeq2.
# Read the two count tables, collapse replicate columns into one column per
# sample, build the sample metadata and compute the TPM table.
countdata <- read.table(file = paste0(outputs_directory, "featureCounts_Counts_MO_All.tsv"), header = TRUE)
countdata_tcw_iAstrocytes <- read.table(file = paste0(outputs_directory, 'tcw_latest_gene_counts.txt'), header = TRUE)
## prep the tables
colnames(countdata_tcw_iAstrocytes)[7:ncol(countdata_tcw_iAstrocytes)] <- c('tcw_3651_Astros', 'tcw_3651_NPCs', 'tcw_9319_Astros',
                                                                            'tcw_9429_Astros', 'tcw_9429_NPCs', 'tcw_BJ_Astros',
                                                                            'Cerebral_Cortex_pAstros', 'Midbrain_pAstros')
# both tables must list the genes in the same order, because columns are combined by position below
all(rownames(countdata) == countdata_tcw_iAstrocytes$Geneid)
## [1] TRUE
countdata <- data.frame( PrimaryFetal_F = countdata$PrimaryFetal_F,
                         PrimaryFetal_M = countdata$PrimaryFetal_M,
                         PrimaryFetal_1 = countdata$PrimaryFetal_1,
                         HSapiens_ELE10 = countdata$HSapiens_ELE10_1 + countdata$HSapiens_ELE10_2,
                         HSapiens_ELE30 = countdata$HSapiens_ELE30_1 + countdata$HSapiens_ELE30_2,
                         HSapiens_TCW_F1 = countdata_tcw_iAstrocytes[, 'tcw_3651_Astros'],
                         HSapiens_TCW_F3 = countdata_tcw_iAstrocytes[, 'tcw_9319_Astros'],
                         HSapiens_TCW_F4 = countdata_tcw_iAstrocytes[, 'tcw_9429_Astros'],
                         Chimp_SandraA = countdata$Chimp_Sandra_BD1 + countdata$Chimp_Sandra_BD2 + countdata$Chimp_Sandra_nwNPC,
                         Chimp_Mandy6 = countdata$Chimp_Mandy6 + countdata$Chimp_Mandy6_New,
                         Chimp_Mandy4 = countdata$Chimp_Mandy4_New,
                         RhMacaque_Becky = countdata$RhMacaque_Becky_BD1 + countdata$RhMacaque_Becky_BD2,
                         row.names = rownames(countdata))
sample_names <- c("PrimaryFetal_F",
                  "PrimaryFetal_M",
                  "PrimaryFetal_1",
                  "HSapiens_ELE10",
                  "HSapiens_ELE30",
                  "HSapiens_TCW_F1",
                  "HSapiens_TCW_F3",
                  "HSapiens_TCW_F4",
                  "Chimp_SandraA",
                  "Chimp_Mandy6",
                  "Chimp_Mandy4",
                  "RhMacaque_Becky" )
# Setting up metadata for included samples
species <- c(rep("HS", 8), rep("PT", 3), "MM")
sources <- c(rep("Fetal", 3), rep("iPSC", 9))
sub_class <- c("PF", "PF", "PF", "ELE10", "ELE30", "TCW_F1", "TCW_F3", "TCW_F4",
               "SandraA", "Mandy6", "Mandy4",
               "Becky")
metadata <- data.frame(species = as.factor(species),
                       sources = as.factor(sources),
                       class = as.factor(sub_class),
                       row.names = sample_names,
                       gender = c('F', 'M', 'F', rep('F', 9)),
                       lab = c('other', 'other', rep("PL", 3), rep("other", 3), rep("PL", 4)))
metadata$lp <- seq_len(nrow(metadata))
all(colnames(countdata) == rownames(metadata))
## [1] TRUE
# Creating a TPM normalized table for the read counts for all genes
# gene lengths are taken from the tcw table, matched by gene id
Length <- countdata_tcw_iAstrocytes$Length[match(rownames(countdata), countdata_tcw_iAstrocytes$Geneid)]
tpm_norm_count_table <- GetTPM(data.frame(cbind(countdata, Length = Length)),
                               1:ncol(countdata),
                               rownames(countdata))
# Creating a TPM normalized table for the read counts for all genes
all(rownames(tpm_norm_count_table) == rownames(zhang_countdata_tpm))
## [1] TRUE
tpm_norm_count_all <- cbind( zhang_countdata_tpm, tpm_norm_count_table )
all(rownames(zhang_countdata_tpm) == rownames(tpm_norm_count_table))
## [1] TRUE
all(rownames(tpm_norm_count_table) == rownames(zhang_countdata_tpm))
## [1] TRUE
# filtering out expressed fetal and adult genes
expressed_fetal_str <- zhang_countdata_tpm[zhang_countdata_tpm$fetal > 5, ]
expressed_adult_str <- zhang_countdata_tpm[zhang_countdata_tpm$adult > 5, ]
expressed_only_fetal_str <- rownames(expressed_fetal_str)[! rownames(expressed_fetal_str) %in% rownames(expressed_adult_str) ]
expressed_only_adult_str <- rownames(expressed_adult_str)[! rownames(expressed_adult_str) %in% rownames(expressed_fetal_str) ]

# Per-sample count table of the Zhang data: the six columns selected by
# position first, then the columns matched by name.
zhang_countdata_DS <- data.frame(countdata_zhang[, c(36, 28, 29, 23, 24, 11)],
                                 countdata_zhang[, colnames(countdata_zhang) %like% "YO_ATL_Astro|YO_HPC_Astro"],
                                 row.names = countdata_zhang$Geneid)
# Differentiation score per sample:
#   log( summed counts of Adult_Markers / summed counts of Fetal_Markers )
# NOTE(review): `log()` is the natural logarithm, while the axis labels below
# read "Log[2]". Either use log2() here or correct the label; left unchanged.
zhang_countdata_DS <- log( colSums(zhang_countdata_DS[rownames(zhang_countdata_DS) %in% Adult_Markers, ]) /
                           colSums(zhang_countdata_DS[rownames(zhang_countdata_DS) %in% Fetal_Markers, ]) )
countdata_DS <- log( colSums(countdata[rownames(countdata) %in% Adult_Markers, ]) / colSums(countdata[rownames(countdata) %in% Fetal_Markers, ]) )

ds <- c(zhang_countdata_DS, countdata_DS)
# group labels, in the order the scores were concatenated
sampleType <- c(rep('acute_fetal', 6), rep('acute_adult', 15),
                rep('fetal_cultured', 3), rep('iAstrocytes', 9))
dsd <- split(ds, sampleType)

par(mfrow=c(1,1),mar=c(5,4,1,1))
beeswarm(ds ~ sampleType, pch = 19,
         col = c( 'blue4', 'turquoise3', 'purple3', 'pink4'),
         method = "swarm", ylim=c(-2,5), ylab="Log[2] Differentiation score" )
axis(2,lwd=2)
box(col="black",lwd=2)

# Same score for the samples from position 4 onwards of `countdata_DS`, grouped by species
sampleType <- factor( c(rep('human', 5), rep('chimpanzee', 3), rep('rhesus', 1)),
                      levels = c('human', 'chimpanzee', 'rhesus') )
beeswarm(countdata_DS[4:length(countdata_DS)] ~ sampleType, pch = 19,
         col = c( 'black', 'red', 'blue'),
         method = "swarm", ylim=c(-2,5), ylab="Log[2] Differentiation score" )
axis(2,lwd=2)
box(col="black",lwd=2)
beeswarm(countdata_DS[4:length(countdata_DS)] ~ sampleType, pch = 19,
         col = c( 'black', 'red', 'blue'),
         method = "swarm", ylim=c(-2,5), ylab="Log[2] Differentiation score" )
axis(2,lwd=2)
box(col="black",lwd=2)
# Heatmap of log10 TPM for a list of genes: those annotated with GO:0048708 in
# `genemap` plus a hand-picked set of symbols.
# `%in%` is used instead of `==` so that a missing go_id cannot select all-NA
# rows (which would add NA to the symbol list).
astro_genes <- unlist(unique( genemap[genemap$go_id %in% 'GO:0048708', 'hgnc_symbol']))
astro_genes <- unique( c(astro_genes,
                         'ABL1','ABL2', 'ARP3','ADORA2A', 'AGER', 'AGT',
                         'APP', 'ATF5', 'BIN','BMP2', 'C1QA', 'C5AR1',
                         'CNTF','CNTN2','DAB1','DLL1','DLL3','DRD1',
                         'EIF2B5','EPHA4','F2','FGFR3','GCM1','GFAP',
                         'GM5849','GPR37l1','GRN','HES1','HES5','HMGA2',
                         'ID2','ID4','IFNG','IFNGR1','IL1B','IL6ST',
                         'KDM4A','LAMB2','LDLR','MAG','MAP2K1','MAPK3',
                         'MBD1','MECP2','MT3','MYCN','NF1','NFIX',
                         'NKX2-2','NOG','NOTCH1','NR1D1','NR2E1','NTRK3',
                         'PLP1','PLPP3','POU3F2','PRPF19','PSEN1','PTPN11',
                         'ROR2','S100A8','S100A9','SERPINE2','SHH','SMO',
                         'SOX6','SOX8','SOX9','STAT3','TAL1','TLR4',
                         'TREM2','TSPAN2','TTC21B','VIM', 'SLC1A3'))
# from here on `astro_genes` is a two-column table: Ensembl id and symbol
astro_genes <- data.frame(unique(genemap[which(genemap$hgnc_symbol %in% astro_genes), c('ensembl_gene_id', 'hgnc_symbol')]))

tmp_count_table <- tpm_norm_count_table[rownames(tpm_norm_count_table) %in% astro_genes$ensembl_gene_id, ]
rownames(tmp_count_table) <- astro_genes$hgnc_symbol[match(rownames(tmp_count_table), astro_genes$ensembl_gene_id)]
count_frame <- as.data.frame(log10(tmp_count_table))
# log10(0) is -Inf; show those cells as 0
count_frame[count_frame == -Inf] <- 0
# rows ordered by decreasing median across samples
count_frame <- count_frame[order(apply(count_frame, 1, median), decreasing = TRUE), ]

pheatmap(count_frame, cellheight = 10,
         treeheight_row = 0,
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         scale = "none",
         angle_col = '315')
We consider comparisons between human and chimpanzee and between human and macaque samples separately.
ids <- seq_len(nrow(countdata))

## DEGs in the comparison between humans and chimps
# samples used: species HS or PT, source iPSC, gender F
sel_HSvPT <- which(metadata$species %in% c("HS","PT") & metadata$sources=="iPSC" & metadata$gender=='F')
res_HSvPT <- DESeqDataSetFromMatrix(
  countData = countdata[ids, sel_HSvPT],
  colData = metadata[sel_HSvPT, ],
  design = ~ 0 + species )
## factor levels were dropped which had no samples
res_HSvPT$species <- relevel(res_HSvPT$species, "HS")
res_HSvPT <- DESeq(res_HSvPT, fitType="local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(res_HSvPT)
## [1] "speciesHS" "speciesPT"
# shrunken fold changes must be computed before `res_HSvPT` is overwritten
# with the results table on the next statement
res_HSvPT_sh <- lfcShrink(res_HSvPT, contrast = c("species","HS","PT"), type='ashr')
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
## Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
## https://doi.org/10.1093/biostatistics/kxw041
res_HSvPT <- results(res_HSvPT, contrast = c("species","HS","PT") )
summary(res_HSvPT)
##
## out of 46457 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 3881, 8.4%
## LFC < 0 (down) : 3890, 8.4%
## outliers [1] : 307, 0.66%
## low counts [2] : 13293, 29%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
##
## DEGs in the comparison between humans and macaque
# samples used: species HS or MM, source iPSC, gender F
sel_HSvMM <- which(metadata$species %in% c("HS","MM") & metadata$sources=="iPSC" & metadata$gender=='F')
res_HSvMM <- DESeqDataSetFromMatrix(
  countData = countdata[ids, sel_HSvMM],
  colData = metadata[sel_HSvMM, ],
  design = ~ 0 + species
)
## factor levels were dropped which had no samples
res_HSvMM$species <- relevel(res_HSvMM$species, "HS")
res_HSvMM <- DESeq(res_HSvMM, fitType="local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# shrunken fold changes must be computed before `res_HSvMM` is overwritten
# with the results table on the next statement
res_HSvMM_sh <- lfcShrink(res_HSvMM, contrast = c("species","HS","MM"), type="ashr")
## using 'ashr' for LFC shrinkage. If used in published research, please cite:
## Stephens, M. (2016) False discovery rates: a new deal. Biostatistics, 18:2.
## https://doi.org/10.1093/biostatistics/kxw041
res_HSvMM <- results(res_HSvMM, contrast = c("species","HS","MM") )
summary(res_HSvMM)
##
## out of 45191 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 5517, 12%
## LFC < 0 (down) : 4670, 10%
## outliers [1] : 133, 0.29%
## low counts [2] : 15485, 34%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Significant genes (padj <= 0.01) for both comparisons, from the standard and
# from the shrunken results. Note that the `_0.1` suffix in the object names
# does not reflect the 0.01 cut-off applied here.
HSvPT_DEG <- as.data.frame(res_HSvPT)
HSvPT_DEG_0.1 <- HSvPT_DEG %>% filter(padj <= 0.01 )
res_HSvPT_sh <- as.data.frame(res_HSvPT_sh)
HSvPT_DEG_sh <- res_HSvPT_sh %>% filter(padj <= 0.01 )
# setDT moves the row names (gene ids) into a column called `rn`
setDT(HSvPT_DEG_0.1, keep.rownames = TRUE)
HSvMM_DEG <- as.data.frame(res_HSvMM)
HSvMM_DEG_0.1 <- HSvMM_DEG %>% filter(padj <= 0.01 )
setDT(HSvMM_DEG_0.1, keep.rownames = TRUE)
res_HSvMM_sh <- as.data.frame(res_HSvMM_sh)
HSvMM_DEG_sh <- res_HSvMM_sh %>% filter(padj <= 0.01 )

sum( HSvMM_DEG_0.1$rn %in% HSvPT_DEG_0.1$rn )
## [1] 1271
sum(! HSvMM_DEG_0.1$rn %in% HSvPT_DEG_0.1$rn )
## [1] 3885
# The shrunken tables are plain data frames: their gene ids are row names, not
# an `rn` column. Comparing `$rn` (NULL) made this check trivially TRUE, so
# compare the row names instead. The same check is now done for HS vs PT,
# because the fold changes below are copied by position.
all( rownames(HSvMM_DEG_sh) == HSvMM_DEG_0.1$rn )
## [1] TRUE
stopifnot(nrow(HSvPT_DEG_sh) == nrow(HSvPT_DEG_0.1),
          all(rownames(HSvPT_DEG_sh) == HSvPT_DEG_0.1$rn))
HSvMM_DEG_0.1$lfc_sh <- HSvMM_DEG_sh$log2FoldChange
HSvPT_DEG_0.1$lfc_sh <- HSvPT_DEG_sh$log2FoldChange
Human versus chimpanzee - volcano
Human versus macaque - volcano
Largely congruent changes in gene expression
# tpm_norm_count_table_thresholded_top = apply(tpm_norm_count_table,2,function(x){x>(quantile(x[x>0])[3])})
# logical matrix: TRUE where TPM > 1
tpm_norm_count_table_thresholded_top <- tpm_norm_count_table > 1
## ------------------------------
# Annotation columns 1, 7 and 12 of `gtf_annotation_table`; the code below
# relies on them containing `ensembl_gene_id` and `gene_biotype`.
log_fold_dat <- gtf_annotation_table[, c(1, 7, 12)]
# Collapse biotype sub-classes. Assigning into the column (rather than into a
# row subset) also works when no row matches.
log_fold_dat$gene_biotype[log_fold_dat$gene_biotype %like% "pseudogene"] <- "pseudogene"
log_fold_dat$gene_biotype[log_fold_dat$gene_biotype %like% "TR_"] <- "TR_genes"
log_fold_dat$rn <- log_fold_dat$ensembl_gene_id

# Keep genes significant in BOTH comparisons (inner merges on `rn`).
# Columns 1, 3, 8 of the DEG tables are `rn` plus the two columns that become
# HSvXX_lfc and HSvXX_lfc_shrunk.
log_fold_dat <- merge(log_fold_dat, HSvPT_DEG_0.1[, c(1, 3, 8)], by='rn')
colnames(log_fold_dat)[5] <- "HSvPT_lfc"
colnames(log_fold_dat)[6] <- "HSvPT_lfc_shrunk"
log_fold_dat <- merge(log_fold_dat, HSvMM_DEG_0.1[, c(1, 3, 8)], by='rn')
colnames(log_fold_dat)[7] <- "HSvMM_lfc"
colnames(log_fold_dat)[8] <- "HSvMM_lfc_shrunk"
## ------------------------------
all(colnames(tpm_norm_count_table) == rownames(metadata))
## [1] TRUE
# Per-species TPM of the samples used in the DESeq2 comparisons
# (means for human and chimp; macaque is taken as-is)
tpm_norm_count_table_df <- data.frame(
  human = rowMeans(tpm_norm_count_table[, which(metadata$species == "HS" & metadata$sources == "iPSC" & metadata$gender == 'F')]),
  chimp = rowMeans(tpm_norm_count_table[, which(metadata$species == "PT" & metadata$sources == "iPSC" & metadata$gender == 'F')]),
  macaque = tpm_norm_count_table[, which(metadata$species == "MM" & metadata$sources == "iPSC" & metadata$gender == 'F')],
  rn = rownames(tpm_norm_count_table) )
log_fold_dat <- merge(log_fold_dat, tpm_norm_count_table_df, by='rn')
log_fold_dat$rn <- NULL
tpm_norm_count_table_df$rn <- NULL

# Subset and colour by biotype for the scatter plot
log_fold_dat_biotype <- log_fold_dat[log_fold_dat$gene_biotype %in% c('protein_coding',
                                                                      'pseudogene',
                                                                      'lncRNA', 'miRNA'), ]
log_fold_dat_biotype$col <- rep('steelblue', nrow(log_fold_dat_biotype))
log_fold_dat_biotype$col[log_fold_dat_biotype$gene_biotype == 'pseudogene'] <- 'thistle3'
log_fold_dat_biotype$col[log_fold_dat_biotype$gene_biotype == 'lncRNA'] <- 'red3'
log_fold_dat_biotype$col[log_fold_dat_biotype$gene_biotype == 'miRNA'] <- 'black'
log_fold_dat_biotype$DEX <- 0
# log_fold_dat_biotype$DEX[log_fold_dat_biotype$ensembl_gene_id %in% eid] = 1
par(mfrow=c(1,1),mar=c(5,5,5,5),cex.lab=2,pty='s')
# x is the HS-vs-PT fold change and y the HS-vs-MM fold change; the axis
# labels were swapped relative to the data and are corrected here.
plot(x=log_fold_dat_biotype$HSvPT_lfc_shrunk,
     y=log_fold_dat_biotype$HSvMM_lfc_shrunk,
     xlim=c(-10,10),ylim=c(-10,10),
     col=log_fold_dat_biotype$col,pch=19,cex=0.5,
     xlab='Hs vs. Pt',ylab='Hs vs. Mm',axes=FALSE)
axis(1,lwd=2,cex.axis=2)
axis(2,lwd=2,cex.axis=2)
abline(a=0,b=1)
abline(h=0,v=0,lwd=2,col='gray')
box(col='black',lwd=2)
# NOTE(review): the correlation uses the unshrunken fold changes, whereas the
# plot above shows the shrunken ones.
cor.test(log_fold_dat_biotype$HSvPT_lfc,log_fold_dat_biotype$HSvMM_lfc)
##
## Pearson's product-moment correlation
##
## data: log_fold_dat_biotype$HSvPT_lfc and log_fold_dat_biotype$HSvMM_lfc
## t = 42.744, df = 1243, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7479379 0.7930088
## sample estimates:
## cor
## 0.7714392
# Rename the first three columns of `log_fold_dat`; later code refers to them
# as `ensembl_id`, `hgnc_symbol` and `gene_biotype`.
colnames(log_fold_dat)[1:3] = c("ensembl_id","hgnc_symbol","gene_biotype")
EAGs: for how many of these genes is expression detected in human astrocytes?
# Genes changed in the same direction in both comparisons
HS_UP_Genes <- log_fold_dat %>% filter(HSvPT_lfc > 0 & HSvMM_lfc > 0)
HS_DN_Genes <- log_fold_dat %>% filter(HSvPT_lfc < 0 & HSvMM_lfc < 0)

all(HS_UP_Genes$ensembl_id %in% HSvMM_DEG_0.1$rn )
## [1] TRUE
all(HS_UP_Genes$ensembl_id %in% HSvPT_DEG_0.1$rn )
## [1] TRUE
hits_up <- as.data.frame(HS_UP_Genes)
hits_dn <- as.data.frame(HS_DN_Genes)

dim(hits_up) # 677
## [1] 677 10
dim(hits_dn) # 486
## [1] 486 10
# fraction of hits that are in the `expressed` Zhang set
sum(hits_up$ensembl_id %in% rownames(expressed))/nrow(hits_up)
## [1] 0.8227474
sum(hits_dn$ensembl_id %in% rownames(expressed))/nrow(hits_dn)
## [1] 0.8395062

# Expression filter on the TPM > 1 matrix:
#   up hits:   TRUE in more than 3 of the human iPSC female samples
#   down hits: TRUE in more than 2 of the chimp/macaque samples
# Gene ids are taken from the row names of the full matrix, so the result is
# also correct when only a single gene passes.
human_cols <- which(metadata$species == "HS" & metadata$sources == "iPSC" & metadata$gender == 'F')
nhp_cols <- which(metadata$species %in% c("PT", "MM"))
detected_human <- rownames(tpm_norm_count_table_thresholded_top)[rowSums(tpm_norm_count_table_thresholded_top[, human_cols]) > 3]
detected_nhp <- rownames(tpm_norm_count_table_thresholded_top)[rowSums(tpm_norm_count_table_thresholded_top[, nhp_cols]) > 2]
hits_up <- hits_up[hits_up$ensembl_id %in% detected_human, ]
hits_dn <- hits_dn[hits_dn$ensembl_id %in% detected_nhp, ]

sum(hits_up$ensembl_id %in% rownames(expressed))/nrow(hits_up)
## [1] 0.8464052
sum(hits_dn$ensembl_id %in% rownames(expressed))/nrow(hits_dn)
## [1] 0.8883929
## boxplot expression in zhang
# Number of up hits (u) and down hits (d) falling into each Zhang expression
# class; `m` stacks them as rows (row 1 = up, row 2 = down).
u <- cbind(not_expressed = sum(hits_up$ensembl_id %in% rownames(not_expressed )),
           fetal = sum(hits_up$ensembl_id %in% expressed_only_fetal ),
           adult = sum(hits_up$ensembl_id %in% expressed_only_adult ),
           both = sum(hits_up$ensembl_id %in% expressed_fetal_adult ))
d <- cbind(not_expressed = sum(hits_dn$ensembl_id %in% rownames(not_expressed )),
           fetal = sum(hits_dn$ensembl_id %in% expressed_only_fetal ),
           adult = sum(hits_dn$ensembl_id %in% expressed_only_adult ),
           both = sum(hits_dn$ensembl_id %in% expressed_fetal_adult ))
m <- rbind(u, d)
m
## not_expressed fetal adult both
## [1,] 33 106 61 351
## [2,] 26 29 26 343
# Stacked bars: share of each Zhang expression class among up and down hits.
# NOTE(review): the plotted values are proportions between 0 and 1 although
# the axis label reads "%".
par(lwd=2, cex.axis=1.5,mar=c(5,5,1,1),pty='m')
barplot(t(m/rowSums(m)),col=c('red4','white','black','gray'),
        ylab="%",names=c("Up","Down"),xlab="EAGs",
        cex.names=2.5,cex.lab=2)
axis(2,lwd=3)

# Fold-change distributions per biotype, one panel per direction and comparison
par(mfrow=c(2,2))
hits_up_split_pt <- split(hits_up$HSvPT_lfc, hits_up$gene_biotype)
boxplot( hits_up_split_pt[c('protein_coding','lncRNA','pseudogene')],ylim=c(0,15),col='white',border=c('steelblue','red3','thistle3'))
hits_up_split_mm <- split(hits_up$HSvMM_lfc, hits_up$gene_biotype)
boxplot( hits_up_split_mm[c('protein_coding','lncRNA','pseudogene')],ylim=c(0,15),col='white',border=c('steelblue','red3','thistle3'))
hits_dn_split_pt <- split(hits_dn$HSvPT_lfc, hits_dn$gene_biotype)
boxplot( hits_dn_split_pt[c('protein_coding','lncRNA','pseudogene')],ylim=c(-15,0),col='white',border=c('steelblue','red3','thistle3'))
hits_dn_split_mm <- split(hits_dn$HSvMM_lfc, hits_dn$gene_biotype)
boxplot( hits_dn_split_mm[c('protein_coding','lncRNA','pseudogene')],
         ylim=c(-15,0),col='white',border=c('steelblue','red3','thistle3'))
# Keep only hits that are in the `expressed` Zhang set
hits_up <- hits_up[hits_up$ensembl_id %in% rownames(expressed), ]
dim(hits_up)
## [1] 518 10
hits_dn <- hits_dn[hits_dn$ensembl_id %in% rownames(expressed), ]
dim(hits_dn)
## [1] 398 10
sum(hits_up$ensembl_id %in% Fetal_Markers)
## [1] 25
sum(hits_dn$ensembl_id %in% Fetal_Markers)
## [1] 26
sum(hits_up$ensembl_id %in% Adult_Markers)
## [1] 20
sum(hits_dn$ensembl_id %in% Adult_Markers)
## [1] 25
# Remove fetal markers from the up hits and adult markers from the down hits
hits_up <- hits_up[! hits_up$ensembl_id %in% Fetal_Markers, ]
hits_dn <- hits_dn[! hits_dn$ensembl_id %in% Adult_Markers, ]
dim(hits_up)
## [1] 493 10
dim(hits_dn)
## [1] 373 10
par(lwd=2, cex.axis=1.5,mar=c(5,5,3,1),mfrow=c(1,1))
barplot( c(up=nrow(hits_up),
           down=nrow(hits_dn)),
         col=c("green4","wheat3"),
         ylim=c(0,500),ylab="EAGs",cex.axis = 1.5, cex.lab=2)
axis(2,lwd=2)

# Write the gene id lists, one id per line (x = up hits, y = down hits)
x <- hits_up$ensembl_id
y <- hits_dn$ensembl_id
write.table(y,file=paste0(outputs_directory,'dn_engs.txt'),quote=FALSE, row.names=FALSE,col.names=FALSE,sep='\n')
write.table(x,file=paste0(outputs_directory,'up_engs.txt'),quote=FALSE, row.names=FALSE,col.names=FALSE,sep='\n')
## are genes affected by evolution frequently totally on or off??
# Biotypes of up hits whose `chimp` and `macaque` values are both below 0.1
table(hits_up[hits_up$chimp<0.1 & hits_up$macaque<0.1,'gene_biotype'])
##
## lncRNA protein_coding pseudogene TEC
## 7 1 6 1
# Number of such up hits
nrow(hits_up[hits_up$chimp<0.1 & hits_up$macaque<0.1,])
## [1] 15
# Biotypes of down hits whose `human` value is below 0.1
table(hits_dn[hits_dn$human<0.1,'gene_biotype'])
##
## lncRNA Mt_tRNA protein_coding pseudogene
## 3 1 11 2
# Number of such down hits
nrow(hits_dn[hits_dn$human<0.1,])
## [1] 17
# Full outer merge of the two unfiltered result tables on their row names
# (`by = 0`); columns of the first table get suffix .x, of the second .y.
all_Deseqs <- merge( HSvPT_DEG, HSvMM_DEG, by=0, all=TRUE ) # data frame of merged results

save( hits_dn, hits_up, HS_DN_Genes, HS_UP_Genes, all_Deseqs, log_fold_dat, tpm_norm_count_table,
      file=paste0(objects_directory,"DEseq2_RNA.RData"))
# Gene-disease table, restricted to genes present in the count table
load(paste0(objects_directory,"bda_final.RData"))
bda_final <- bda_final[bda_final$ensid %in% rownames(countdata), ]

tot_n_EAG <- nrow(hits_up) + nrow(hits_dn)

# Disease records of the up hits (x) and down hits (y), matched by Ensembl id
# or by gene symbol, with one record per gene-disease pair.
# NOTE(review): matching on symbol would also pair empty or missing symbols if
# both tables contain them; verify.
x <- bda_final[bda_final$ensid %in% hits_up$ensembl_id | bda_final$Gene.symbol %in% hits_up$hgnc_symbol, ]
x$or <- paste(x$ensid, x$Disease, sep='-')
x <- x[!duplicated(x$or), ]

y <- bda_final[bda_final$ensid %in% hits_dn$ensembl_id | bda_final$Gene.symbol %in% hits_dn$hgnc_symbol, ]
y$or <- paste(y$ensid, y$Disease, sep='-')
y <- y[!duplicated(y$or), ]

length(unique(c(x$Disease,y$Disease)))
## [1] 23
# Records per disease: overall, for up hits (X) and for down hits (Y).
# Disease names are compared as exact strings, so spelling or case variants of
# one disease are counted separately.
All_diseases <- table(bda_final$Disease)
X <- table(x$Disease)
Y <- table(y$Disease)
All_diseases <- All_diseases[order(All_diseases, decreasing=TRUE)]

# Zero-filled vectors over all diseases, then filled with the observed counts
nulv <- rep(0, length(All_diseases))
names(nulv) <- names(All_diseases)
nulv1 <- nulv
nulv1[match(names(X), names(nulv1))] <- X

nulv2 <- nulv
nulv2[match(names(Y), names(nulv2))] <- Y

# rows: up, down; keep only diseases with at least one hit
m <- rbind(nulv1, nulv2)
mcut1 <- m[, colSums(m) > 0]

dim(mcut1)
## [1] 2 23
par(mar=c(12,4,1,1),mfrow=c(1,1))
barplot(mcut1, beside=TRUE, col=c('green4','wheat3'),las=2,
        ylim=c(0,25),axes=FALSE,ylab="EAG")
axis(2,lwd=3)
# 2 x 2 table for the tests below:
#   rows:    with disease annotation / without disease annotation
#   columns: up hits / down hits
# The second row used to hold the TOTAL number of hits, so the annotated genes
# were counted in both rows. It now holds total minus annotated, which makes
# the two rows disjoint as the tests assume. Any previously recorded test
# output was produced with the old table.
n_up_disease <- length(unique(x$Gene.symbol))
n_dn_disease <- length(unique(y$Gene.symbol))
n_up_total <- length(unique(hits_up$ensembl_id))
n_dn_total <- length(unique(hits_dn$ensembl_id))
M <- matrix( c( n_up_disease,
                n_up_total - n_up_disease,
                n_dn_disease,
                n_dn_total - n_dn_disease), ncol=2, nrow=2)
prop.test( M )
##
## 2-sample test for equality of proportions with continuity correction
##
## data: M
## X-squared = 20.933, df = 1, p-value = 0.000004757
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.3106748 -0.1256033
## sample estimates:
## prop 1 prop 2
## 0.3511450 0.5692841
fisher.test( M )
##
## Fisher's Exact Test for Count Data
##
## data: M
## p-value = 0.000003401
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.2727455 0.6096183
## sample estimates:
## odds ratio
## 0.4098171
M
## [,1] [,2]
## [1,] 46 85
## [2,] 493 373
# 2 x 2 table restricted to "Intellectual Disability":
#   rows:    annotated with the disease / not annotated with it
#   columns: up hits / down hits
# The second row is total minus annotated (it used to be the total), so the
# rows are disjoint. Any previously recorded test output was produced with the
# old table.
n_up_id_disease <- length(unique(x$Gene.symbol[x$Disease=="Intellectual Disability"]))
n_dn_id_disease <- length(unique(y$Gene.symbol[y$Disease=="Intellectual Disability"]))
M <- matrix( c( n_up_id_disease,
                length(unique(hits_up$ensembl_id)) - n_up_id_disease,
                n_dn_id_disease,
                length(unique(hits_dn$ensembl_id)) - n_dn_id_disease), ncol=2, nrow=2)
chisq.test( M )
## Warning in chisq.test(M): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: M
## X-squared = 8.2604, df = 1, p-value = 0.004052
fisher.test( M )
##
## Fisher's Exact Test for Count Data
##
## data: M
## p-value = 0.001251
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.0000000 0.4471496
## sample estimates:
## odds ratio
## 0
unique(bda_final$Disease)
## [1] "Autism Spectrum Disorder"
## [2] "Alzheimer's Disease"
## [3] "Amyotrophic Lateral Sclerosis"
## [4] "Multiple Sclerosis"
## [5] "Epilepsy"
## [6] "Intracranial Aneurysm"
## [7] "Neuroblastoma"
## [8] "Parkinson's Disease"
## [9] "Restless legs Syndrome"
## [10] "Meningioma"
## [11] "Narcolepsy"
## [12] "Glioma"
## [13] "Prader-Willi Syndrome"
## [14] "Progressive Supranuclear Plasy"
## [15] "Restless Legs Syndrome"
## [16] "Rett Syndrome"
## [17] "Rolandic Epilepsy with Speech impairment"
## [18] "Shy Drager Syndrome"
## [19] "Spasmodic Dysphonia"
## [20] "Stroke"
## [21] "Tay-Sachs Disease"
## [22] "Tourette Syndrome"
## [23] "Tuberous Sclerosis"
## [24] "Von Hippel-Lindau Syndrome"
## [25] "X-linked Hydrocephalus"
## [26] "Agenesis Corpus Callosum"
## [27] "Alopecia with Mental Retardation"
## [28] "Alpha-Thalassemia X-Linked Intellectual Disability Syndrome"
## [29] "Alternating Hemiplegia of Childhood"
## [30] "Aphasia"
## [31] "Attention Deficit Hyperactivity Disorder"
## [32] "Autosomal Dominant Nocturnal Frontal Lobe Epilepsy"
## [33] "Autosomal Dominant Partial Epilepsy with Auditory Features"
## [34] "Autosomal Recessive Cerebellar Ataxia Type 1"
## [35] "Batten Disease"
## [36] "Benign Familial Neonatal Seizures"
## [37] "Benign Hereditary Chorea"
## [38] "Cerebral Aneurysm"
## [39] "Cerebellar Ataxia, Mental Retardation and Disequilibrium Syndrome"
## [40] "Cerebral Palsy"
## [41] "Cerebro-Oculo-Facio-Skeletal Syndrome"
## [42] "Cerebrocostomandibular Syndrome"
## [43] "Charcot-Marie-Tooth Disease"
## [44] "Chiari Malformation"
## [45] "Chronic Inflammatory Demyelinating Polyneuropathy"
## [46] "Coma"
## [47] "Creutzfeldt Jakob Disease"
## [48] "Dementia (Non Alzheimer)"
## [49] "Down Syndrome"
## [50] "Dysautonomia"
## [51] "Dyslexia"
## [52] "Dyspraxia"
## [53] "Dystonia"
## [54] "Encephalitis"
## [55] "Essential Tremor"
## [56] "Familial Focal Epilepsy with Variable Foci"
## [57] "Ferro-Cerebro-Cutaneous Syndrome"
## [58] "Friedreich Ataxia"
## [59] "Gaucher Disease"
## [60] "Generalized Epilepsy with Febrile Seizures Plus"
## [61] "Huntington's Disease"
## [62] "Hydrocephalus"
## [63] "Intellectual Disability"
## [64] "Meningitis"
## [65] "Motor Neurone Disease"
## [66] "Muscular Dystrophy"
## [67] "Neurodegenerative Disease"
## [68] "Paraganglioma"
## [69] "Schizophrenia"
## [70] "Pontocerebellar Hypoplasia"
## [71] "Depression Disorder"
## [72] "Neurofibromatosis"
## [73] "Major Depression Disorder"
## [74] "Ischemic Stroke"
## [75] "Ataxia Telangiectasia"
## [76] "Spinocerebellar Ataxia"
## [77] "Smith-Magenis Syndrome"
## [78] "Anorexia Nervosa"
## [79] "Bipolar Disorder"
## [80] "Frontotemporal Lobar Degeneration"
## [81] "Neurodevelopmental Disability"
## [82] "Panic Disorder"
## [83] "Post-traumatic Stress Disorder"
## [84] "Amyotrophic lateral Sclerosis"
## [85] "Angelman Syndrome"
## [86] "Cerebral infarction"
## [87] "Cognitive Functions and Neuronal plasticity"
## [88] "Fragile X Syndrome"
## [89] "Neurological Disorder"
## [90] "Non-functioning Pituitary Adenoma"
## [91] "Pituitary Adenoma"
## [92] "Plexiform Neurofibroma"
## [93] "Prader-willi Syndrome and Angelman Syndrome"
## [94] "Psychiatric Disease"
## [95] "West Syndrome"
## [96] "Non-functioning Pituitary Neoplasms"
## [97] "Pituitary Neoplasms"
## [98] "Forebrain Ischemia"
## [99] "Status Epilepticus"
## [100] "Acute Cerebral Infarction"
## [101] "Acute Cerebral Ischemia"
## [102] "Brain Neoplasms"
## [103] "Cerebellum Cancer"
## [104] "Cerebral Cavernous Malformation"
## [105] "Cerebral Ischemia"
## [106] "Cerebral Malaria"
## [107] "Encephalomyelitis"
## [108] "Intracerebral Hemorrhage"
## [109] "Mild Cognitive Impairment"
## [110] "Neurilemmoma"
## [111] "Neuroendocrine Tumor"
## [112] "Neuroepithelial Tumor"
## [113] "Neuroma"
## [114] "Neuronal Apoptosis-Related Disease"
## [115] "Frontotemporal Dementia"
## [116] "Anxiety Disorder"
## [117] "Acute Ischemic Stroke"
## [118] "Aneurysmal Subarachnoid Hemorrhage"
## [119] "Central Nervous System Embryonal Tumor"
# Gene names listed in the UniProt keyword export, mapped to Ensembl ids.
id_DAVID_all <- read.delim(paste0(outputs_directory,"uniprotkb_keyword_KW_0991_2023_09_01.tsv"),
                           header=TRUE)
# `Gene.Names` holds space-separated names per entry; flatten to one vector.
# (Only membership is used later, so the per-entry grouping is not needed.)
id_DAVID_all <- unlist(strsplit(as.character(id_DAVID_all$Gene.Names), " "))
id_DAVID_all_ensg <- unique( genemapu$ensembl_gene_id[genemapu$external_gene_name %in% id_DAVID_all ] )

# Fisher test on a 2 x 2 table:
#   rows:    down hits / up hits
#   columns: in the keyword set / not in the keyword set
# The second column used to be the total number of hits, which counted the
# keyword genes twice; it is now total minus keyword genes. Any previously
# recorded test output was produced with the old table.
n_dn_in_set <- sum(hits_dn$ensembl_id %in% id_DAVID_all_ensg)
n_up_in_set <- sum(hits_up$ensembl_id %in% id_DAVID_all_ensg)
fisher.test(matrix(c(n_dn_in_set,
                     n_up_in_set,
                     nrow(hits_dn) - n_dn_in_set,
                     nrow(hits_up) - n_up_in_set),2,2))
##
## Fisher's Exact Test for Count Data
##
## data: matrix(c(sum(hits_dn$ensembl_id %in% id_DAVID_all_ensg), sum(hits_up$ensembl_id %in% id_DAVID_all_ensg), nrow(hits_dn), nrow(hits_up)), 2, 2)
## p-value = 0.000000000003697
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 7.914837 1916.514987
## sample estimates:
## odds ratio
## 47.39888
# Symbols of down hits that appear in the keyword gene-name list
hits_dn$hgnc_symbol[hits_dn$hgnc_symbol %in% id_DAVID_all ]
## [1] "KMT2E" "SYN1" "CDH1" "NUP133" "ATP2B1" "OPHN1" "KAT6A"
## [8] "ZC3H14" "CTCF" "SMC3" "FBXW7" "KMT5B" "ARL6" "CEP104"
## [15] "ZMYM2" "FGF13" "DPP6" "ATP8A2" "CDK8" "DPF2" "STXBP1"
## [22] "FBXO11" "ASXL2" "PHIP" "TLK2" "RBMX" "AFF2" "PHF6"
## [29] "DYRK1A" "ZNF148" "HNRNPH1" "SOX11" "DCC" "USP7" "ZNF292"
## [36] "PGAP1"
# Symbols of up hits that appear in the keyword gene-name list
hits_up$hgnc_symbol[hits_up$hgnc_symbol %in% id_DAVID_all ]
## [1] "ZNHIT3"
Heatmap of ID related genes
# Heatmap of log10(TPM + 0.1) for the down hits in the keyword gene set.
idgenesensembl <- hits_dn$ensembl_id[ hits_dn$ensembl_id %in% id_DAVID_all_ensg ]
# NOTE(review): `3:ncol(...)` drops the first two columns of the TPM table;
# confirm that those are the columns meant to be excluded.
tpm_norm_count_table_ID <- tpm_norm_count_table[rownames(tpm_norm_count_table) %in% idgenesensembl, 3:ncol(tpm_norm_count_table)]
tpm_norm_count_table_id <- as.data.frame(log10(0.1 + tpm_norm_count_table_ID))
# Label each row by looking up ITS OWN Ensembl id. The rows follow the order of
# `tpm_norm_count_table`, not of `idgenesensembl`, so assigning the names in
# `idgenesensembl` order could attach a gene name to the wrong row.
rownames(tpm_norm_count_table_id) <- genemapu$external_gene_name[match(rownames(tpm_norm_count_table_id), genemapu$ensembl_gene_id)]
png(paste0(plots_directory,'/ID_heatmap.png'),
    width = 5000, height = 10000, res = 1200 )
pheatmap(tpm_norm_count_table_id,
         cellheight = 10,
         treeheight_row = 0,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         scale = "row",color=colorRampPalette(c("blue","white","red"))(100),
         angle_col = '315')
dev.off()
## quartz_off_screen
## 3
# Heatmap of log10(TPM + 0.1) for the up hits listed under the
# "extracellular exosome" term of the DAVID export.
up_fa <- read.delim(paste0(outputs_directory,"hits_up_DAVID_KEGG.txt"))
exosomal_genes <- unique( unlist(strsplit(up_fa$Genes[up_fa$Term=="GO:0070062~extracellular exosome"],", ")) )
# Drop one gene id. Logical negation is used because `x[-which(cond)]` returns
# an EMPTY vector when nothing matches.
exosomal_genes <- exosomal_genes[!exosomal_genes %in% "ENSG00000285762"]

# NOTE(review): `3:ncol(...)` drops the first two columns of the TPM table;
# confirm that those are the columns meant to be excluded.
tpm_norm_count_table_EX <- tpm_norm_count_table[rownames(tpm_norm_count_table) %in% exosomal_genes, 3:ncol(tpm_norm_count_table)]
tpm_norm_count_table_ex <- as.data.frame(log10(0.1 + tpm_norm_count_table_EX))
# label rows with the gene symbol recorded in `hits_up`
rownames(tpm_norm_count_table_ex) <- hits_up$hgnc_symbol[match(rownames(tpm_norm_count_table_ex), hits_up$ensembl_id)]
png(paste0(plots_directory,'/Exosome_heatmap_GeneNames.png'),
    width = 5000, height = 10000, res = 1200 )
pheatmap(tpm_norm_count_table_ex,
         cellheight = 10,
         treeheight_row = 0,
         cluster_cols = FALSE,
         cluster_rows = TRUE,
         scale = "row",color=colorRampPalette(c("blue","white","red"))(100),
         angle_col = '315')
dev.off()
## quartz_off_screen
## 3
# Bar plot of -log10(Benjamini) for the DAVID terms enriched among the
# down-regulated genes (Benjamini < 0.05, GO / hsa01100 terms only).
go_dn <- read.delim(paste0(outputs_directory, 'dn_engs_DAVID_KEGG.txt'))
go_dn <- go_dn[order(go_dn$Benjamini, decreasing = TRUE), ]
go_dn <- go_dn[go_dn$Benjamini < 0.05, ]
# Term prefix before the first ":" (e.g. "GO").
go_dn$anyGo <- unlist(lapply(strsplit(go_dn$Term, ":"), function(x) { x[[1]] }))
go_dn <- go_dn[go_dn$anyGo %in% c("GO", "hsa01100"), ]
par(mfrow = c(1, 1), mar = c(5, 30, 1, 1))
barplot(-log10(go_dn$Benjamini), horiz = TRUE,
        names.arg = go_dn$Term, las = 2, xlim = c(0, 20),
        xlab = "-Log[10]B-H adj. P-val")
axis(1, lwd = 2, las = 2)
Genes related to nucleus
length(unique(unlist(strsplit(go_dn$Genes,", "))))
## [1] 249
# Bar plot of -log10(Benjamini) for the DAVID terms enriched among the
# up-regulated genes (Benjamini < 0.01, GO / hsa01100 terms only).
go_up <- read.delim(paste0(outputs_directory, 'hits_up_DAVID_KEGG.txt'))
go_up <- go_up[order(go_up$Benjamini, decreasing = TRUE), ]
go_up <- go_up[go_up$Benjamini < 0.01, ]
# Term prefix before the first ":" (e.g. "GO").
go_up$anyGo <- unlist(lapply(strsplit(go_up$Term, ":"), function(x) { x[[1]] }))
go_up <- go_up[go_up$anyGo %in% c("GO", "hsa01100"), ]
par(mfrow = c(1, 1), mar = c(5, 20, 1, 1))
barplot(-log10(go_up$Benjamini), horiz = TRUE,
        names.arg = go_up$Term, las = 2, xlim = c(0, 3),
        xlab = "-Log[10]B-H adj. P-val")
axis(1, lwd = 2, las = 2)
# Chimpanzee iPSC RNA-seq: read the count table, derive TPM values and build
# the DESeq2 data set.
pluripotencyGenes <- read.delim(paste0(outputs_directory, 'Conserved_Pluripotency_genes.txt'),
                                header = FALSE, as.is = TRUE)
df <- read.delim(paste0(outputs_directory, 'gene_counts_Mandy.txt'), skip = 1, as.is = TRUE)

# Columns 7-11 are passed to GetTPM as the sample columns (1:5); column 6 is
# carried along as the sixth column (presumably gene length -- confirm
# against GetTPM).
countTable <- df[, c(7, 8, 9, 10, 11, 6)]
rownames(countTable) <- df$Geneid
chimp_tpm <- GetTPM(countTable, 1:5, rownames(countTable))
# Strip the path prefix and the BAM suffix from the sample names.
colnames(chimp_tpm) <- unlist(strsplit(colnames(chimp_tpm), "analyses.star.RNA_Seq_02.22_PanTro_iPSC_WT_"))[seq(2, 2 * ncol(chimp_tpm), by = 2)]
colnames(chimp_tpm) <- unlist(strsplit(colnames(chimp_tpm), "_Rep_1_Aligned.sortedByCoord.out.bam"))

countTable <- df[, c(7, 8, 9, 10, 11)]
rownames(countTable) <- df$Geneid
# Index by the table being renamed (the original used ncol(chimp_tpm), which
# only works because both tables happen to have the same number of columns).
colnames(countTable) <- unlist(strsplit(colnames(countTable), "analyses.star.RNA_Seq_02.22_PanTro_iPSC_WT_"))[seq(2, 2 * ncol(countTable), by = 2)]
colnames(countTable) <- unlist(strsplit(colnames(countTable), "_Rep_1_Aligned.sortedByCoord.out.bam"))

coldata <- data.frame(condition = c(rep("Mandy", 4), "SandraA"))
rownames(coldata) <- colnames(countTable)
dds <- DESeqDataSetFromMatrix(
  countData = countTable,
  colData = coldata,
  design = ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# Fit the DESeq2 model on the chimpanzee data set.
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(dds)
## [1] "Intercept" "condition_SandraA_vs_Mandy"
# Results ordered by adjusted p-value, plus sample-vs-sample scatter plots
# and a TPM box plot of all genes vs pluripotency genes.
res <- results(dds)
res <- res[order(res$padj), ]
vsdat <- vst(dds, blind = FALSE)
matvsdat <- assay(vsdat) ## variance stabilised data
mat <- counts(dds, normalized = TRUE) ## size-factor normalised counts

# Keep genes with a total normalised count above 20.
mat <- mat[rowSums(mat) > 20, ]

par(mfrow = c(2, 2), mar = c(5, 5, 5, 5), pty = "s", bty = "O")
heatscatter(log2(0.1 + mat[, 'Mandy4']), log2(0.1 + mat[, 'SandraA']),
            colpal = 'crazyblue', pch = 19, cex = 0.5,
            xlab = "Mandy4 [log2(counts)]", ylab = "SandraA [log2(counts)]")
box(col = "black")
heatscatter(log2(0.1 + mat[, 'Mandy6']), log2(0.1 + mat[, 'SandraA']),
            colpal = 'crazyblue', pch = 19, cex = 0.5,
            xlab = "Mandy6 [log2(counts)]", ylab = "SandraA [log2(counts)]")
box(col = "black")
heatscatter(log2(0.1 + mat[, 'Mandy4']), log2(0.1 + mat[, 'Mandy6']),
            colpal = 'crazyblue', pch = 19, cex = 0.5,
            xlab = "Mandy4 [log2(counts)]", ylab = "Mandy6 [log2(counts)]")
box(col = "black")
par(mfrow = c(1, 1), mar = c(7, 5, 5, 1), bty = 'n')
# For each sample: TPM of all genes next to TPM of the pluripotency genes.
boxplot(chimp_tpm[, 'Mandy6'],
        chimp_tpm[rownames(chimp_tpm) %in% pluripotencyGenes$V1, 'Mandy6'],
        chimp_tpm[, 'Mandy4'],
        chimp_tpm[rownames(chimp_tpm) %in% pluripotencyGenes$V1, 'Mandy4'],
        chimp_tpm[, 'SandraA'],
        chimp_tpm[rownames(chimp_tpm) %in% pluripotencyGenes$V1, 'SandraA'],
        border = rep(c('gray', 'red'), 3), main = '', col = 'white',
        ylab = expression('TPM'), outline = FALSE,
        ylim = c(0, 60), bty = 'n', notch = FALSE, lwd = 2,
        names = rep(c("all genes", "Pluripotency"), 3), las = 2)
# ID gene set from the UniProt keyword export, mapped to Ensembl ids.
id_DAVID_all <- read.delim(paste0(outputs_directory, "uniprotkb_keyword_KW_0991_2023_09_01.tsv"),
                           header = TRUE)
# One entry may list several space-separated gene names; flatten them all.
id_DAVID_all <- unlist(lapply(split(id_DAVID_all$Gene.Names, id_DAVID_all$Entry),
                              function(x) { strsplit(x, " ") }))
id_DAVID_all_ensg <- unique(genemapu$ensembl_gene_id[genemapu$external_gene_name %in% id_DAVID_all])
id <- id_DAVID_all_ensg[id_DAVID_all_ensg %in% hits_dn$ensembl_id]

up_fa <- read.delim(paste0(outputs_directory, "hits_up_DAVID_KEGG.txt"))
exosomal_genes <- unique(unlist(strsplit(
  up_fa$Genes[up_fa$Term == "GO:0070062~extracellular exosome"], ", "
)))
# Logical mask instead of x[-which(cond)], which empties the vector when the
# gene is absent.
# NOTE(review): the table below is built from exosomal_genes, not from the
# filtered `exosome` -- confirm which one is intended.
exosome <- exosomal_genes[!exosomal_genes %in% "ENSG00000285762"]

tpm_norm_count_table_EX <- tpm_norm_count_table[
  rownames(tpm_norm_count_table) %in% exosomal_genes,
  3:ncol(tpm_norm_count_table)
]
tpm_norm_count_table_ex <- as.data.frame(log10(0.1 + tpm_norm_count_table_EX))
rownames(tpm_norm_count_table_ex) <- hits_up$hgnc_symbol[
  match(rownames(tpm_norm_count_table_ex), hits_up$ensembl_id)
]
human_count <- read.delim(paste0(outputs_directory, 'gene_counts_human_dec2022.txt'), skip = 1)
macaque_count <- read.delim(paste0(outputs_directory, 'gene_counts_macaque.txt'), skip = 1)
# Both tables must list the genes in the same order (they are combined by
# position further down).
all(human_count$Geneid == macaque_count$Geneid)
## [1] TRUE
# Combine human and macaque cortex counts and fit DESeq2 with a species design.
APlab_count <- data.frame(Hs_CTX_WT_Brain_S3A1_M = human_count[, 7],
                          Hs_CTX_WT_Brain_S7A1_M = human_count[, 8],
                          Hs_CTX_WT_Brain_S2A1_M = human_count[, 9],
                          Hs_CTX_WT_Brain_S1A1_M = human_count[, 10],
                          Hs_CTX_WT_Brain_S6A1_F = human_count[, 11],
                          Mm_CTX_WT_Brain_10506_M = macaque_count[, 7],
                          Mm_CTX_WT_Brain_10521_F = macaque_count[, 8],
                          row.names = human_count$Geneid)
# Sample sheet in the same order as the columns of APlab_count.
brain_met <- data.frame(species = factor(c(rep("HS", 5), c('MM', 'MM')), levels = c("HS", "MM")),
                        sample = 'WholeCortex', sex = c('M', 'M', 'M', 'M', 'F', 'M', 'F'),
                        row.names = colnames(APlab_count))
brain_bulk <- DESeqDataSetFromMatrix(
  countData = APlab_count,
  colData = brain_met,
  design = ~ species)
brain_bulk <- estimateSizeFactors(brain_bulk)
brain_bulk <- DESeq(brain_bulk)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Transformed/normalised counts and the human-vs-macaque results.
vst_data <- vst(brain_bulk, blind = TRUE)
log_data <- rlog(brain_bulk, blind = TRUE)

normalized_counts <- counts(brain_bulk, normalized = TRUE)
brain_bulk_PL_res <- results(brain_bulk, contrast = c("species", "HS", "MM"))
# Significant genes: padj available and < 0.01, then split by direction.
brain_bulk_PL_sig <- brain_bulk_PL_res[!is.na(brain_bulk_PL_res$padj), ]
brain_bulk_PL_sig <- brain_bulk_PL_sig[brain_bulk_PL_sig$padj < 0.01, ]
brain_bulk_PL_sig_down <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange < (0), ]
brain_bulk_PL_sig_up <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange > (0), ]
# External RNA-seq data set. Note that brain_met is overwritten here.
brain_tpm <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_TPMCOUNTS.tsv'), header = TRUE, as.is = TRUE)
brain_met <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_METADATA.tsv'), header = TRUE, as.is = TRUE)
brain_counts <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_COUNTDATA.tsv'), header = TRUE, as.is = TRUE)
rownames(brain_met) <- brain_met$sample_names
# Put the count columns in the same order as the metadata rows.
brain_counts <- brain_counts[, match(rownames(brain_met), colnames(brain_counts))]
load(paste0(objects_directory, "ensembl_hg38_genemap.RData"))
load(paste0(objects_directory, "GTF_Annotation.RData"))
# One row per Ensembl gene id.
genemapu <- genemap[!duplicated(genemap$ensembl_gene_id), ]
Load objects from other vignettes
load(paste0(objects_directory,"bda_final.RData"))
load(paste0(objects_directory,"DEseq2_RNA.RData"))
# ID gene set from the UniProt keyword export, mapped to Ensembl ids.
id_DAVID_all <- read.delim(paste0(outputs_directory, "uniprotkb_keyword_KW_0991_2023_09_01.tsv"),
                           header = TRUE)
# One entry may list several space-separated gene names; flatten them all.
id_DAVID_all <- unlist(lapply(split(id_DAVID_all$Gene.Names, id_DAVID_all$Entry),
                              function(x) { strsplit(x, " ") }))
id_DAVID_all_ensg <- unique(genemapu$ensembl_gene_id[genemapu$external_gene_name %in% id_DAVID_all])
id <- id_DAVID_all_ensg[id_DAVID_all_ensg %in% hits_dn$ensembl_id]

up_fa <- read.delim(paste0(outputs_directory, "hits_up_DAVID_KEGG.txt"))
exosomal_genes <- unique(unlist(strsplit(
  up_fa$Genes[up_fa$Term == "GO:0070062~extracellular exosome"], ", "
)))
# Logical mask instead of x[-which(cond)], which empties the vector when the
# gene is absent.
# NOTE(review): the table below is built from exosomal_genes, not from the
# filtered `exosome` -- confirm which one is intended.
exosome <- exosomal_genes[!exosomal_genes %in% "ENSG00000285762"]

tpm_norm_count_table_EX <- tpm_norm_count_table[
  rownames(tpm_norm_count_table) %in% exosomal_genes,
  3:ncol(tpm_norm_count_table)
]
tpm_norm_count_table_ex <- as.data.frame(log10(0.1 + tpm_norm_count_table_EX))
rownames(tpm_norm_count_table_ex) <- hits_up$hgnc_symbol[
  match(rownames(tpm_norm_count_table_ex), hits_up$ensembl_id)
]
human_count <- read.delim(paste0(outputs_directory, 'gene_counts_human_dec2022.txt'), skip = 1)
macaque_count <- read.delim(paste0(outputs_directory, 'gene_counts_macaque.txt'), skip = 1)
# Both tables must list the genes in the same order (they are combined by
# position further down).
all(human_count$Geneid == macaque_count$Geneid)
## [1] TRUE
# Combine human and macaque cortex counts and fit DESeq2 with a species design.
APlab_count <- data.frame(Hs_CTX_WT_Brain_S3A1_M = human_count[, 7],
                          Hs_CTX_WT_Brain_S7A1_M = human_count[, 8],
                          Hs_CTX_WT_Brain_S2A1_M = human_count[, 9],
                          Hs_CTX_WT_Brain_S1A1_M = human_count[, 10],
                          Hs_CTX_WT_Brain_S6A1_F = human_count[, 11],
                          Mm_CTX_WT_Brain_10506_M = macaque_count[, 7],
                          Mm_CTX_WT_Brain_10521_F = macaque_count[, 8],
                          row.names = human_count$Geneid)
# Sample sheet in the same order as the columns of APlab_count.
brain_met <- data.frame(species = factor(c(rep("HS", 5), c('MM', 'MM')), levels = c("HS", "MM")),
                        sample = 'WholeCortex', sex = c('M', 'M', 'M', 'M', 'F', 'M', 'F'),
                        row.names = colnames(APlab_count))
brain_bulk <- DESeqDataSetFromMatrix(
  countData = APlab_count,
  colData = brain_met,
  design = ~ species)
brain_bulk <- estimateSizeFactors(brain_bulk)
brain_bulk <- DESeq(brain_bulk)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Transformed/normalised counts and the human-vs-macaque results.
vst_data <- vst(brain_bulk, blind = TRUE)
log_data <- rlog(brain_bulk, blind = TRUE)

normalized_counts <- counts(brain_bulk, normalized = TRUE)
brain_bulk_PL_res <- results(brain_bulk, contrast = c("species", "HS", "MM"))
# Significant genes: padj available and < 0.01, then split by direction.
brain_bulk_PL_sig <- brain_bulk_PL_res[!is.na(brain_bulk_PL_res$padj), ]
brain_bulk_PL_sig <- brain_bulk_PL_sig[brain_bulk_PL_sig$padj < 0.01, ]
brain_bulk_PL_sig_down <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange < (0), ]
brain_bulk_PL_sig_up <- brain_bulk_PL_sig[brain_bulk_PL_sig$log2FoldChange > (0), ]
# External RNA-seq data set. Note that brain_met is overwritten here.
brain_tpm <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_TPMCOUNTS.tsv'), header = TRUE, as.is = TRUE)
brain_met <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_METADATA.tsv'), header = TRUE, as.is = TRUE)
brain_counts <- read.delim(paste0(outputs_directory, 'Ext_RNASeq_COUNTDATA.tsv'), header = TRUE, as.is = TRUE)
rownames(brain_met) <- brain_met$sample_names
# Put the count columns in the same order as the metadata rows.
brain_counts <- brain_counts[, match(rownames(brain_met), colnames(brain_counts))]
Retain normal Cortex and Female samples and perform DESeq2 based normalisation
# Khaitovich-lab samples: normal cortex, female. The filter is computed once
# and applied to both the counts (columns) and the metadata (rows).
is_kl_sample <- brain_met$lab == "Khaitovich Lab" & brain_met$sources %like% 'Cortex' &
  brain_met$condition == "Normal" & brain_met$sex == "F"
klrna <- brain_counts[, is_kl_sample]
brain_met_kl <- brain_met[is_kl_sample, ]

brain_bulk_kl <- DESeqDataSetFromMatrix(
  countData = klrna,
  colData = brain_met_kl,
  design = ~ species)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
brain_bulk_kl <- estimateSizeFactors(brain_bulk_kl)
brain_bulk_kl_normalized <- counts(brain_bulk_kl, normalized = TRUE)
# Fit DESeq2 on the Khaitovich-lab subset and extract human-vs-other-species
# contrasts.
brain_bulk_kl <- DESeqDataSetFromMatrix(
  countData = klrna,
  colData = brain_met_kl,
  design = ~ species)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
brain_bulk_kl <- estimateSizeFactors(brain_bulk_kl)
brain_bulk_kl <- DESeq(brain_bulk_kl)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
brain_bulk_kl_normalized <- counts(brain_bulk_kl, normalized = TRUE)

brain_bulk_KL_res <- results(brain_bulk_kl, contrast = c("species", "HS", "MM"))
brain_bulk_KL_res2 <- results(brain_bulk_kl, contrast = c("species", "HS", "PT"))
brain_bulk_KL_res3 <- results(brain_bulk_kl, contrast = c("species", "HS", "PP"))
All the other diseases - do they show such a trend? Fix the gene names! https://www.mirbase.org/ftp.shtml
# Per-disease log2 fold changes: for every disease with more than 30 entries
# in bda_final, collect the log2FoldChange of its genes from three result
# tables, then summarise each disease by a t-test p-value and a median.
choroby <- table(bda_final$Disease)
choroby <- choroby[choroby > 30]

# Gene ids grouped by disease, restricted to the diseases kept above.
in_kept_disease <- bda_final$Disease %in% names(choroby)
genes_by_disease <- split(bda_final$ensid[in_kept_disease],
                          bda_final$Disease[in_kept_disease])

# Named vector of non-NA log2FoldChange values of `genes` in `res_table`.
lfc_for_genes <- function(genes, res_table) {
  pt <- res_table[rownames(res_table) %in% genes, ]
  pt <- pt[!is.na(pt$log2FoldChange), ]
  res <- pt$log2FoldChange
  names(res) <- rownames(pt)
  res
}

LFC_disease <- lapply(genes_by_disease, lfc_for_genes, res_table = brain_bulk_PL_res)
LFC_disease2 <- lapply(genes_by_disease, lfc_for_genes, res_table = brain_bulk_KL_res)
LFC_disease3 <- lapply(genes_by_disease, lfc_for_genes, res_table = brain_bulk_KL_res2)
### --------------------
# One-sample t-test of the fold changes of each disease.
ChosenFunction <- function(x) { t.test(x)$p.value }
LFC_disease_pv <- unlist(lapply(LFC_disease, ChosenFunction))
LFC_disease2_pv <- unlist(lapply(LFC_disease2, ChosenFunction))
LFC_disease3_pv <- unlist(lapply(LFC_disease3, ChosenFunction))

# Median fold change of each disease.
ChosenFunction <- median
LFC_disease_fc <- unlist(lapply(LFC_disease, ChosenFunction))
LFC_disease2_fc <- unlist(lapply(LFC_disease2, ChosenFunction))
LFC_disease3_fc <- unlist(lapply(LFC_disease3, ChosenFunction))

cols <- colorRampPalette(c("orange3", "white", "aquamarine3"))(length(LFC_disease_fc))

# Bars sorted by median fold change; coloured when the t-test p-value < 0.05.
par(mfrow = c(1, 1), mar = c(15, 4, 5, 1))
barplot(LFC_disease_fc[order(LFC_disease_fc, decreasing = FALSE)],
        col = ifelse(LFC_disease_pv[order(LFC_disease_fc, decreasing = FALSE)] < 0.05, "aquamarine3", "gray80"),
        las = 2, axes = FALSE, ylim = c(-1, 1), ylab = "log[2]FC (Human/NHP)")
axis(2, lwd = 2, las = 2, cex.lab = 1.5)
par(mfrow = c(1, 1), mar = c(15, 4, 5, 1))
barplot(LFC_disease2_fc[order(LFC_disease2_fc, decreasing = FALSE)],
        col = ifelse(LFC_disease2_pv[order(LFC_disease2_fc, decreasing = FALSE)] < 0.05, "aquamarine3", "gray80"),
        las = 2, axes = FALSE, ylab = "log[2]FC (Human/NHP)")
axis(2, lwd = 2, las = 2, cex.lab = 1.5)
par(mfrow = c(1, 1), mar = c(15, 4, 1, 1))
barplot(LFC_disease3_fc[order(LFC_disease3_fc, decreasing = FALSE)],
        col = ifelse(LFC_disease3_pv[order(LFC_disease3_fc, decreasing = FALSE)] < 0.05, "aquamarine3", "gray80"),
        las = 2, axes = FALSE, ylim = c(-0.4, 0.4), ylab = "log[2]FC (Human/NHP)")
axis(2, lwd = 2, las = 2, cex.lab = 1.5)
library(beeswarm)
library(ggpubr)
library(dplyr)
# Mean +/- standard error of the normalised counts of ENSG00000007866 per
# species, for the in-house data set (PL) and the Khaitovich-lab data set (KL).
normalized_counts_PL <- counts(brain_bulk, normalized = TRUE)
normalized_counts_KL <- counts(brain_bulk_kl, normalized = TRUE)

samples_KL <- brain_met[brain_met$lab == "Khaitovich Lab" & brain_met$sources %like% 'Cortex' &
                          brain_met$condition == "Normal" & brain_met$sex == "F", ]

tead3_pl <- data.frame(expression = normalized_counts_PL['ENSG00000007866', ],
                       species = c(rep("HS", 5), rep("MM", 2)))
tead3_kl <- data.frame(expression = normalized_counts_KL['ENSG00000007866', ],
                       species = samples_KL$species[match(colnames(normalized_counts_KL), samples_KL$sample_names)])
tead3_kl <- tead3_kl[tead3_kl$species %in% c("HS", "PT", "MM"), ]
tead3_kl$species <- factor(tead3_kl$species, levels = c("HS", "PT", "MM"))

# Standard error of the mean.
se <- function(x) { sd(x) / sqrt(length(x)) }
my_dat <- summarise(group_by(tead3_pl, species), mean = mean(expression), se = se(expression))

ggplot(my_dat, aes(x = species, y = mean, fill = species)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2,
                position = position_dodge(.9)) +
  theme_classic() + ylim(c(0, 50)) + scale_fill_manual(values = c('gray', 'blue'))
se <- function(x) { sd(x) / sqrt(length(x)) }
my_dat <- summarise(group_by(tead3_kl, species),
                    mean = mean(expression), se = se(expression))
ggplot(my_dat, aes(x = species, y = mean, fill = species)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = .2,
                position = position_dodge(.9)) +
  theme_classic() + ylim(c(0, 90)) + scale_fill_manual(values = c('gray', 'red', 'blue'))
P-values
# DESeq2 result row for ENSG00000007866 (Khaitovich-lab data, HS vs MM).
brain_bulk_KL_res['ENSG00000007866', ]
## log2 fold change (MLE): species HS vs MM
## Wald test p-value: species HS vs MM
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000007866 39.6105 1.75034 0.441979 3.96023 0.000074879
## padj
## <numeric>
## ENSG00000007866 0.0004964
# DESeq2 result row for ENSG00000007866 (Khaitovich-lab data, HS vs PP).
brain_bulk_KL_res3['ENSG00000007866', ]
## log2 fold change (MLE): species HS vs PP
## Wald test p-value: species HS vs PP
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000007866 39.6105 1.16246 0.350057 3.32078 0.000897662
## padj
## <numeric>
## ENSG00000007866 0.00614593
# DESeq2 result row for ENSG00000007866 among the significant in-house hits.
brain_bulk_PL_sig['ENSG00000007866', ]
## log2 fold change (MLE): species HS vs MM
## Wald test p-value: species HS vs MM
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000007866 30.2115 1.78403 0.581936 3.06568 0.00217175
## padj
## <numeric>
## ENSG00000007866 0.00825217
Domains were identified using TOPDOM. We read them in here. We consider boundaries that have support in two replicates. First chunk lifts over the boundary coordinates between human and chimpanzee assemblies.
# Read the TopDom domain calls of the four samples and lift the boundaries
# over to the other species' assembly (human -> chimp and chimp -> human).
ele_domains <- readTADs(paste0(outputs_directory, "25kb_domains/hs_ele_krnorm.all.25kb.topdom.bedpe"))
fas_domains <- readTADs(paste0(outputs_directory, "25kb_domains/hs_pf_krnorm.all.25kb.topdom.bedpe"))
man_domains <- readTADs(paste0(outputs_directory, "25kb_domains/pt_mandy_krnorm.all.25kb.topdom.bedpe"))
saa_domains <- readTADs(paste0(outputs_directory, "25kb_domains/pt_sandra_krnorm.all.25kb.topdom.bedpe"))

# Human samples -> chimp assembly.
ele_domains_lift_over_Pt6 <- liftOverBoundaries(ele_domains, chain_HsPt, WSize = 500)
fas_domains_lift_over_Pt6 <- liftOverBoundaries(fas_domains, chain_HsPt, WSize = 500)
bed_file <- c(ele_domains_lift_over_Pt6$lifted_over, fas_domains_lift_over_Pt6$lifted_over)
export.bed(bed_file, con = paste0(outputs_directory, "ele_fas_boundaries_lift_Pt6.bed"))
save(ele_domains_lift_over_Pt6, fas_domains_lift_over_Pt6,
     file = paste0(objects_directory, "ele_fas_boundaries_lift_Pt6.RData"))
# Chimp samples -> human assembly.
man_domains_lift_over_hg38 <- liftOverBoundaries(man_domains, chain_PtHs, WSize = 500)
saa_domains_lift_over_hg38 <- liftOverBoundaries(saa_domains, chain_PtHs, WSize = 500)
bed_file <- c(man_domains_lift_over_hg38$lifted_over, saa_domains_lift_over_hg38$lifted_over)
export.bed(bed_file, con = paste0(outputs_directory, "man_saa_boundaries_lift_Hg38.bed"))
save(man_domains_lift_over_hg38, saa_domains_lift_over_hg38,
     file = paste0(objects_directory, "man_saa_domains_lift_Hg38.RData"))
We display the reproducibility
# Reproducibility of boundary calls between the two replicates of each
# species, shown as Venn diagrams and a stacked bar plot.
ele_domains <- readTADs(paste0(outputs_directory, "25kb_domains/hs_ele_krnorm.all.25kb.topdom.bedpe"))
fas_domains <- readTADs(paste0(outputs_directory, "25kb_domains/hs_pf_krnorm.all.25kb.topdom.bedpe"))
man_domains <- readTADs(paste0(outputs_directory, "25kb_domains/pt_mandy_krnorm.all.25kb.topdom.bedpe"))
saa_domains <- readTADs(paste0(outputs_directory, "25kb_domains/pt_sandra_krnorm.all.25kb.topdom.bedpe"))

load(paste0(objects_directory, "man_saa_domains_lift_Hg38.RData"))
load(paste0(objects_directory, "ele_fas_boundaries_lift_Pt6.RData"))
all_human_boundaires <- getAllBoundaries(ele_domains$boundaries, fas_domains$boundaries)
all_chimp_boundaires <- getAllBoundaries(man_domains$boundaries, saa_domains$boundaries)

# Indices of the combined boundaries that each replicate supports.
peak_list <- list(ELE30 = unique(queryHits(findOverlaps(all_human_boundaires, ele_domains$boundaries))),
                  PF = unique(queryHits(findOverlaps(all_human_boundaires, fas_domains$boundaries))))
ggVennDiagram(peak_list, label_alpha = 0) + scale_fill_distiller(direction = 1)
peak_list <- list(Sandra = unique(queryHits(findOverlaps(all_chimp_boundaires, saa_domains$boundaries))),
                  Mandy = unique(queryHits(findOverlaps(all_chimp_boundaires, man_domains$boundaries))))
ggVennDiagram(peak_list, label_alpha = 0) + scale_fill_distiller(direction = 1)
# Hard-coded counts, one column per species.
# NOTE(review): presumably copied from the Venn diagrams above -- confirm.
m <- matrix(c(6100,
              1021,
              769,
              6000,
              593,
              1131),
            ncol = 2, nrow = 3,
            byrow = FALSE)
barplot(m, col = c("green4", "steelblue3", "blue4"), ylim = c(0, 8000), ylab = "Loops",
        names.arg = c("Human", "Chimpanzee"))
axis(2, lwd = 2)
export.bed(all_human_boundaires, con = paste0(outputs_directory, "all_human_boundaires_input.bed"))
export.bed(all_chimp_boundaires, con = paste0(outputs_directory, "all_chimp_boundaires.bed"))
Check the evolutionary conservation of the reproducible boundaries.
# Boundaries found in both replicates of a species; the chimp ones are also
# lifted over to the human assembly.
human_boundaires_reproducible <- ele_domains$boundaries[
  queryHits(findOverlaps(ele_domains$boundaries, fas_domains$boundaries))
]
# liftOverBoundaries() is called with a list holding a `boundaries` element.
chimp_domains <- vector("list", 1)
names(chimp_domains) <- "boundaries"
chimp_domains$boundaries <- man_domains$boundaries[
  queryHits(findOverlaps(man_domains$boundaries, saa_domains$boundaries))
]

chimp_domains_lift_over_hg38 <- liftOverBoundaries(chimp_domains, chain_PtHs, WSize = 500)
export.bed(chimp_domains_lift_over_hg38$lifted_over,
           con = paste0(outputs_directory, "chimp_domains_lift_over_hg38.bed"))
save(chimp_domains_lift_over_hg38, file = paste0(objects_directory, "chimp_domains_lift_over_hg38.RData"))
save(human_boundaires_reproducible, file = paste0(objects_directory, "human_boundaires_reproducible.RData"))
Display the result
# Overlap between reproducible human boundaries and lifted-over chimp
# boundaries, and the set of boundaries shared by both species.
load(paste0(objects_directory, "chimp_domains_lift_over_hg38.RData"))
load(paste0(objects_directory, "human_boundaires_reproducible.RData"))
allBound <- getAllBoundaries(human_boundaires_reproducible, chimp_domains_lift_over_hg38$lifted_over)
peak_list <- list(Human = unique(queryHits(findOverlaps(allBound, human_boundaires_reproducible))),
                  Chimp = unique(queryHits(findOverlaps(allBound, chimp_domains_lift_over_hg38$lifted_over))))
ggVennDiagram(peak_list, label_alpha = 0) + scale_fill_distiller(direction = 1)
# Shared boundaries in human coordinates.
all_evol_shared_boundaries <- human_boundaires_reproducible[
  unique(queryHits(findOverlaps(human_boundaires_reproducible, chimp_domains_lift_over_hg38$lifted_over)))
]
# The same boundaries in chimp coordinates: original chimp ranges whose
# lifted-over counterpart (matched by name) overlaps a shared boundary.
shared_lifted_names <- names(chimp_domains_lift_over_hg38$lifted_over[
  queryHits(findOverlaps(chimp_domains_lift_over_hg38$lifted_over, all_evol_shared_boundaries))
])
all_evol_shared_boundaries_Pt <- chimp_domains_lift_over_hg38$original[
  which(names(chimp_domains_lift_over_hg38$original) %in% shared_lifted_names)
]
Sometimes human boundaries fall in regions that have no reads or overtly low mappability in the chimp, and vice versa. We want to get rid of those instances. Boundaries called in human should not be in the vicinity of low-coverage regions in either human or chimp. Boundaries called in chimp should not be in the vicinity of low-coverage bins in either chimp or human.
# Find low-coverage bins in one Hi-C data set.
#
# hic  : named list (one element per chromosome); each element has a sparse
#        contact matrix in `$LFM`.
# bins : genomic bins; the bins of a chromosome are assumed to be in the same
#        order as the rows of its contact matrix -- TODO confirm.
#
# For every chromosome only contacts more than `min_dist` bins apart are
# kept (j > i + min_dist), the matrix is symmetrised, and bins whose row sum
# is below `min_contacts` are returned.
find_low_coverage_bins <- function(hic, bins, min_dist = 200, min_contacts = 100) {
  do.call("c", lapply(as.list(names(hic)), function(x) {
    print(x)
    thischr <- bins[which(chrom(bins) == x)]
    m <- hic[[x]]
    tp <- as.data.frame(summary(m$LFM))
    tp <- tp[tp$j > (tp$i + min_dist), ]
    M <- Matrix::Matrix(0, nrow = nrow(m$LFM), ncol = ncol(m$LFM), sparse = TRUE)
    M[cbind(tp$i, tp$j)] <- tp$x
    M <- M + t(M)
    thischr[which(rowSums(M) < min_contacts)]
  }))
}

lowCoverageBinsHG38 <- find_low_coverage_bins(ele, gagr)
export.bed(lowCoverageBinsHG38, con = paste0(outputs_directory, "lowCoverageBinsHG38.bed"))
lowCoverageBinsPT6 <- find_low_coverage_bins(mandy, gagr_pt)
export.bed(lowCoverageBinsPT6, con = paste0(outputs_directory, "lowCoverageBinsPaT6.bed"))
lowCoverageBinsHG38_2 <- find_low_coverage_bins(fa, gagr)
export.bed(lowCoverageBinsHG38_2, con = paste0(outputs_directory, "lowCoverageBinsHG38_2.bed"))
lowCoverageBinsPT6_2 <- find_low_coverage_bins(sa, gagr_pt)
export.bed(lowCoverageBinsPT6_2, con = paste0(outputs_directory, "lowCoverageBinsPaT6_2.bed"))
save(lowCoverageBinsHG38, lowCoverageBinsPT6, file = paste0(objects_directory, "lowCoverageBins.RData"))
save(lowCoverageBinsHG38_2, lowCoverageBinsPT6_2, file = paste0(objects_directory, "lowCoverageBins2.RData"))
Lift over these intervals of low coverage. Then lift over the boundaries for the next steps of the analyses.
# Widen the low-coverage bins to 50 kb, keep those seen in both replicates,
# and lift them over to the other species' assembly.
load(paste0(objects_directory, "lowCoverageBins.RData"))
load(paste0(objects_directory, "lowCoverageBins2.RData"))
lowCoverageBinsHG38 <- GenomicRanges::resize(lowCoverageBinsHG38, 50000, fix = "center")
lowCoverageBinsHG38_2 <- GenomicRanges::resize(lowCoverageBinsHG38_2, 50000, fix = "center")

lowCoverageBinsPT6 <- GenomicRanges::resize(lowCoverageBinsPT6, 50000, fix = "center")
lowCoverageBinsPT6_2 <- GenomicRanges::resize(lowCoverageBinsPT6_2, 50000, fix = "center")

# Bins of the first replicate that overlap a bin of the second replicate.
lowCoverageBinsHG38 <- lowCoverageBinsHG38[queryHits(findOverlaps(lowCoverageBinsHG38, lowCoverageBinsHG38_2))]
lowCoverageBinsPT6 <- lowCoverageBinsPT6[queryHits(findOverlaps(lowCoverageBinsPT6, lowCoverageBinsPT6_2))]

# used to be 10000
# Lift over a 500 bp window around each centre, then widen back to 50 kb.
lowCoverageBinsHG38_Pt <- GenomicRanges::resize(
  unlist(liftOver(GenomicRanges::resize(lowCoverageBinsHG38, 500, fix = "center"),
                  chain = chain_HsPt)), 50000, fix = "center")
lowCoverageBinsPt_Hg38 <- GenomicRanges::resize(
  unlist(liftOver(GenomicRanges::resize(lowCoverageBinsPT6, 500, fix = "center"),
                  chain = chain_PtHs)), 50000, fix = "center")
export.bed(lowCoverageBinsPt_Hg38, con = paste0(outputs_directory, "lowCoverageBinsPt_Hg38.bed"))
# Save both lifted-over sets. The original listed lowCoverageBinsPt_Hg38
# twice, so lowCoverageBinsHG38_Pt never reached the .RData file although the
# boundary filtering that loads this file uses it.
save(lowCoverageBinsHG38_Pt, lowCoverageBinsPt_Hg38,
     file = paste0(objects_directory, "low_coverage_bins_lifted_over.RData"))
Get to the list of human and chimp specific boundaries. Consider boundaries observed in both replicates. Remove boundaries within regions with poor mappability in both species (50kb intervals centered on the lifted over region).
# Combine the boundaries of both replicates per species and remove those that
# fall in poorly mappable regions of either species.
load(paste0(objects_directory, "ele_fas_boundaries_lift_Pt6.RData"))
load(paste0(objects_directory, "man_saa_domains_lift_Hg38.RData"))
### -----------
all_human_boundaires_input <- getAllBoundaries(ele_domains$boundaries,
                                               fas_domains$boundaries)
all_chimp_boundaires_input <- getAllBoundaries(man_domains$boundaries,
                                               saa_domains$boundaries)

all_chimp_boundaires_Hg38 <- getAllBoundaries(man_domains_lift_over_hg38$lifted_over,
                                              saa_domains_lift_over_hg38$lifted_over)
export.bed(all_chimp_boundaires_Hg38, con = paste0(outputs_directory, "all_chimp_boundaires_Hg38_tp.bed"))
### --------------------
### remove boundaries that intersect poorly mappable regions in the two species
# A logical mask is used instead of x[-queryHits(...)]: with zero overlaps
# the negative-index form would drop every boundary instead of keeping all.
all_human_boundaires <- all_human_boundaires_input[
  !overlapsAny(all_human_boundaires_input, c(lowCoverageBinsHG38, lowCoverageBinsPt_Hg38))
]
all_chimp_boundaires <- all_chimp_boundaires_input[
  !overlapsAny(all_chimp_boundaires_input, c(lowCoverageBinsPT6, lowCoverageBinsHG38_Pt))
]

export.bed(all_human_boundaires, con = paste0(outputs_directory, "all_human_boundaires_Hg38.bed"))
export.bed(all_chimp_boundaires, con = paste0(outputs_directory, "all_chimp_boundaires_Pt6.bed"))
Liftovers: - we pick the longest one - lift over needs to be on the same chromosome.
Identify species specific boundaries. To call a boundary species specific it needs to be: - found in both replicates of this species - not in a poorly mappable region in either of the two species - never found in the other species - be amenable for liftOver.
# Species-specific boundaries: reproducible in one species, never called in
# the other, outside low-coverage regions, and liftable to the other assembly.
#
# All removals use a logical mask (!overlapsAny) instead of
# x[-queryHits(...)]: with zero overlaps the negative-index form would drop
# every boundary instead of keeping all of them.
human_specific_boundaries <- human_boundaires_reproducible[
  !overlapsAny(human_boundaires_reproducible, all_chimp_boundaires_Hg38)
]
human_specific_boundaries <- human_specific_boundaries[
  !overlapsAny(human_specific_boundaries, reduce(c(lowCoverageBinsHG38, lowCoverageBinsPt_Hg38)))
]

chimp_specific_boundaries <- chimp_domains$boundaries[
  !overlapsAny(chimp_domains$boundaries,
               reduce(c(ele_domains_lift_over_Pt6$lifted_over,
                        fas_domains_lift_over_Pt6$lifted_over)))
]
chimp_specific_boundaries <- chimp_specific_boundaries[
  !overlapsAny(chimp_specific_boundaries, reduce(c(lowCoverageBinsPT6, lowCoverageBinsHG38_Pt)))
]
## Boundaries need to be liftable! Otherwise we cannot tell whether a boundary
## is missing because it could not be lifted over or because it was not called.
chimp_specific_boundaries_Hg38 <- liftOverBoundaries(list(boundaries = chimp_specific_boundaries), chain_PtHs, WSize = 500)
human_specific_boundaries_Pt <- liftOverBoundaries(list(boundaries = human_specific_boundaries), chain_HsPt, WSize = 500)
## Double filtering for liftovers: a chimp boundary lifted over to hg38 should
## not be observed among the human boundaries (and vice versa).
# Keep only the originals that were lifted over, and widen both to 50 kb.
chimp_specific_boundaries <- GenomicRanges::resize(
  chimp_specific_boundaries_Hg38$original[
    which(names(chimp_specific_boundaries_Hg38$original) %in% names(chimp_specific_boundaries_Hg38$lifted_over))
  ], 50000, fix = "center")
chimp_specific_boundaries_Hg38 <- GenomicRanges::resize(chimp_specific_boundaries_Hg38$lifted_over, 50000, fix = "center")
# Original and lifted-over ranges must be parallel: the masks below are
# computed on one and applied to both.
all(names(chimp_specific_boundaries) == names(chimp_specific_boundaries_Hg38))
human_specific_boundaries <- GenomicRanges::resize(
  human_specific_boundaries_Pt$original[
    which(names(human_specific_boundaries_Pt$original) %in% names(human_specific_boundaries_Pt$lifted_over))
  ], 50000, fix = "center")
human_specific_boundaries_Pt <- GenomicRanges::resize(human_specific_boundaries_Pt$lifted_over, 50000, fix = "center")
all(names(human_specific_boundaries) == names(human_specific_boundaries_Pt))
# Drop pairs whose lifted-over range hits any boundary of the other species.
keep_chimp <- !overlapsAny(chimp_specific_boundaries_Hg38, all_human_boundaires_input)
keep_human <- !overlapsAny(human_specific_boundaries_Pt, all_chimp_boundaires_input)
chimp_specific_boundaries <- chimp_specific_boundaries[keep_chimp]
human_specific_boundaries <- human_specific_boundaries[keep_human]

chimp_specific_boundaries_Hg38 <- chimp_specific_boundaries_Hg38[keep_chimp]
human_specific_boundaries_Pt <- human_specific_boundaries_Pt[keep_human]

export.bed(chimp_specific_boundaries_Hg38,
           con = paste0(outputs_directory, "/chimp_specific_boundaries_Hg38.bed"))
export.bed(human_specific_boundaries_Pt,
           con = paste0(outputs_directory, "human_specific_boundaries_Pt.bed"))
export.bed(human_specific_boundaries,
           con = paste0(outputs_directory, "human_specific_boundaries.bed"))
export.bed(chimp_specific_boundaries,
           con = paste0(outputs_directory, "chimp_specific_boundaries.bed"))
##########################
species_specific_boundaries <- c(human_specific_boundaries, chimp_specific_boundaries_Hg38)
export.bed(species_specific_boundaries,
           con = paste0(outputs_directory, "species_specific_boundaries.bed"))
save(human_specific_boundaries, chimp_specific_boundaries_Hg38, human_specific_boundaries_Pt, chimp_specific_boundaries,
     file = paste0(objects_directory, "species_specific_boundaries.RData"))
##########################
We have the species specific boundaries
# Reload the saved boundary sets and rebuild the combined (hg38) set.
load(paste0(objects_directory, "species_specific_boundaries.RData"))
load(paste0(objects_directory, "low_coverage_bins_lifted_over.RData"))
species_specific_boundaries <- c(human_specific_boundaries, chimp_specific_boundaries_Hg38)
# Human Hi-C: read the per-chromosome matrices of both samples and run IPF
# with `itn` iterations on each.
load(paste0(objects_directory, 'si.RData'))
chroms_combs_hs <- data.frame(V1 = paste0("chr", c(1:22, "X")),
                              V2 = paste0("chr", c(1:22, "X")), stringsAsFactors = FALSE)
itn <- 20
chroms <- paste0("chr", c(1:22, "X"))
ele_lfm_5kb <- read.hic_files(paste0(dumped_directory_ele), "", ".matrix.txt", ga, paste0("chr", c(1:22, "X")))
ele <- lapply(ele_lfm_5kb, function(m) { IPF(m, numberOfIterations = itn) })
save(ele, file = paste0(objects_directory, "Ele30_hic.RData"))
fa_lfm_5kb <- read.hic_files(paste0(dumped_directory_fa), "", ".matrix.txt", ga, paste0("chr", c(1:22, "X")))
fa <- lapply(fa_lfm_5kb, function(m) { IPF(m, numberOfIterations = itn) })
save(fa, file = paste0(objects_directory, "FetalAstrocytes_hic.RData"))
# Chimp Hi-C maps: same procedure as for the human samples, using the
# chimp chromosome set (chr2A / chr2B instead of chr2).
load(paste0(objects_directory, 'si_pt.RData'))
chroms_combs_pt <- data.frame(V1 = paste0("chr", c(c(1, "2A", "2B", 3:22), "X")),
                              V2 = paste0("chr", c(c(1, "2A", "2B", 3:22), "X")),
                              stringsAsFactors = FALSE)
itn <- 20
chroms <- paste0("chr", c(c(1, "2A", "2B", 3:22), "X"))
## ---------------
mandy_lfm_5kb <- read.hic_files(paste0(dumped_directory_mandy), "", ".matrix.txt",
                                ga_pt,
                                chroms = paste0("chr", c(c(1, "2A", "2B", 3:22), "X")))
mandy <- lapply(mandy_lfm_5kb, function(m) {
  IPF(m, numberOfIterations = itn)
})
save(mandy, file = paste0(objects_directory, "Mandy_hic.RData"))
## ---------------
sa_lfm_5kb <- read.hic_files(paste0(dumped_directory_sandra), "", ".matrix.txt",
                             ga_pt,
                             chroms = paste0("chr", c(c(1, "2A", "2B", 3:22), "X")))
sa <- lapply(sa_lfm_5kb, function(m) {
  IPF(m, numberOfIterations = itn)
})
save(sa, file = paste0(objects_directory, "SandraA_hic.RData"))
# Insulation scores of each boundary set on the two human Hi-C maps
# (ele, fa). Each result is cached in its own RData file.
# The trailing 5, 3, 10 are positional InsulationScore() parameters; their
# meaning is defined where InsulationScore() is declared, not here.
human_spe_bound_IS_ele <- InsulationScore(human_specific_boundaries,
                                          ele, gagr, 5, 3, 10)
save(human_spe_bound_IS_ele,
     file = paste0(objects_directory, "human_spe_bound_IS_ele.RData"))
human_spe_bound_IS_fas <- InsulationScore(human_specific_boundaries,
                                          fa, gagr, 5, 3, 10)
save(human_spe_bound_IS_fas,
     file = paste0(objects_directory, "human_spe_bound_IS_fas.RData"))

chimp_spe_bound_IS_ele <- InsulationScore(chimp_specific_boundaries_Hg38,
                                          ele, gagr, 5, 3, 10)
save(chimp_spe_bound_IS_ele,
     file = paste0(objects_directory, "chimp_spe_bound_IS_ele.RData"))
chimp_spe_bound_IS_fas <- InsulationScore(chimp_specific_boundaries_Hg38,
                                          fa, gagr, 5, 3, 10)
save(chimp_spe_bound_IS_fas,
     file = paste0(objects_directory, "chimp_spe_bound_IS_fas.RData"))

shared_bound_IS_ele <- InsulationScore(all_evol_shared_boundaries,
                                       ele, gagr, 5, 3, 10)
save(shared_bound_IS_ele,
     file = paste0(objects_directory, "shared_bound_IS_ele.RData"))
shared_bound_IS_fas <- InsulationScore(all_evol_shared_boundaries,
                                       fa, gagr, 5, 3, 10)
save(shared_bound_IS_fas,
     file = paste0(objects_directory, "shared_bound_IS_fas.RData"))
## values for chimpanzee samples
# Same boundary sets, scored on the two chimp Hi-C maps (mandy, sa) using
# the *_Pt boundary objects and the chimp bin annotation gagr_pt.
human_spe_bound_IS_mandy <- InsulationScore(human_specific_boundaries_Pt,
                                            mandy, gagr_pt, 5, 3, 10)
save(human_spe_bound_IS_mandy,
     file = paste0(objects_directory, "human_spe_bound_IS_mandy.RData"))
human_spe_bound_IS_sandraA <- InsulationScore(human_specific_boundaries_Pt,
                                              sa, gagr_pt, 5, 3, 10)
save(human_spe_bound_IS_sandraA,
     file = paste0(objects_directory, "human_spe_bound_IS_sandraA.RData"))

chimp_spe_bound_IS_mandy <- InsulationScore(chimp_specific_boundaries,
                                            mandy, gagr_pt, 5, 3, 10)
save(chimp_spe_bound_IS_mandy,
     file = paste0(objects_directory, "chimp_spe_bound_IS_mandy.RData"))
chimp_spe_bound_IS_sandraA <- InsulationScore(chimp_specific_boundaries,
                                              sa, gagr_pt, 5, 3, 10)
save(chimp_spe_bound_IS_sandraA,
     file = paste0(objects_directory, "chimp_spe_bound_IS_sandraA.RData"))

shared_bound_IS_mandy <- InsulationScore(all_evol_shared_boundaries_Pt,
                                         mandy, gagr_pt, 5, 3, 10)
save(shared_bound_IS_mandy,
     file = paste0(objects_directory, "shared_bound_IS_mandy.RData"))
shared_bound_IS_sandraA <- InsulationScore(all_evol_shared_boundaries_Pt,
                                           sa, gagr_pt, 5, 3, 10)
save(shared_bound_IS_sandraA,
     file = paste0(objects_directory, "shared_bound_IS_sandraA.RData"))
Species specific boundaries, insulation change
# Reload the twelve cached insulation-score tables, grouped by boundary set.
# Human-specific boundaries
load(paste0(objects_directory, "human_spe_bound_IS_ele.RData"))
load(paste0(objects_directory, "human_spe_bound_IS_fas.RData"))
load(paste0(objects_directory, "human_spe_bound_IS_mandy.RData"))
load(paste0(objects_directory, "human_spe_bound_IS_sandraA.RData"))
# Chimp-specific boundaries
load(paste0(objects_directory, "chimp_spe_bound_IS_ele.RData"))
load(paste0(objects_directory, "chimp_spe_bound_IS_fas.RData"))
load(paste0(objects_directory, "chimp_spe_bound_IS_mandy.RData"))
load(paste0(objects_directory, "chimp_spe_bound_IS_sandraA.RData"))
# Shared boundaries
load(paste0(objects_directory, "shared_bound_IS_ele.RData"))
load(paste0(objects_directory, "shared_bound_IS_fas.RData"))
load(paste0(objects_directory, "shared_bound_IS_mandy.RData"))
load(paste0(objects_directory, "shared_bound_IS_sandraA.RData"))
# Collapse every insulation table to one value per boundary:
# log2( mean(columns 1 and 3) / column 2 ).
# NOTE(review): columns 1/3 vs 2 presumably hold flanking vs central
# contact values -- confirm against InsulationScore().
human_spe_bound_IS_ele <-
  log2(rowMeans(human_spe_bound_IS_ele[, c(1, 3)]) / human_spe_bound_IS_ele[, 2])
human_spe_bound_IS_mandy <-
  log2(rowMeans(human_spe_bound_IS_mandy[, c(1, 3)]) / human_spe_bound_IS_mandy[, 2])
human_spe_bound_IS_fas <-
  log2(rowMeans(human_spe_bound_IS_fas[, c(1, 3)]) / human_spe_bound_IS_fas[, 2])
human_spe_bound_IS_sandraA <-
  log2(rowMeans(human_spe_bound_IS_sandraA[, c(1, 3)]) / human_spe_bound_IS_sandraA[, 2])

chimp_spe_bound_IS_ele <-
  log2(rowMeans(chimp_spe_bound_IS_ele[, c(1, 3)]) / chimp_spe_bound_IS_ele[, 2])
chimp_spe_bound_IS_mandy <-
  log2(rowMeans(chimp_spe_bound_IS_mandy[, c(1, 3)]) / chimp_spe_bound_IS_mandy[, 2])
chimp_spe_bound_IS_fas <-
  log2(rowMeans(chimp_spe_bound_IS_fas[, c(1, 3)]) / chimp_spe_bound_IS_fas[, 2])
chimp_spe_bound_IS_sandraA <-
  log2(rowMeans(chimp_spe_bound_IS_sandraA[, c(1, 3)]) / chimp_spe_bound_IS_sandraA[, 2])

shared_bound_IS_mandy <-
  log2(rowMeans(shared_bound_IS_mandy[, c(1, 3)]) / shared_bound_IS_mandy[, 2])
shared_bound_IS_sandraA <-
  log2(rowMeans(shared_bound_IS_sandraA[, c(1, 3)]) / shared_bound_IS_sandraA[, 2])
shared_bound_IS_ele <-
  log2(rowMeans(shared_bound_IS_ele[, c(1, 3)]) / shared_bound_IS_ele[, 2])
shared_bound_IS_fas <-
  log2(rowMeans(shared_bound_IS_fas[, c(1, 3)]) / shared_bound_IS_fas[, 2])

# Pool the shared-boundary scores per species and drop non-finite values
# (NA / NaN / +-Inf produced by zero or missing counts in the ratio).
is_hs <- c(shared_bound_IS_ele, shared_bound_IS_fas)
is_pt <- c(shared_bound_IS_mandy, shared_bound_IS_sandraA)
is_hs <- is_hs[is.finite(is_hs)]
is_pt <- is_pt[is.finite(is_pt)]
## -----
# Human-specific boundaries: per-boundary maximum over the two samples of
# each species, aligned by boundary name, then human minus chimp.
# NOTE(review): rowMax() is not base R (presumably Biobase::rowMax) -- confirm.
human_spe_bound_human <- rowMax(cbind(human_spe_bound_IS_ele, human_spe_bound_IS_fas))
names(human_spe_bound_human) <- names(human_spe_bound_IS_ele)
human_spe_bound_chimp <- rowMax(cbind(human_spe_bound_IS_mandy, human_spe_bound_IS_sandraA))
names(human_spe_bound_chimp) <- names(human_spe_bound_IS_mandy)
# Reorder the human values to follow the chimp vector's names.
human_spe_bound_human <- human_spe_bound_human[
  match(names(human_spe_bound_chimp), names(human_spe_bound_human))]
human_spe_boundaries_evol <- human_spe_bound_human - human_spe_bound_chimp
## -----
# Chimp-specific boundaries: same computation; here the chimp values are
# reordered to follow the human vector's names.
chimp_spe_bound_human <- rowMax(cbind(chimp_spe_bound_IS_ele, chimp_spe_bound_IS_fas))
names(chimp_spe_bound_human) <- names(chimp_spe_bound_IS_ele)
chimp_spe_bound_chimp <- rowMax(cbind(chimp_spe_bound_IS_mandy, chimp_spe_bound_IS_sandraA))
names(chimp_spe_bound_chimp) <- names(chimp_spe_bound_IS_mandy)
chimp_spe_bound_chimp <- chimp_spe_bound_chimp[
  match(names(chimp_spe_bound_human), names(chimp_spe_bound_chimp))]
chimp_spe_boundaries_evol <- chimp_spe_bound_human - chimp_spe_bound_chimp
# Compare the human-minus-chimp insulation change between human-specific
# and chimp-specific boundaries (outliers hidden), then test the difference.
# The y-range was given as `tlim`, which is not a graphical parameter and
# was ignored with a warning; `ylim` is the intended argument.
boxplot(human_spe_boundaries_evol, chimp_spe_boundaries_evol,
        outline = FALSE, col = "white", border = c("black", "red"),
        ylim = c(-0.6, 0.6), ylab = "Insulation change Human/Chimp [log2]",
        names = c("Human", "Chimp"), xlab = "Species specificity of boundary")
t.test(human_spe_boundaries_evol,chimp_spe_boundaries_evol)
##
## Welch Two Sample t-test
##
## data: human_spe_boundaries_evol and chimp_spe_boundaries_evol
## t = 6.6461, df = 274.65, p-value = 0.0000000001614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2357400 0.4341763
## sample estimates:
## mean of x mean of y
## 0.1432925 -0.1916657
# Make bin names unique across chromosomes: "<chrom>_<old name>".
names(gagr) <- paste(chrom(gagr), names(gagr), sep = "_")

# Attach a per-bin log2 insulation score to a copy of the bin ranges.
#
# IS    insulation table with bin names as rownames; columns 1 and 3 are
#       averaged and divided by column 2 (a 0.001 pseudo-count is added to
#       every value so the ratio stays finite).
# GAGR  ranges object whose names() are the bin names.
#
# Returns GAGR without its `binid` column and with a numeric `score`
# column: the log2 ratio for bins present in IS, 0 for all other bins.
processIS <- function(IS, GAGR) {
  res <- GAGR
  res$binid <- NULL
  log_ratio <- log2(rowMeans(0.001 + IS[, c(1, 3)]) / (0.001 + IS[, 2]))
  bin_idx <- match(rownames(IS), names(res))
  # Rows of IS without a matching bin yield NA here; an NA subscript would
  # abort the assignment, so those rows are skipped.
  matched <- !is.na(bin_idx)
  score <- numeric(length(res))
  score[bin_idx[matched]] <- log_ratio[matched]
  res$score <- score
  res
}
## --------
# Genome-wide insulation (all bins except chrY) for each Hi-C map: compute,
# cache the raw table, convert to a per-bin log2 score with processIS(),
# and export as bedGraph.
genome_wide_IS_ele <- InsulationScore(gagr[which(chrom(gagr) != "chrY")],
                                      ele, gagr, 5, 3, 10)
save(genome_wide_IS_ele,
     file = paste0(objects_directory, "genome_wide_IS_ele.RData"))
genome_wide_IS_ele_gr <- processIS(genome_wide_IS_ele, gagr)
export.bedGraph(genome_wide_IS_ele_gr,
                con = paste0(outputs_directory, "genome_wide_IS_ele_gagr.bedGraph"))

genome_wide_IS_fas <- InsulationScore(gagr[which(chrom(gagr) != "chrY")],
                                      fa, gagr, 5, 3, 10)
save(genome_wide_IS_fas,
     file = paste0(objects_directory, "genome_wide_IS_fas.RData"))
genome_wide_IS_fas_gr <- processIS(genome_wide_IS_fas, gagr)
export.bedGraph(genome_wide_IS_fas_gr,
                con = paste0(outputs_directory, "genome_wide_IS_fas_gr.bedGraph"))

genome_wide_IS_mandy <- InsulationScore(gagr_pt[which(chrom(gagr_pt) != "chrY")],
                                        mandy, gagr_pt, 5, 3, 10)
save(genome_wide_IS_mandy,
     file = paste0(objects_directory, "genome_wide_IS_mandy.RData"))
genome_wide_IS_mandy_gr <- processIS(genome_wide_IS_mandy, gagr_pt)
export.bedGraph(genome_wide_IS_mandy_gr,
                con = paste0(outputs_directory, "genome_wide_IS_mandy_gr.bedGraph"))

genome_wide_IS_sandraA <- InsulationScore(gagr_pt[which(chrom(gagr_pt) != "chrY")],
                                          sa, gagr_pt, 5, 3, 10)
save(genome_wide_IS_sandraA,
     file = paste0(objects_directory, "genome_wide_IS_sandraA.RData"))
genome_wide_IS_sandraA_gr <- processIS(genome_wide_IS_sandraA, gagr_pt)
export.bedGraph(genome_wide_IS_sandraA_gr,
                con = paste0(outputs_directory, "genome_wide_IS_sandraA_gr.bedGraph"))
Final plots
Display the insulation scores for all the bins, species specific as well as shared boundaries.
# Re-import the exported genome-wide bedGraph tracks and average the two
# samples of each species bin by bin.
# Note: the human `ele` track is stored under "..._ele_gagr.bedGraph", and
# the SandraA track is read into `genome_wide_IS_sandraA` (no `_gr` suffix),
# replacing the raw table of the same name.
genome_wide_IS_ele_gr <- import.bedGraph(paste0(outputs_directory, "genome_wide_IS_ele_gagr.bedGraph"))
genome_wide_IS_fas_gr <- import.bedGraph(paste0(outputs_directory, "genome_wide_IS_fas_gr.bedGraph"))
genome_wide_IS_sandraA <- import.bedGraph(paste0(outputs_directory, "genome_wide_IS_sandraA_gr.bedGraph"))
genome_wide_IS_mandy_gr <- import.bedGraph(paste0(outputs_directory, "genome_wide_IS_mandy_gr.bedGraph"))

genome_wide_IS_human <- rowMeans(cbind(genome_wide_IS_ele_gr$score,
                                       genome_wide_IS_fas_gr$score))
genome_wide_IS_chimp <- rowMeans(cbind(genome_wide_IS_sandraA$score,
                                       genome_wide_IS_mandy_gr$score))
# Eight-group overview: genome-wide bins, shared boundaries and
# species-specific boundaries, each measured in human (black) and chimp (red).
boxplot(
  genome_wide_IS_human, genome_wide_IS_chimp,
  is_hs, human_spe_bound_human, human_spe_bound_chimp,
  is_pt, chimp_spe_bound_chimp, chimp_spe_bound_human,
  outline = FALSE,
  col = "white",
  border = c("black","red","black","black","red","red","red","black"),
  ylim = c(-1, 1),
  ylab = expression("Insulation (log"[2]*")"),
  names = c("GW Hs","GW Pt","Bound Hs","Hs-sp Hs","Hs-spe Pt","Bound Pt","Pt-spe Pt","Pt-spe Hs"),
  las = 2
)
# Redraw both axes and the frame with heavier lines; dashed zero reference.
axis(1, lwd = 2, at = 1:8,
     labels = c("GW Hs","GW Pt","Bound Hs","Hs-sp Hs","Hs-spe Pt","Bound Pt","Pt-spe Pt","Pt-spe Hs"),
     las = 2)
axis(2, lwd = 2, las = 2)
box(col = "black", lwd = 2)
abline(h = 0, lwd = 2, lty = 2, col = "gray")
# Human-specific boundaries: insulation in human vs chimp.
t.test(human_spe_bound_human, human_spe_bound_chimp)
##
## Welch Two Sample t-test
##
## data: human_spe_bound_human and human_spe_bound_chimp
## t = 5.3102, df = 510.25, p-value = 0.0000001638
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.09027806 0.19630691
## sample estimates:
## mean of x mean of y
## 0.3546137 0.2113212
# Chimp-specific boundaries: insulation in human vs chimp.
t.test(x = chimp_spe_bound_human, y = chimp_spe_bound_chimp)
##
## Welch Two Sample t-test
##
## data: chimp_spe_bound_human and chimp_spe_bound_chimp
## t = -3.7563, df = 315.76, p-value = 0.0002053
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.30303371 -0.09470476
## sample estimates:
## mean of x mean of y
## 0.3448980 0.5437673
# Read the H3K4me3 and H3K27ac narrowPeak files for human, chimp and
# macaque, restricted to the listed chromosomes, then switch every object
# to UCSC-style sequence names.
# The last positional argument (10 or 4) is passed to readBed_filterChroms()
# as-is; its meaning is defined with that helper.
hs_me3 <- readBed_filterChroms(paste0(outputs_directory, 'ChIP_Seq_H3K4me3_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                               paste0("", c(1:22, 'X')), 10)
hs_k27ac <- readBed_filterChroms(paste0(outputs_directory, 'ChIP_Seq_H3K27ac_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                                 paste0("", c(1:22, 'X')), 10)
pt_me3 <- readBed_filterChroms(paste0(outputs_directory, 'ChIP_Seq_H3K4me3_12-22_PanTro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                               chroms = paste0('chr', c(1, '2A', '2B', 3:22, 'X')), 4)
pt_k27ac <- readBed_filterChroms(paste0(outputs_directory, 'ChIP_Seq_H3K27ac_12-22_PanTro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                                 chroms = paste0('chr', c(1, '2A', '2B', 3:22, 'X')), 4)
mm_me3 <- readBed_filterChroms(paste0(outputs_directory, 'ChIP_Seq_H3K4me3_03-22_MacMul_i-Astro_WT_Becky_Rep_1_RheMac10_peaks.narrowPeak'),
                               chroms = paste0(c(1:22, 'X')), 4)
mm_k27ac <- readBed_filterChroms(paste0(outputs_directory, 'ChIP_Seq_H3K27ac_03-22_MacMul_i-Astro_WT_Becky_Rep_1_RheMac10_peaks.narrowPeak'),
                                 chroms = paste0(c(1:22, 'X')), 4)
seqlevelsStyle(hs_k27ac) <- "ucsc"
seqlevelsStyle(pt_k27ac) <- "ucsc"
seqlevelsStyle(mm_k27ac) <- "ucsc"
seqlevelsStyle(hs_me3) <- "ucsc"
seqlevelsStyle(pt_me3) <- "ucsc"
seqlevelsStyle(mm_me3) <- "ucsc"
Load the files from the other vignettes
# Objects produced by the other vignettes, plus the pooled boundary set
# re-read from its BED export.
load(paste0(objects_directory, 'tss_objects.RData'))
load(paste0(objects_directory, 'DEseq2_RNA.RData'))
species_specific_boundaries <- import.bed(paste0(outputs_directory, "species_specific_boundaries.bed"))
# Read the ATAC narrowPeak files of the three species.
hs_atac <- readBed_filterChroms(paste0(outputs_directory, 'ATAC_Seq_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                                paste0("", c(1:22, 'X')), 10)
pt_atac <- readBed_filterChroms(paste0(outputs_directory, 'ATAC_Seq_12-22_Pantro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                                paste0("chr", c(1, "2A", "2B", 3:22, 'X')), 10)
mm_atac <- readBed_filterChroms(paste0(outputs_directory, 'ATAC_Seq_12-22_MacMul_i-Astro_Becky_merged_RheMac10_peaks.narrowPeak'),
                                paste0("", c(1:22, 'X')), 10)
# Human and macaque files use bare chromosome names; convert to UCSC style
# (the chimp set is already filtered on "chr"-prefixed names).
seqlevelsStyle(hs_atac) <- 'ucsc'
seqlevelsStyle(mm_atac) <- 'ucsc'
# Shrink every peak to a 2-bp interval starting at start + score.
# NOTE(review): `score` presumably carries the summit offset loaded by
# readBed_filterChroms() -- confirm against that helper.
start(hs_atac) <- start(hs_atac) + hs_atac$score
end(hs_atac) <- start(hs_atac) + 1
start(pt_atac) <- start(pt_atac) + pt_atac$score
end(pt_atac) <- start(pt_atac) + 1
start(mm_atac) <- start(mm_atac) + mm_atac$score
end(mm_atac) <- start(mm_atac) + 1
# These files are the inputs of the liftOver runs.
export.bed(hs_atac, con = paste0(outputs_directory, 'ATAC.Hs_clean_summit.narrowPeak'))
export.bed(pt_atac, con = paste0(outputs_directory, 'ATAC.Pt_clean_summit.narrowPeak'))
export.bed(mm_atac, con = paste0(outputs_directory, 'ATAC.Mm_clean_summit.narrowPeak'))
The generic liftOver command: $liftOver -minMatch=0.5 -bedPlus=6 -tab
# Shell commands (run outside R): lift the summit BED file of each species
# over to the other two assemblies, six runs in total.
# liftOver positional arguments: input file, chain file, output file,
# file receiving the records that could not be mapped.
# The liftOver binary is invoked from the current directory, hence the cd.
cd ~/Documents/Tools/
## human to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_summit.unmapped.file
## human to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_summit.unmapped.file
## chimp to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_summit.unmapped.file
## macaque to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_summit.unmapped.file
## chimp to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_summit.unmapped.file
## macaque to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_summit.unmapped.file
We define a unique set of peak intervals that are found in at least one species and that are alignable across all three genomes.
# Read the six liftOver outputs (source species -> target assembly),
# keeping only the chromosomes of the target assembly.
hs_atac_mapped_in_chimp <- readBed_filterChroms(paste0(liftOverPeaks, 'ATAC.Hs_clean_peaks_on_PT6_summit.narrowPeak'),
                                                chroms = paste0('chr', c(1, '2A', '2B', 3:22, 'X')), 4)
hs_atac_mapped_in_rhesus <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Hs_clean_peaks_on_RM10_summit.narrowPeak"),
                                                 chroms = paste0("chr", c(1:22, 'X')), 4)
chimp_mapped_in_humans <- readBed_filterChroms(paste0(liftOverPeaks, 'ATAC.Pt_clean_peaks_on_Hg38_summit.narrowPeak'),
                                               chroms = paste0('chr', c(1:22, 'X')), 4)
macaque_mapped_in_humans <- readBed_filterChroms(paste0(liftOverPeaks, 'ATAC.Mm_clean_peaks_on_Hg38_summit.narrowPeak'),
                                                 chroms = paste0('chr', c(1:22, 'X')), 4)
macaque_in_chimps <- readBed_filterChroms(paste0(liftOverPeaks, 'ATAC.Mm_clean_peaks_on_PanTro6_summit.narrowPeak'),
                                          chroms = paste0('chr', c(1, '2A', '2B', 3:22, 'X')), 4)
chimps_in_macaque <- readBed_filterChroms(paste0(liftOverPeaks, 'ATAC.Pt_clean_peaks_on_RheMac10_summit.narrowPeak'),
                                          chroms = paste0('chr', c(1:22, 'X')), 4)
# Keep only ranges that are exactly 2 bp wide and start beyond position 500,
# then resize them to `finalSize` bp around their centre.
#
# gro        ranges object (supports width(), start() and `[`).
# finalSize  target width passed to GenomicRanges::resize().
#
# Returns the filtered, centre-resized ranges. The start > 500 filter keeps
# the resized window from reaching below position 1 for the sizes used here.
CleanAndResize <- function(gro, finalSize) {
  keep <- width(gro) == 2 & start(gro) > 500
  GenomicRanges::resize(gro[keep], finalSize, fix = "center")
}
# Turn every 2-bp summit set (lifted-over and native) into 500-bp windows.
hs_atac_mapped_in_chimp <- CleanAndResize(hs_atac_mapped_in_chimp, 500)
hs_atac_mapped_in_rhesus <- CleanAndResize(hs_atac_mapped_in_rhesus, 500)
chimp_mapped_in_humans <- CleanAndResize(chimp_mapped_in_humans, 500)
macaque_mapped_in_humans <- CleanAndResize(macaque_mapped_in_humans, 500)
macaque_in_chimps <- CleanAndResize(macaque_in_chimps, 500)
chimps_in_macaque <- CleanAndResize(chimps_in_macaque, 500)

hs_atac <- CleanAndResize(hs_atac, 500)
pt_atac <- CleanAndResize(pt_atac, 500)
mm_atac <- CleanAndResize(mm_atac, 500)
Now, we have all the lifted over combinations.
# human peaks aligned in all the species
# Names of human peaks that lifted over to both chimp and macaque.
hs_pt_mm_liftover <- names(hs_atac_mapped_in_chimp)[
  names(hs_atac_mapped_in_chimp) %in% names(hs_atac_mapped_in_rhesus)]
human_peaks_aligned_Pt_Mm_coordinates_hs <- hs_atac[which(names(hs_atac) %in% hs_pt_mm_liftover)]
# Drop the second member of every self-overlapping pair. setdiff() is used
# instead of x[-hits] because x[-integer(0)] would return an empty set when
# no windows overlap.
human_peaks_aligned_Pt_Mm_coordinates_hs <- human_peaks_aligned_Pt_Mm_coordinates_hs[
  setdiff(seq_along(human_peaks_aligned_Pt_Mm_coordinates_hs),
          subjectHits(findOverlaps(human_peaks_aligned_Pt_Mm_coordinates_hs,
                                   drop.self = TRUE, drop.redundant = TRUE)))]
length(human_peaks_aligned_Pt_Mm_coordinates_hs)
## [1] 141484
# Same peaks, in chimp and macaque coordinates, in the same order.
human_peaks_aligned_Pt_Mm_coordinates_pt <- hs_atac_mapped_in_chimp[
  match(names(human_peaks_aligned_Pt_Mm_coordinates_hs), names(hs_atac_mapped_in_chimp))]
human_peaks_aligned_Pt_Mm_coordinates_mm <- hs_atac_mapped_in_rhesus[
  match(names(human_peaks_aligned_Pt_Mm_coordinates_hs), names(hs_atac_mapped_in_rhesus))]
all(names(human_peaks_aligned_Pt_Mm_coordinates_hs)==names(human_peaks_aligned_Pt_Mm_coordinates_pt))
## [1] TRUE
all(names(human_peaks_aligned_Pt_Mm_coordinates_hs)==names(human_peaks_aligned_Pt_Mm_coordinates_mm))
## [1] TRUE
# peaks found in chimp and macaque (aligned to the human genome) but not detected in human
# Chimp peaks (human coordinates) overlapping a macaque peak (human
# coordinates), restricted to those that also lifted over to macaque.
nhp_peaks <- chimp_mapped_in_humans[
  queryHits(findOverlaps(chimp_mapped_in_humans, macaque_mapped_in_humans))]
nhp_peaks <- nhp_peaks[which(names(nhp_peaks) %in% names(chimps_in_macaque))]
# Remove those overlapping a human peak, then de-duplicate self-overlaps.
# setdiff() keeps the full set when there are no hits (x[-integer(0)] would
# return nothing).
nhp_peaks_coordinates_hs <- nhp_peaks[
  setdiff(seq_along(nhp_peaks),
          queryHits(findOverlaps(nhp_peaks, hs_atac)))]
nhp_peaks_coordinates_hs <- nhp_peaks_coordinates_hs[
  setdiff(seq_along(nhp_peaks_coordinates_hs),
          subjectHits(findOverlaps(nhp_peaks_coordinates_hs,
                                   drop.self = TRUE, drop.redundant = TRUE)))]
# Matching chimp (native) and macaque (lifted-over) coordinates.
nhp_peaks_coordinates_pt <- pt_atac[match(names(nhp_peaks_coordinates_hs), names(pt_atac))]
nhp_peaks_coordinates_mm <- chimps_in_macaque[
  match(names(nhp_peaks_coordinates_pt), names(chimps_in_macaque))]
all(names(nhp_peaks_coordinates_hs)==names(nhp_peaks_coordinates_mm))
## [1] TRUE
all(names(nhp_peaks_coordinates_hs)==names(nhp_peaks_coordinates_pt))
## [1] TRUE
# chimp peaks aligned both in humans and macaques but not detected as peaks in humans and macaques
# Chimp peaks (human coordinates) overlapping neither the NHP set nor a
# human peak. setdiff() keeps the full set when there are no hits.
chimp_peaks <- chimp_mapped_in_humans[
  setdiff(seq_along(chimp_mapped_in_humans),
          queryHits(findOverlaps(chimp_mapped_in_humans,
                                 c(nhp_peaks_coordinates_hs, hs_atac))))]
# Must also have lifted over to macaque and survive in the native chimp set.
chimp_peaks <- chimp_peaks[which(names(chimp_peaks) %in% names(chimps_in_macaque))]
chimp_peaks <- chimp_peaks[which(names(chimp_peaks) %in% names(pt_atac))]
chimp_uniquely_peaks_coordinates_hs <- chimp_mapped_in_humans[
  match(names(chimp_peaks), names(chimp_mapped_in_humans))]
# De-duplicate self-overlapping windows.
chimp_uniquely_peaks_coordinates_hs <- chimp_uniquely_peaks_coordinates_hs[
  setdiff(seq_along(chimp_uniquely_peaks_coordinates_hs),
          subjectHits(findOverlaps(chimp_uniquely_peaks_coordinates_hs,
                                   drop.self = TRUE, drop.redundant = TRUE)))]
chimp_uniquely_peaks_coordinates_pt <- pt_atac[
  match(names(chimp_uniquely_peaks_coordinates_hs), names(pt_atac))]
chimp_uniquely_peaks_coordinates_mm <- chimps_in_macaque[
  match(names(chimp_uniquely_peaks_coordinates_pt), names(chimps_in_macaque))]
all(names(chimp_uniquely_peaks_coordinates_hs)==names(chimp_uniquely_peaks_coordinates_pt))
## [1] TRUE
all(names(chimp_uniquely_peaks_coordinates_hs)==names(chimp_uniquely_peaks_coordinates_mm))
## [1] TRUE
length(chimp_uniquely_peaks_coordinates_hs)
## [1] 31740
# macaque peaks aligned both in humans and chimps but not detected as peaks in humans and chimps
# Macaque peaks (human coordinates) overlapping neither the NHP set nor a
# human peak. setdiff() keeps the full set when there are no hits.
macaque_peaks <- macaque_mapped_in_humans[
  setdiff(seq_along(macaque_mapped_in_humans),
          queryHits(findOverlaps(macaque_mapped_in_humans,
                                 c(nhp_peaks_coordinates_hs, hs_atac))))]
# Must also have lifted over to chimp and survive in the native macaque set.
macaque_peaks <- macaque_peaks[which(names(macaque_peaks) %in% names(macaque_in_chimps))]
macaque_peaks <- macaque_peaks[which(names(macaque_peaks) %in% names(mm_atac))]

macaque_uniquely_peaks_coordinates_hs <- macaque_mapped_in_humans[
  match(names(macaque_peaks), names(macaque_mapped_in_humans))]
# De-duplicate self-overlapping windows.
macaque_uniquely_peaks_coordinates_hs <- macaque_uniquely_peaks_coordinates_hs[
  setdiff(seq_along(macaque_uniquely_peaks_coordinates_hs),
          subjectHits(findOverlaps(macaque_uniquely_peaks_coordinates_hs,
                                   drop.self = TRUE, drop.redundant = TRUE)))]
macaque_uniquely_peaks_coordinates_pt <- macaque_in_chimps[
  match(names(macaque_uniquely_peaks_coordinates_hs), names(macaque_in_chimps))]
macaque_uniquely_peaks_coordinates_mm <- mm_atac[
  match(names(macaque_uniquely_peaks_coordinates_hs), names(mm_atac))]

all(names(macaque_uniquely_peaks_coordinates_hs)==names(macaque_uniquely_peaks_coordinates_pt))
## [1] TRUE
all(names(macaque_uniquely_peaks_coordinates_hs)==names(macaque_uniquely_peaks_coordinates_mm))
## [1] TRUE
length(macaque_uniquely_peaks_coordinates_hs)
## [1] 43319
### we pool all together and remove duplicated peaks --> 225,059
# The four peak classes are concatenated in the same order for each genome,
# so element i of the three objects refers to the same peak.
all_human_intervals <- c(human_peaks_aligned_Pt_Mm_coordinates_hs, nhp_peaks_coordinates_hs,
                         chimp_uniquely_peaks_coordinates_hs, macaque_uniquely_peaks_coordinates_hs)
all_chimp_intervals <- c(human_peaks_aligned_Pt_Mm_coordinates_pt, nhp_peaks_coordinates_pt,
                         chimp_uniquely_peaks_coordinates_pt, macaque_uniquely_peaks_coordinates_pt)
all_macaque_intervals <- c(human_peaks_aligned_Pt_Mm_coordinates_mm, nhp_peaks_coordinates_mm,
                           chimp_uniquely_peaks_coordinates_mm, macaque_uniquely_peaks_coordinates_mm)
all(names(all_human_intervals) == names(all_chimp_intervals))
## [1] TRUE
all(names(all_human_intervals) == names(all_macaque_intervals))
## [1] TRUE
# Peak names occurring more than once in the pooled set are removed
# entirely (every copy) from all three objects.
duplicated_peak_names <- table(names(all_human_intervals))
duplicated_peak_names <- names(duplicated_peak_names[duplicated_peak_names > 1])

all_human_intervals <- all_human_intervals[which(!names(all_human_intervals) %in% duplicated_peak_names)]
all_chimp_intervals <- all_chimp_intervals[which(!names(all_chimp_intervals) %in% duplicated_peak_names)]
all_macaque_intervals <- all_macaque_intervals[which(!names(all_macaque_intervals) %in% duplicated_peak_names)]
# Second pass over the raw ATAC peaks: build 500-bp windows centred on
# start + score *before* liftOver, so the lift-over is tested on the full
# window rather than on the 2-bp summit.
hs_atac2 <- readBed_filterChroms(paste0(outputs_directory, 'ATAC_Seq_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_peaks.narrowPeak'),
                                 paste0("", c(1:22, 'X')), 10)
pt_atac2 <- readBed_filterChroms(paste0(outputs_directory, 'ATAC_Seq_12-22_Pantro_i-Astro_Sandra_Mandy4-6_merged_PanTro6_peaks.narrowPeak'),
                                 paste0("chr", c(1, "2A", "2B", 3:22, 'X')), 10)
mm_atac2 <- readBed_filterChroms(paste0(outputs_directory, 'ATAC_Seq_12-22_MacMul_i-Astro_Becky_merged_RheMac10_peaks.narrowPeak'),
                                 paste0("", c(1:22, 'X')), 10)
seqlevelsStyle(hs_atac2) <- 'ucsc'
seqlevelsStyle(mm_atac2) <- 'ucsc'

# Collapse each peak to a single position, then widen to 500 bp.
start(hs_atac2) <- start(hs_atac2) + hs_atac2$score
end(hs_atac2) <- start(hs_atac2)
hs_atac2 <- GenomicRanges::resize(hs_atac2, 500, fix = "center")

start(pt_atac2) <- start(pt_atac2) + pt_atac2$score
end(pt_atac2) <- start(pt_atac2)
pt_atac2 <- GenomicRanges::resize(pt_atac2, 500, fix = "center")

start(mm_atac2) <- start(mm_atac2) + mm_atac2$score
end(mm_atac2) <- start(mm_atac2)
mm_atac2 <- GenomicRanges::resize(mm_atac2, 500, fix = "center")

export.bed(hs_atac2, con = paste0(outputs_directory, 'ATAC.Hs_clean_500_summit.narrowPeak'))
export.bed(pt_atac2, con = paste0(outputs_directory, 'ATAC.Pt_clean_500_summit.narrowPeak'))
export.bed(mm_atac2, con = paste0(outputs_directory, 'ATAC.Mm_clean_500_summit.narrowPeak'))
# Shell commands (run outside R): same six liftOver runs as for the summit
# files, this time on the 500-bp window files (*_500_summit.narrowPeak).
# liftOver positional arguments: input file, chain file, output file,
# file receiving the records that could not be mapped.
cd ~/Documents/Tools/
## human to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_PT6_500_summit.unmapped.file
## human to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Hs_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Hs_clean_peaks_on_RM10_500_summit.unmapped.file
## chimp to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_Hg38_500_summit.unmapped.file
## macaque to human
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToHg38.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_Hg38_500_summit.unmapped.file
## chimp to macaque
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Pt_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/panTro6ToRheMac10.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Pt_clean_peaks_on_RheMac10_500_summit.unmapped.file
## macaque to chimp
./liftOver -minMatch=0.5 -bedPlus=6 -tab /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/ATAC.Mm_clean_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/rheMac10ToPanTro6.over.chain /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_500_summit.narrowPeak /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/sequence_analysis/ATAC.Mm_clean_peaks_on_PanTro6_500_summit.unmapped.file
Read in the results to retrieve the peak names that we wish to use.
# For each source species, collect the names of the 500-bp windows that
# lifted over to BOTH other assemblies; `peaks` is the union of the three.
peaks_hs_Pt <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Hs_clean_peaks_on_PT6_500_summit.narrowPeak"),
                                    paste0("chr", c(1, "2A", "2B", 3:22, 'X')), 5)
peaks_hs_Mm <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Hs_clean_peaks_on_RM10_500_summit.narrowPeak"),
                                    paste0("chr", c(1:22, 'X')), 5)
peaks_hs <- names(peaks_hs_Pt)[names(peaks_hs_Pt) %in% names(peaks_hs_Mm)]

peaks_Pt_Hs <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Pt_clean_peaks_on_Hg38_500_summit.narrowPeak"),
                                    paste0("chr", c(1:22, 'X')), 5)
peaks_Pt_Mm <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Pt_clean_peaks_on_RheMac10_500_summit.narrowPeak"),
                                    paste0("chr", c(1:22, 'X')), 5)
peaks_pt <- names(peaks_Pt_Hs)[names(peaks_Pt_Hs) %in% names(peaks_Pt_Mm)]

peaks_Mm_Hs <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Mm_clean_peaks_on_Hg38_500_summit.narrowPeak"),
                                    paste0("chr", c(1:22, 'X')), 5)
peaks_Mm_Pt <- readBed_filterChroms(paste0(liftOverPeaks, "ATAC.Mm_clean_peaks_on_PanTro6_500_summit.narrowPeak"),
                                    paste0("chr", c(1, "2A", "2B", 3:22, 'X')), 5)
peaks_mm <- names(peaks_Mm_Hs)[names(peaks_Mm_Hs) %in% names(peaks_Mm_Pt)]

peaks <- unique(c(peaks_hs, peaks_pt, peaks_mm))
The final ranges
# Sanity checks before filtering: the three interval sets are parallel.
length(all_human_intervals)==length(all_chimp_intervals)
## [1] TRUE
length(all_human_intervals)==length(all_macaque_intervals)
## [1] TRUE
length(all_macaque_intervals) # 225,059
## [1] 225059
# Keep only intervals whose 500-bp window lifted over (names in `peaks`).
all_human_intervals <- all_human_intervals[which(names(all_human_intervals) %in% peaks)]
all_chimp_intervals <- all_chimp_intervals[which(names(all_chimp_intervals) %in% peaks)]
all_macaque_intervals <- all_macaque_intervals[which(names(all_macaque_intervals) %in% peaks)]
# Same checks after filtering, plus name-by-name agreement.
length(all_human_intervals)==length(all_chimp_intervals)
## [1] TRUE
length(all_human_intervals)==length(all_macaque_intervals)
## [1] TRUE
length(all_macaque_intervals) # 224,411
## [1] 224411
all(names(all_human_intervals)==names(all_chimp_intervals))
## [1] TRUE
all(names(all_human_intervals)==names(all_macaque_intervals))
## [1] TRUE
# Annotation files for read counting: one GTF per species holding the final
# intervals ...
export.gff(
  all_human_intervals,
  con = paste0(outputs_directory, "hs_atac_for_Deseq2.gtf")
)
export.gff(
  all_chimp_intervals,
  con = paste0(outputs_directory, "pt_atac_for_Deseq2.gtf")
)
export.gff(
  all_macaque_intervals,
  con = paste0(outputs_directory, "mm_atac_for_Deseq2.gtf")
)
# ... and one two-column alias table per species, "<seqlevel>,<seqlevel
# with every 'chr' removed>", used as the chrAliases file of featureCounts().
writeLines(
  paste0(seqlevels(all_human_intervals), ",",
         gsub("chr", '', seqlevels(all_human_intervals))),
  paste0(outputs_directory, 'hs_atac_for_Deseq2.txt')
)
writeLines(
  paste0(seqlevels(all_chimp_intervals), ",",
         gsub("chr", '', seqlevels(all_chimp_intervals))),
  paste0(outputs_directory, 'pt_atac_for_Deseq2.txt')
)
writeLines(
  paste0(seqlevels(all_macaque_intervals), ",",
         gsub("chr", '', seqlevels(all_macaque_intervals))),
  paste0(outputs_directory, 'mm_atac_for_Deseq2.txt')
)
# Count paired-end ATAC reads per interval for every sample with
# featureCounts(), using the species-matched GTF and chromosome alias file.
# BAM names are relative paths, so the working directory is switched to the
# BAM folder first; note that this setwd() stays in effect afterwards.
setwd('/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/bam_files/')
## -------------------------
ele10 <- featureCounts('ATAC_Seq_12-21_HomSap_i-Astro_WT_ELE10_merged_hg38.bam',
                       annot.ext = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/hs_atac_for_Deseq2.gtf',
                       isGTFAnnotationFile = TRUE,
                       chrAliases = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/hs_atac_for_Deseq2.txt',
                       GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd = TRUE)
ele30 <- featureCounts('ATAC_Seq_05-22_HomSap_i-Astro_WT_ELE30_2_Rep_1_hg38.bam',
                       annot.ext = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/hs_atac_for_Deseq2.gtf',
                       isGTFAnnotationFile = TRUE,
                       chrAliases = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/hs_atac_for_Deseq2.txt',
                       GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd = TRUE)
## -------------------------
sandraa <- featureCounts('ATAC_Seq_12-22_Pantro_i-Astro_Sandra_merged_PanTro6.bam',
                         annot.ext = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/pt_atac_for_Deseq2.gtf',
                         isGTFAnnotationFile = TRUE,
                         chrAliases = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/pt_atac_for_Deseq2.txt',
                         GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd = TRUE)
Mandy04 <- featureCounts('ATAC_Seq_05-22_PanTro_i-Astro_WT_Mandy4_Rep_1_PanTro6.bam',
                         annot.ext = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/pt_atac_for_Deseq2.gtf',
                         isGTFAnnotationFile = TRUE,
                         chrAliases = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/pt_atac_for_Deseq2.txt',
                         GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd = TRUE)
Mandy06 <- featureCounts('ATAC_Seq_05-22_PanTro_i-Astro_WT_Mandy6_Rep_1_PanTro6.bam',
                         annot.ext = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/pt_atac_for_Deseq2.gtf',
                         isGTFAnnotationFile = TRUE,
                         chrAliases = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/pt_atac_for_Deseq2.txt',
                         GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd = TRUE)
## -------------------------
becky <- featureCounts('ATAC_Seq_12-22_MacMul_i-Astro_Becky_merged_RheMac10.bam',
                       annot.ext = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/mm_atac_for_Deseq2.gtf',
                       isGTFAnnotationFile = TRUE,
                       chrAliases = '/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/mm_atac_for_Deseq2.txt',
                       GTF.featureType = 'sequence_feature', GTF.attrType = 'ID', isPairedEnd = TRUE)
## -------------------------
save(ele10, ele30, sandraa, Mandy04, Mandy06, becky,
     file = paste0(outputs_directory, 'counts_ATAC_refined.RData'))
## -------------------------
all( ele10$annotation$GeneID == ele30$annotation$GeneID )
all( names(all_human_intervals)== ele10$annotation$GeneID)
all(start(all_human_intervals)==ele10$annotation$Start)
all( names(all_human_intervals)== becky$annotation$GeneID)
## Assemble the count matrix: one column per sample (first count column of
## each featureCounts result), rows in the annotation order checked above.
ATAC_count = data.frame( ELE10 = ele10$counts[,1],
                         ELE30 = ele30$counts[,1],
                         SandraA = sandraa$counts[,1],
                         Mandy04 = Mandy04$counts[,1],
                         Mandy06 = Mandy06$counts[,1],
                         Becky = becky$counts[,1] )
save( ATAC_count,
      file = paste0(outputs_directory,'ATAC_count.RData' ) )
|| Paired-end : yes ||
|| Count read pairs : yes ||
|| Annotation : hs_atac_for_Deseq2.gtf (GTF) ||
|| Dir for temp files : . ||
|| Chromosome alias file : hs_atac_for_Deseq2.txt ||
|| Threads : 1 ||
|| Level : meta-feature level ||
|| Multimapping reads : counted ||
|| Multi-overlapping reads : not counted ||
|| Min overlapping bases : 1
For the quantitative analysis we consider only peaks that have at least 50% liftover between all species.
load( paste0(outputs_directory,'ATAC_count.RData') )
## Sample sheet: one row per column of ATAC_count, in the same order.
metadata = data.frame(species=c('HS','HS','PT','PT','PT','MM'),
                      human_or_not = c("HS","HS","NHP","NHP","NHP","NHP"),
                      assay='ATAC',
                      row.names=colnames(ATAC_count))
data <- DESeqDataSetFromMatrix( countData=ATAC_count,
                                colData = metadata,
                                design = ~ 0 + species )
## Make human the first factor level.
data$species = relevel(data$species, "HS")

data = DESeq(data,fitType = 'local')
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## Transformed counts for visualisation (blind to the design).
vst_data = vst(data, blind=TRUE)
log_data = rlog(data, blind=TRUE)
PCA Plot for all Samples.
species.colors <- c('HS' = '#000000', 'PT' = '#FF3300', 'MM' = '#0033FF')
## PCA on normalized counts, rows ordered by decreasing variance.
ord = order(rowVars(counts(data, normalized = TRUE)), decreasing = TRUE)
pca = prcomp(t(counts(data, normalized = TRUE)[ord,]))

## PCA of the rlog-transformed data, one labelled point per sample.
plotPCA(log_data, intgroup="species") +
  geom_label_repel(aes(label = name),fill = alpha(c("white"),0.2),
                   show.legend = FALSE, size = 3.25, label.size=0.5,
                   fontface = 'bold') +
  scale_color_manual(values = species.colors) + theme_bw() + labs(color = "Species") +
  theme(aspect.ratio = 1, axis.text = element_text(face = 'bold', size = 11),
        axis.title = element_text(face = 'bold', size = 13),
        legend.text = element_text(face = 'bold'), legend.title = element_text(face = 'bold', size = 12)) +
  ggtitle("PCA Plot")
Next, we identify human-specific ATAC peaks by comparison with chimpanzee and macaque.
## Keep only the intervals that were counted (present in ATAC_count), give
## them a constant score of 1, and export/save them per species.
hs_atac_for_Deseq2 = all_human_intervals[ which(names(all_human_intervals) %in% rownames(ATAC_count))]
score(hs_atac_for_Deseq2) = 1
export.bed( hs_atac_for_Deseq2,
            con=paste0(outputs_directory,"hs_atac_for_Deseq2.bed" ))
save(hs_atac_for_Deseq2,
     file=paste0(objects_directory,"hs_atac_for_Deseq2.RData") )

## Chimpanzee and macaque intervals are matched to the human set by name.
pt_atac_for_Deseq2 = all_chimp_intervals[ which(names(all_chimp_intervals) %in% names(hs_atac_for_Deseq2))]
score(pt_atac_for_Deseq2) = 1
export.bed( pt_atac_for_Deseq2,
            con=paste0(outputs_directory,"pt_atac_for_Deseq2.bed" ))
save(pt_atac_for_Deseq2,
     file=paste0(objects_directory,"pt_atac_for_Deseq2.RData") )

mm_atac_for_Deseq2 = all_macaque_intervals[ which(names(all_macaque_intervals) %in% names(hs_atac_for_Deseq2))]
score(mm_atac_for_Deseq2) = 1
export.bed( mm_atac_for_Deseq2,
            con=paste0(outputs_directory,"mm_atac_for_Deseq2.bed" ))
save(mm_atac_for_Deseq2,
     file=paste0(objects_directory,"mm_atac_for_Deseq2.RData") )
Individual pairwise comparisons (human vs. chimpanzee, human vs. macaque) and a combined table of their results.
## Human vs. chimpanzee: DESeq2 on the HS and PT samples only.
HS_PT = DESeqDataSetFromMatrix( countData = ATAC_count[ ,colnames(ATAC_count) %in% rownames(metadata[metadata$species %in% c("HS","PT"),])],
                                colData = metadata[metadata$species %in% c("HS","PT"),],
                                design = ~ 0 + species )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
HS_PT = DESeq(HS_PT,fitType = 'local')
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
HS_PT$species = relevel(HS_PT$species, "HS")
## Contrast HS over PT: positive log2FoldChange = higher signal in human.
res_HS_PT = results(HS_PT, contrast = c("species","HS","PT"))
# all(rownames(res_HS_PT)==names(hs_atac_for_Deseq2))
## Human vs. macaque: DESeq2 on the HS and MM samples only.
HS_MM = DESeqDataSetFromMatrix( countData=ATAC_count[,metadata$species %in% c("HS","MM")],
                                colData = metadata[metadata$species %in% c("HS","MM"),],
                                design = ~ 0 + species )
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
HS_MM = DESeq(HS_MM,fitType = 'local')
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
HS_MM$species = relevel(HS_MM$species, "HS")
## Contrast HS over MM: positive log2FoldChange = higher signal in human.
res_HS_MM = results(HS_MM, contrast = c("species","HS","MM"))
# all(rownames(res_HS_PT)==rownames(res_HS_MM))
## The columns below are combined by position, so both result tables and the
## human interval set must list the same peaks in the same order.
stopifnot( all(rownames(res_HS_PT)==rownames(res_HS_MM)),
           all(rownames(res_HS_MM)==names(hs_atac_for_Deseq2)) )
## One row per peak: fold changes and adjusted p-values of both comparisons
## plus the human coordinates of the peak.
res_HS_NHP = data.frame( hs_pt_LFC = res_HS_PT$log2FoldChange,
                         hs_pt_Padj = res_HS_PT$padj,
                         hs_mm_LFC = res_HS_MM$log2FoldChange,
                         hs_mm_Padj = res_HS_MM$padj,
                         row.names = rownames(res_HS_MM),
                         chrom_hs = chrom(hs_atac_for_Deseq2),
                         start= start(hs_atac_for_Deseq2),
                         end = end(hs_atac_for_Deseq2) )
## Attach the adjusted p-values to the intervals and re-export them.
## NOTE: this overwrites the hs_atac_for_Deseq2.gtf written for featureCounts.
hs_atac_for_Deseq2$score=0
hs_atac_for_Deseq2$padj_HSPT = res_HS_PT$padj
hs_atac_for_Deseq2$padj_HSMM = res_HS_MM$padj
export.gff(hs_atac_for_Deseq2,con=paste0(outputs_directory,"hs_atac_for_Deseq2.gtf"))
table(rowSums(res_HS_NHP[,c("hs_pt_Padj","hs_mm_Padj")]<0.1))
##
## 0 1 2
## 122226 71847 17203
## NOTE(review): this tabulates a raw p-value column (pvalue.x) together with
## an adjusted one (padj.y); confirm whether padj.x was intended.
table(rowSums(all_Deseqs[,c("pvalue.x","padj.y")]<0.1))
##
## 0 1 2
## 13083 11086 5137
table( res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_pt_LFC<0 )
##
## FALSE TRUE
## 205878 15321
table( res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_pt_LFC>0 )
##
## FALSE TRUE
## 202928 20257
table( res_HS_NHP$hs_mm_Padj<0.1 & res_HS_NHP$hs_mm_LFC<0 )
##
## FALSE TRUE
## 184934 35243
table( res_HS_NHP$hs_mm_Padj<0.1 & res_HS_NHP$hs_mm_LFC>0 )
##
## FALSE TRUE
## 175875 43810
## export locations of the altered peaks
## Keep only peaks with a defined adjusted p-value in both comparisons.
tested_in_both = which(!is.na(res_HS_NHP$hs_pt_Padj) & ! is.na(res_HS_NHP$hs_mm_Padj))
hs_atac_for_Deseq2_Hs_vs_NHP_filt = hs_atac_for_Deseq2[ tested_in_both ]
res_HS_NHP_filt = res_HS_NHP[ tested_in_both, ]
all(names(hs_atac_for_Deseq2_Hs_vs_NHP_filt)==rownames(res_HS_NHP_filt))
## [1] TRUE
sum( rowSums(cbind(res_HS_NHP_filt$hs_pt_Padj<0.1,res_HS_NHP_filt$hs_mm_Padj<0.1 ))>0 )
## [1] 89050
## -----------------------------
## Adjusted p-value threshold used for both comparisons.
pvalthr=0.1
## Gained in human: significantly higher than chimpanzee AND macaque.
gained_idx = which( res_HS_NHP_filt$hs_pt_LFC>0 & res_HS_NHP_filt$hs_pt_Padj<pvalthr & res_HS_NHP_filt$hs_mm_LFC>0 & res_HS_NHP_filt$hs_mm_Padj<pvalthr )
gained_ATAC_gr = hs_atac_for_Deseq2_Hs_vs_NHP_filt[ gained_idx ]
gained_ATAC = res_HS_NHP_filt[ gained_idx, ]
gained_ATAC_gr$score=0
export.bed(gained_ATAC_gr,con=paste0(outputs_directory,"gained_ATAC_gr.bed"))
## Lost in human: significantly lower than chimpanzee AND macaque.
lost_idx = which( res_HS_NHP_filt$hs_pt_LFC<0 & res_HS_NHP_filt$hs_pt_Padj<pvalthr & res_HS_NHP_filt$hs_mm_LFC<0 & res_HS_NHP_filt$hs_mm_Padj<pvalthr )
lost_ATAC_gr = hs_atac_for_Deseq2_Hs_vs_NHP_filt[ lost_idx ]
lost_ATAC = res_HS_NHP_filt[ lost_idx, ]
lost_ATAC_gr$score=0
export.bed(lost_ATAC_gr,con=paste0(outputs_directory,"lost_ATAC_gr.bed"))
## Volcano plot of the human vs. chimpanzee comparison; peaks significant in
## both comparisons are highlighted in blue.
par(mar=c(5,5,5,5),mfrow=c(1,1))#, cex=1.0, cex.main=1.4, cex.axis=1.4, cex.lab=1.4)
topT <- as.data.frame(res_HS_NHP)
## NOTE(review): rows with NA adjusted p-values yield NA row names here, so
## the length below counts them too; they never match a real peak name and
## therefore do not affect the highlighted points.
topTsig = rownames(topT[topT$hs_pt_Padj <=pvalthr & topT$hs_mm_Padj<=pvalthr,])
length(topTsig)
## [1] 25677
## `col` belongs to plot(); as an extra argument of with() it was ignored.
with(topT, plot(hs_pt_LFC, -log10(hs_pt_Padj),
                pch=20, cex=1.0, col="gray60",
                xlab=bquote(~Log[2]~fold~change),
                ylab=bquote(~-log[10]~Q~value),
                xlim=c(-10,10),
                ylim=c(0,20)))
with(subset(topT, rownames(topT) %in% topTsig),
     points(hs_pt_LFC, -log10(hs_pt_Padj), pch=20, col="steelblue", cex=0.5))
axis(2,lwd=2)
axis(1,lwd=2)
box(col="black",lwd=2)
First we will consider only regions that do not overlap promoters nor H3K4me3 peaks. Then, we identify the human specific ATAC-seq peaks and remove the peaks that overlap H3K27ac in NHPs. Create a big annotation table for the DORegions between Hs, Pt and Mm.
## Negative selections use !overlapsAny(): x[-queryHits(...)] returns an EMPTY
## object when there is no overlap at all (x[-integer(0)]).
## Gained peaks that overlap neither an H3K4me3 peak nor a promoter window.
human_spe_enhancers = gained_ATAC_gr[ !overlapsAny(gained_ATAC_gr, c(hs_me3,promoters_tss_gr)) ]
## Gained peaks at promoters with / without an H3K4me3 peak (one entry per
## overlap, as before).
human_spe_active_promoters = gained_ATAC_gr[ queryHits(findOverlaps(gained_ATAC_gr, promoters_tss_gr[queryHits(findOverlaps(promoters_tss_gr,hs_me3))]))]
human_spe_inactive_promoters = gained_ATAC_gr[ queryHits(findOverlaps(gained_ATAC_gr, promoters_tss_gr[!overlapsAny(promoters_tss_gr,hs_me3)]))]
## Names of chimpanzee / macaque intervals without an H3K27ac peak.
pt_atac_for_Deseq2_not_H3K27ac = names(pt_atac_for_Deseq2[!overlapsAny(pt_atac_for_Deseq2,pt_k27ac)])
mm_atac_for_Deseq2_not_H3K27ac = names(mm_atac_for_Deseq2[!overlapsAny(mm_atac_for_Deseq2,mm_k27ac)])

## HUMAN SPECIFIC ENHANCERS
## Require absence of H3K27ac in both chimpanzee and macaque.
human_spe_enhancers = human_spe_enhancers[which(names(human_spe_enhancers) %in% pt_atac_for_Deseq2_not_H3K27ac[pt_atac_for_Deseq2_not_H3K27ac %in% mm_atac_for_Deseq2_not_H3K27ac])]
human_spe_enhancers_with_K27_peak = human_spe_enhancers[queryHits(findOverlaps(human_spe_enhancers,hs_k27ac))]
human_spe_enhancers_without_K27_peak = human_spe_enhancers[!overlapsAny(human_spe_enhancers,hs_k27ac)]
length(human_spe_enhancers)
## [1] 9356
## Chimpanzee and macaque counterparts of the gained human peaks, by name.
gained_atac_peaks_hs = names(gained_ATAC_gr)
gained_ATAC_gr_pt = pt_atac_for_Deseq2[which(names(pt_atac_for_Deseq2) %in% gained_atac_peaks_hs)]
gained_ATAC_gr_mm = mm_atac_for_Deseq2[which(names(mm_atac_for_Deseq2) %in% gained_atac_peaks_hs)]

## gained_atac_peaks_hs is a plain character vector, so it is compared
## directly; names() of it is NULL, which made the former check vacuously TRUE.
identical(gained_atac_peaks_hs, names(gained_ATAC_gr_pt))
## [1] TRUE
identical(gained_atac_peaks_hs, names(gained_ATAC_gr_mm))
## [1] TRUE
## Gained peaks that do not overlap any promoter window; !overlapsAny() stays
## correct when nothing overlaps, unlike negative indexing with queryHits().
intergenic_gained = gained_atac_peaks_hs[!overlapsAny(gained_ATAC_gr,promoters_tss_gr)]
## Per gained peak: number of overlapping ATAC, H3K4me3 and H3K27ac peaks in
## each species (each counted on that species' own coordinates), number of
## overlapping promoter windows, and whether the peak is promoter-free.
gained_ATAC_functional_annotation = data.frame(atac_Hs = countOverlaps(gained_ATAC_gr,hs_atac),
                                               atac_Pt = countOverlaps(gained_ATAC_gr_pt,pt_atac),
                                               atac_Mm = countOverlaps(gained_ATAC_gr_mm,mm_atac),
                                               me3_Hs = countOverlaps(gained_ATAC_gr,hs_me3),
                                               me3_Pt = countOverlaps(gained_ATAC_gr_pt,pt_me3),
                                               me3_Mm = countOverlaps(gained_ATAC_gr_mm,mm_me3),
                                               k27_Hs = countOverlaps(gained_ATAC_gr,hs_k27ac),
                                               k27_Pt = countOverlaps(gained_ATAC_gr_pt,pt_k27ac),
                                               k27_Mm = countOverlaps(gained_ATAC_gr_mm,mm_k27ac),
                                               promoter = countOverlaps(gained_ATAC_gr,promoters_tss_gr),
                                               is_intergenic = gained_atac_peaks_hs %in% intergenic_gained,
                                               row.names = gained_atac_peaks_hs )
colSums(gained_ATAC_functional_annotation>0)
## atac_Hs atac_Pt atac_Mm me3_Hs me3_Pt
## 13108 4253 1792 411 329
## me3_Mm k27_Hs k27_Pt k27_Mm promoter
## 301 7382 2888 1424 908
## is_intergenic
## 12268
## Presence/absence heat map of the gained-peak annotation table.
par(mfrow=c(1,1))
x=gained_ATAC_functional_annotation>0
## Sort the rows by their presence pattern, first column first.
x=x[order(x[,1],x[,2],x[,3],x[,4],x[,5],x[,6],x[,7],x[,8],x[,9],x[,10],x[,11]),]
## Recode "present" in column j as the value j so that every column maps to
## its own entry of the colour vector below (column 1 keeps the value 1).
for (j in 2:11) x[x[,j]>0,j] = j

par(mfrow=c(1,1),mar=c(7,1,1,1))
image(t(x),
      col=c("white","gray80","gray80","gray80",
            'forestgreen','forestgreen','forestgreen',
            "coral3","coral3","coral3",
            "black","blue4"),
      axes=FALSE,
      las=2)
box(col="black",lwd=2)
axis(1,at=seq(0,1,length=11),
     c("atac_human","atac_chimp","atac_macaque",
       "me3_human","me3_chimp","me3_macaque",
       "k27_human","k27_chimp","k27_macaque",
       "promoter","intergenic"),
     las=2)
abline(v=seq(0,1,length.out=11)[c(3,6,9,10)]+0.05,lwd=2)
## Gained promoters: ATAC and H3K4me3 present in human only, at an annotated
## promoter, without H3K27ac in macaque. (Repeated terms of the original
## filter were removed; the selection is unchanged.)
## NOTE(review): k27_Pt is not constrained here; confirm that is intended.
gained_promoters = gained_ATAC_functional_annotation[ with(gained_ATAC_functional_annotation,
  me3_Hs>0 & me3_Pt==0 & me3_Mm==0 &
  atac_Hs>0 & atac_Pt==0 & atac_Mm==0 &
  k27_Mm==0 & promoter>0), ]
gained_promoters_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(gained_promoters))]
## Chimpanzee and macaque counterparts of the lost human peaks, by name.
lost_atac_peaks_hs = names(lost_ATAC_gr)
lost_ATAC_gr_pt = pt_atac_for_Deseq2[which(names(pt_atac_for_Deseq2) %in% lost_atac_peaks_hs)]
lost_ATAC_gr_mm = mm_atac_for_Deseq2[which(names(mm_atac_for_Deseq2) %in% lost_atac_peaks_hs)]

## lost_atac_peaks_hs is a plain character vector, so it is compared
## directly; names() of it is NULL, which made the former check vacuously TRUE.
identical(lost_atac_peaks_hs, names(lost_ATAC_gr_pt))
## [1] TRUE
identical(lost_atac_peaks_hs, names(lost_ATAC_gr_mm))
## [1] TRUE
## Lost peaks that do not overlap any promoter window; !overlapsAny() stays
## correct when nothing overlaps, unlike negative indexing with queryHits().
intergenic_lost = lost_atac_peaks_hs[!overlapsAny(lost_ATAC_gr,promoters_tss_gr)]
## Per lost peak: number of overlapping ATAC, H3K4me3 and H3K27ac peaks in
## each species (each counted on that species' own coordinates), number of
## overlapping promoter windows, and whether the peak is promoter-free.
lost_ATAC_functional_annotation = data.frame(atac_Hs = countOverlaps(lost_ATAC_gr,hs_atac),
                                             atac_Pt = countOverlaps(lost_ATAC_gr_pt,pt_atac),
                                             atac_Mm = countOverlaps(lost_ATAC_gr_mm,mm_atac),
                                             me3_Hs = countOverlaps(lost_ATAC_gr,hs_me3),
                                             me3_Pt = countOverlaps(lost_ATAC_gr_pt,pt_me3),
                                             me3_Mm = countOverlaps(lost_ATAC_gr_mm,mm_me3),
                                             k27_Hs = countOverlaps(lost_ATAC_gr,hs_k27ac),
                                             k27_Pt = countOverlaps(lost_ATAC_gr_pt,pt_k27ac),
                                             k27_Mm = countOverlaps(lost_ATAC_gr_mm,mm_k27ac),
                                             promoter = countOverlaps(lost_ATAC_gr,promoters_tss_gr),
                                             is_intergenic = lost_atac_peaks_hs %in% intergenic_lost,
                                             row.names = lost_atac_peaks_hs )
colSums(lost_ATAC_functional_annotation>0)
## atac_Hs atac_Pt atac_Mm me3_Hs me3_Pt
## 479 2441 3172 145 446
## me3_Mm k27_Hs k27_Pt k27_Mm promoter
## 505 325 1538 1234 540
## is_intergenic
## 2780
We observe an approximately 3-fold over-representation of lost compared with gained promoters in evolution.
## Strict definitions: promoter peaks carrying all four NHP marks (lost) or
## none of them plus human H3K4me3 (gained).
## NOTE: both objects are overwritten by the relaxed definitions right below.
lost_promoters = lost_ATAC_functional_annotation[lost_ATAC_functional_annotation$promoter>0 & rowSums( lost_ATAC_functional_annotation[,c("me3_Pt","me3_Mm","k27_Pt","k27_Mm")]>0)==4,]
gained_promoters = gained_ATAC_functional_annotation[gained_ATAC_functional_annotation$me3_Hs>0 & gained_ATAC_functional_annotation$promoter>0 & rowSums(gained_ATAC_functional_annotation[,c("me3_Pt","me3_Mm","k27_Pt","k27_Mm")]==0)==4,]

## Relaxed definitions used downstream: any lost/gained peak at a promoter.
lost_promoters = lost_ATAC_functional_annotation[lost_ATAC_functional_annotation$promoter>0 ,]
gained_promoters = gained_ATAC_functional_annotation[gained_ATAC_functional_annotation$promoter>0 ,]

## Ensembl gene ids of the promoters overlapped by those peaks.
lost_promoters_ensid = unique( promoters_tss_gr[queryHits(findOverlaps(promoters_tss_gr, lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(lost_promoters))] ))]$gene_id )
gained_promoters_ensid = unique( promoters_tss_gr[queryHits(findOverlaps(promoters_tss_gr, gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(gained_promoters) )]))]$gene_id )
length(gained_promoters_ensid)
## [1] 951
length(lost_promoters_ensid)
## [1] 613
any(lost_promoters_ensid %in% gained_promoters_ensid)
## [1] TRUE
## 2x2 table, filled column-wise: promoter peaks (lost, gained) in the first
## column, all peaks (lost, gained) in the second.
m=matrix(c(nrow(lost_promoters),
           nrow(gained_promoters),
           length(lost_ATAC_gr),
           length(gained_ATAC_gr)),2,2)
fisher.test(m)
##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value < 0.00000000000000022
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.103521 2.646321
## sample estimates:
## odds ratio
## 2.360091
save(gained_ATAC_gr,file=paste0(objects_directory,"gained_ATAC_gr.RData"))
save(lost_ATAC_gr,file=paste0(objects_directory,"lost_ATAC_gr.RData"))
## ----------------
## Lost enhancers: promoter-free lost peaks without H3K4me3 in any species
## and without H3K27ac in human.
##   active: H3K27ac present in both chimpanzee and macaque
##   poised: H3K27ac absent in both chimpanzee and macaque
## (The "geniune" spelling is kept because these names are used further down.)
geniune_lost_active_enhancers = lost_ATAC_functional_annotation[ lost_ATAC_functional_annotation$is_intergenic>0 & lost_ATAC_functional_annotation$promoter==0 & rowSums(lost_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 & lost_ATAC_functional_annotation$k27_Hs==0 & lost_ATAC_functional_annotation$k27_Pt>0 & lost_ATAC_functional_annotation$k27_Mm>0,]

geniune_lost_poised_enhancers = lost_ATAC_functional_annotation[ lost_ATAC_functional_annotation$is_intergenic>0 & lost_ATAC_functional_annotation$promoter==0 & rowSums(lost_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 & lost_ATAC_functional_annotation$k27_Hs==0 & lost_ATAC_functional_annotation$k27_Pt==0 & lost_ATAC_functional_annotation$k27_Mm==0,]

geniune_lost_enhancers = lost_ATAC_functional_annotation[ lost_ATAC_functional_annotation$is_intergenic>0 & lost_ATAC_functional_annotation$promoter==0 & rowSums(lost_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 & lost_ATAC_functional_annotation$k27_Hs==0 ,]

## Matching genomic ranges, selected by peak name.
genuine_lost_enhancers_gr = lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(geniune_lost_enhancers))]
geniune_lost_poised_enhancers_gr = lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(geniune_lost_poised_enhancers))]
geniune_lost_active_enhancers_gr = lost_ATAC_gr[which(names(lost_ATAC_gr) %in% rownames(geniune_lost_active_enhancers))]
save( genuine_lost_enhancers_gr, file=paste0(objects_directory,"genuine_lost_enhancers_gr.RData"))
## ----------------
## Gained enhancers: promoter-free gained peaks without H3K4me3 in any
## species and without H3K27ac in chimpanzee and macaque.
##   active: H3K27ac present in human;  poised: H3K27ac absent in human
genuine_gained_enhancers = gained_ATAC_functional_annotation[ gained_ATAC_functional_annotation$is_intergenic>0 & gained_ATAC_functional_annotation$promoter==0 & rowSums(gained_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 & gained_ATAC_functional_annotation$k27_Pt==0 & gained_ATAC_functional_annotation$k27_Mm==0,]

genuine_gained_active_enhancers = gained_ATAC_functional_annotation[ gained_ATAC_functional_annotation$is_intergenic>0 & gained_ATAC_functional_annotation$promoter==0 & rowSums(gained_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 & gained_ATAC_functional_annotation$k27_Pt==0 & gained_ATAC_functional_annotation$k27_Mm==0 & gained_ATAC_functional_annotation$k27_Hs>0,]

genuine_gained_poised_enhancers = gained_ATAC_functional_annotation[ gained_ATAC_functional_annotation$is_intergenic>0 & gained_ATAC_functional_annotation$promoter==0 & rowSums(gained_ATAC_functional_annotation[,c("me3_Hs","me3_Pt","me3_Mm")])==0 & gained_ATAC_functional_annotation$k27_Pt==0 & gained_ATAC_functional_annotation$k27_Mm==0 & gained_ATAC_functional_annotation$k27_Hs==0,]

## Matching genomic ranges, selected by peak name.
genuine_gained_enhancers_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(genuine_gained_enhancers))]
genuine_gained_active_enhancers_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(genuine_gained_active_enhancers))]
genuine_gained_poised_enhancers_gr = gained_ATAC_gr[which(names(gained_ATAC_gr) %in% rownames(genuine_gained_poised_enhancers))]

## Export once with NCBI-style and once with UCSC-style chromosome names; the
## object is left in UCSC style.
seqlevelsStyle(genuine_gained_enhancers_gr) = "ncbi"
export.bed( genuine_gained_enhancers_gr, con=paste0(outputs_directory,"genuine_gained_enhancers.bed"))
seqlevelsStyle(genuine_gained_enhancers_gr) = "ucsc"
export.bed( genuine_gained_enhancers_gr, con=paste0(outputs_directory,"genuine_gained_enhancers_ucsc.bed"))
## Promoters of the genes up- / down-regulated in human.
up_set = HS_UP_Genes$ensembl_id
dn_set = HS_DN_Genes$ensembl_id

promoters_HITS_UP = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% up_set ) ]
promoters_HITS_DN = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% dn_set ) ]
length(genuine_gained_enhancers_gr)
## [1] 9343
length(genuine_lost_enhancers_gr)
## [1] 2351
## 1 Mb windows centred on the up-regulated promoters (500 kb to each side).
promoters_HITS_UP_500 = GenomicRanges::resize(promoters_HITS_UP,1000000,fix="center")
sum(countOverlaps(promoters_HITS_UP_500,genuine_gained_enhancers_gr)>0)
## [1] 586
sum(countOverlaps(promoters_HITS_UP_500,genuine_gained_active_enhancers_gr)>0)
## [1] 460
## ----------------
## Promoters of the genes up- / down-regulated in human vs. both NHPs.
promoters_HITS_UP = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% up_set ) ]
promoters_HITS_DN = promoters_filtered_gr[ which( promoters_filtered_gr$gene_id %in% dn_set ) ]

save( promoters_HITS_UP, promoters_HITS_DN,
      file=paste0(objects_directory,"promoters_up_down.RData"))
## Promoters of genes significant (padj < 0.01) in a single comparison:
## the .x columns of all_Deseqs feed the *_hs_pt sets, the .y columns the
## *_hs_mm sets.
prom_up_hs_pt = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x>0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]
prom_dn_hs_pt = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x<0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]

prom_up_hs_mm = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y>0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
prom_dn_hs_mm = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y<0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
## ----------------
## Distance from every region to its nearest promoter, aligned to `regions`.
## distanceToNearest() returns hits only for regions that have a nearest
## subject, so its distances must be placed via queryHits(); indexing the
## regions directly with positions in the hit list misaligns as soon as one
## region has no hit. Regions without a nearest promoter get NA and are
## dropped by which() below.
distance_to_nearest_promoter = function(regions, promoters){
  hits = distanceToNearest(regions, promoters)
  d = rep(NA_integer_, length(regions))
  d[queryHits(hits)] = mcols(hits)$distance
  d
}
## Promoters of all differentially expressed genes (any comparison).
all_DEG_promoters = c( prom_up_hs_pt,prom_dn_hs_pt,prom_up_hs_mm,prom_dn_hs_mm,promoters_HITS_UP,promoters_HITS_DN )

## Enhancers within 500 kb of a promoter changing in the same direction.
genuine_gained_enhancers_that_do_something = genuine_gained_enhancers_gr[ which(distance_to_nearest_promoter(genuine_gained_enhancers_gr,promoters_HITS_UP)<500000)]
genuine_lost_enhancers_that_do_something = genuine_lost_enhancers_gr[ which(distance_to_nearest_promoter(genuine_lost_enhancers_gr,promoters_HITS_DN)<500000)]
## Lost enhancers farther than 500 kb from every DEG promoter.
genuine_lost_enhancers_that_do_nothing = genuine_lost_enhancers_gr[ which(distance_to_nearest_promoter(genuine_lost_enhancers_gr,all_DEG_promoters)>500000)]

## verify on elements identified without TAD filtering
enhancers_linked_with_activation = genuine_gained_enhancers_gr[which(distance_to_nearest_promoter(genuine_gained_enhancers_gr, promoters_HITS_UP)<500000)]
enhancers_not_linked_with_activation = genuine_gained_enhancers_gr[which(distance_to_nearest_promoter(genuine_gained_enhancers_gr, all_DEG_promoters)>500000) ]

save( enhancers_linked_with_activation, enhancers_not_linked_with_activation,
      file=paste0(objects_directory,"enhancers_functional_groups.RData"))
export.bed(enhancers_linked_with_activation,con=paste0(outputs_directory,"enhancers_linked_with_activation.bed"))
export.bed(enhancers_not_linked_with_activation,con=paste0(outputs_directory,"enhancers_not_linked_with_activation.bed"))
save( genuine_lost_enhancers_that_do_something, genuine_lost_enhancers_that_do_nothing,
      file=paste0(objects_directory,"lost_enhancers_functional_groups.RData"))
export.bed(genuine_lost_enhancers_that_do_something,con=paste0(outputs_directory,"lost_enhancers_linked_with_activation.bed"))
export.bed(genuine_lost_enhancers_that_do_nothing,con=paste0(outputs_directory,"lost_enhancers_not_linked_with_activation.bed") )
## all enhancers
## all enhancers: human ATAC peaks outside H3K4me3 peaks and promoter
## windows that overlap an H3K27ac peak.
## !overlapsAny() is used because x[-queryHits(...)] is empty when nothing
## overlaps.
enhancers_HS = hs_atac[ !overlapsAny(hs_atac,c(hs_me3,promoters_tss_gr)) ]
enhancers_HS = enhancers_HS[ queryHits(findOverlaps(enhancers_HS, hs_k27ac) ) ]

## conserved enhancers: fold change below 1.5x and adjusted p-value above 0.1
## in both comparisons; by construction they overlap no promoter.
## NOTE(review): the match below relies on names(enhancers_HS) using the same
## identifiers as the row names of res_HS_NHP; confirm for hs_atac.
conserved_enhancers = res_HS_NHP[ abs(res_HS_NHP$hs_pt_LFC)<log2(1.5) & abs(res_HS_NHP$hs_mm_LFC)<log2(1.5) &
                                    res_HS_NHP$hs_pt_Padj>0.1 & res_HS_NHP$hs_mm_Padj>0.1, ]
conserved_enhancers = enhancers_HS[ which(names(enhancers_HS) %in% rownames(conserved_enhancers)) ]
save( conserved_enhancers,
      file=paste0(objects_directory,"conserved_enhancers.RData" ))
export.bed(conserved_enhancers,con=paste0(outputs_directory,"conserved_enhancers.bed"))
Bar graph showing how many gained enhancers lie within 500 kb of an up-regulated gene.
## Promoters significant in a single comparison, minus the up-regulated set.
up_dn_sep = c(prom_up_hs_pt,prom_dn_hs_pt,prom_up_hs_mm,prom_dn_hs_mm)
up_dn_sep = up_dn_sep[which(! names(up_dn_sep) %in% names(promoters_HITS_UP))]

## Distance of each gained enhancer to the nearest up-regulated promoter.
dist_to_up_promoter = elementMetadata(distanceToNearest(genuine_gained_enhancers_gr,promoters_HITS_UP))[,1]
## EAG:     enhancers within 500 kb of an up-regulated promoter
## any_DEG: enhancers farther away, minus those not linked to any DEG
m = c( any_DEG=sum( dist_to_up_promoter > 500000)-length(enhancers_not_linked_with_activation),
       EAG=sum( dist_to_up_promoter < 500000) )
par(mar=c(4,4,4,4),mfrow=c(1,1))
barplot(as.matrix(m),beside = FALSE,col=c("#0B6623","steelblue3"),
        ylim=c(0,10000),ylab="Enhancers")
axis(2,lwd=2)
m
## any_DEG EAG
## 5219 1443
## Re-import the gained enhancers (UCSC-style chromosome names) from BED.
genuine_gained_enhancers_gr = import.bed(paste0(outputs_directory,"genuine_gained_enhancers_ucsc.bed"))
## Per TAD: up-regulated promoters, gained enhancers, and promoters of genes
## significant in a single comparison.
enhancers_TAD_annotation = data.frame( up_genes = countOverlaps(ele_domains$TADs,promoters_HITS_UP),
                                       up_enhancers = countOverlaps(ele_domains$TADs,genuine_gained_enhancers_gr),
                                       genes_Hs_NHP = countOverlaps(ele_domains$TADs,c(prom_up_hs_pt,prom_dn_hs_pt,prom_up_hs_mm,prom_dn_hs_mm)))
## Enhancers in TADs that also hold an up-regulated gene ...
enhancers_linked_with_activation_TAD = genuine_gained_enhancers_gr[unique(queryHits(findOverlaps(genuine_gained_enhancers_gr,ele_domains$TADs[which(enhancers_TAD_annotation[,1]>0 & enhancers_TAD_annotation[,2]>0)])))]
## ... and enhancers in TADs without any differentially expressed gene.
enhancers_not_linked_with_activation_TAD = genuine_gained_enhancers_gr[unique(queryHits(findOverlaps(genuine_gained_enhancers_gr,ele_domains$TADs[which(enhancers_TAD_annotation[,1]==0 & enhancers_TAD_annotation[,2]>0 & enhancers_TAD_annotation[,3]==0)])))]

## Make the two groups mutually exclusive.
enhancers_linked_with_activation_TADs = enhancers_linked_with_activation_TAD[which(!enhancers_linked_with_activation_TAD$name %in% enhancers_not_linked_with_activation_TAD$name)]
enhancers_not_linked_with_activation_TADs = enhancers_not_linked_with_activation_TAD[which(!enhancers_not_linked_with_activation_TAD$name %in% enhancers_linked_with_activation_TAD$name)]

names(enhancers_linked_with_activation_TADs) = enhancers_linked_with_activation_TADs$name
## Name the object that is actually exported (`_TADs`); the unfiltered `_TAD`
## object keeps its names as before.
names(enhancers_not_linked_with_activation_TADs) = enhancers_not_linked_with_activation_TADs$name
names(enhancers_not_linked_with_activation_TAD) = enhancers_not_linked_with_activation_TAD$name
export.bed(enhancers_linked_with_activation_TADs,con=paste0(outputs_directory,"enhancers_linked_with_activation_TADs.bed"))
export.bed(enhancers_not_linked_with_activation_TADs,con=paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs.bed"))
We read in the annotation of TADs from TopDom. We identify DEGs in the single comparisons and assess how frequently we see up- and down-regulated genes per TAD.
## Promoters of genes significant (padj < 0.01) in a single comparison:
## the .x columns of all_Deseqs feed the *_hs_pt sets, the .y columns the
## *_hs_mm sets.
prom_up_hs_pt = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x>0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]
prom_dn_hs_pt = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.x<0 & all_Deseqs$padj.x<0.01,"Row.names"] ) ]

prom_up_hs_mm = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y>0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]
prom_dn_hs_mm = promoters_filtered_gr[ which(promoters_filtered_gr$gene_id %in% all_Deseqs[all_Deseqs$log2FoldChange.y<0 & all_Deseqs$padj.y<0.01,"Row.names"] ) ]

## Per-TAD annotation: one row per TAD, each column the number of features
## of one class overlapping the TAD, plus the TAD width.
ele_domains_anno = data.frame( up_prom = countOverlaps(ele_domains$TADs,promoters_HITS_UP),
                               dn_prom = countOverlaps(ele_domains$TADs,promoters_HITS_DN),
                               up_vsPt = countOverlaps(ele_domains$TADs,prom_up_hs_pt),
                               dn_vsPt = countOverlaps(ele_domains$TADs,prom_dn_hs_pt),
                               up_vsMm = countOverlaps(ele_domains$TADs,prom_up_hs_mm),
                               dn_vsMm = countOverlaps(ele_domains$TADs,prom_dn_hs_mm),
                               number_of_enh_hs = countOverlaps(ele_domains$TADs, enhancers_HS ),
                               genuine_gained_enhancers = countOverlaps(ele_domains$TADs,genuine_gained_enhancers_gr),
                               genuine_gained_active_enhancers = countOverlaps(ele_domains$TADs,genuine_gained_active_enhancers_gr),
                               genuine_gained_poised_enhancers = countOverlaps(ele_domains$TADs,genuine_gained_poised_enhancers_gr),
                               genuine_lost_enhancers = countOverlaps(ele_domains$TADs,genuine_lost_enhancers_gr),
                               geniune_lost_active_enhancers = countOverlaps(ele_domains$TADs,geniune_lost_active_enhancers_gr),
                               geniune_lost_poised_enhancers = countOverlaps(ele_domains$TADs,geniune_lost_poised_enhancers_gr),
                               gained_enhancers_that_do_sth = countOverlaps(ele_domains$TADs,genuine_gained_enhancers_that_do_something),
                               lost_enhancers_that_do_sth = countOverlaps(ele_domains$TADs,genuine_lost_enhancers_that_do_something),
                               prom_number=countOverlaps(ele_domains$TADs,promoters_filtered_gr),
                               me3_number=countOverlaps(ele_domains$TADs,hs_me3),
                               size = width(ele_domains$TADs))
length(unique(queryHits(findOverlaps(promoters_HITS_UP,ele_domains$TADs))))
## [1] 555
length(unique(queryHits(findOverlaps(promoters_HITS_DN,ele_domains$TADs))))
## [1] 447
## TADs holding only up-regulated or only down-regulated promoters.
tads_with_up_gene = ele_domains_anno[ele_domains_anno$up_prom>0 & ele_domains_anno$dn_prom==0,]
tads_with_dn_gene = ele_domains_anno[ele_domains_anno$dn_prom>0 & ele_domains_anno$up_prom==0,]
colSums(tads_with_up_gene>0)
## up_prom dn_prom
## 382 0
## up_vsPt dn_vsPt
## 382 72
## up_vsMm dn_vsMm
## 382 83
## number_of_enh_hs genuine_gained_enhancers
## 349 231
## genuine_gained_active_enhancers genuine_gained_poised_enhancers
## 154 176
## genuine_lost_enhancers geniune_lost_active_enhancers
## 92 19
## geniune_lost_poised_enhancers gained_enhancers_that_do_sth
## 55 228
## lost_enhancers_that_do_sth prom_number
## 8 382
## me3_number size
## 366 382
colSums(tads_with_dn_gene>0)
## up_prom dn_prom
## 0 365
## up_vsPt dn_vsPt
## 38 365
## up_vsMm dn_vsMm
## 83 365
## number_of_enh_hs genuine_gained_enhancers
## 335 183
## genuine_gained_active_enhancers genuine_gained_poised_enhancers
## 99 133
## genuine_lost_enhancers geniune_lost_active_enhancers
## 98 20
## geniune_lost_poised_enhancers gained_enhancers_that_do_sth
## 50 12
## lost_enhancers_that_do_sth prom_number
## 93 365
## me3_number size
## 353 365
sum( rowSums(ele_domains_anno[,c("up_prom","dn_prom")]>0)==2 )
## [1] 33
sum( rowSums(ele_domains_anno[,c("up_prom","dn_prom")]>0)==1 & rowSums(ele_domains_anno[,c("up_prom","dn_prom")])>1)
## [1] 124
## compute co-occurence of up and down regulated genes in TADs
## Count the rows of `x` in which only the first, only the second, or both of
## two columns are positive.
##   x:    data.frame or matrix of counts
##   col1: index or name of the first column
##   col2: index or name of the second column
## Returns a named integer vector c(FirstOnly, SecondOnly, Both).
getStats = function(x,col1,col2){
  first_present = x[,col1]>0
  second_present = x[,col2]>0
  tp = c( sum(first_present & x[,col2]==0),
          sum(x[,col1]==0 & second_present),
          sum(first_present & second_present) )
  names(tp) = c("FirstOnly","SecondOnly","Both")
  return(tp)
}
## Stacked bars: TADs with only up-regulated, only down-regulated, or both
## kinds of promoters, for the combined and the two single comparisons.
par(mfrow=c(1,1),mar=c(8,5,1,1))
m = rbind( getStats(ele_domains_anno,
                    which(colnames(ele_domains_anno)=="up_prom"),
                    which(colnames(ele_domains_anno)=="dn_prom")),
           getStats(ele_domains_anno,
                    which(colnames(ele_domains_anno)=="up_vsPt"),
                    which(colnames(ele_domains_anno)=="dn_vsPt")),
           getStats(ele_domains_anno,
                    which(colnames(ele_domains_anno)=="up_vsMm"),
                    which(colnames(ele_domains_anno)=="dn_vsMm")) )
barplot(t(m), beside=FALSE, col=c("green4","wheat3","gray60"),
        names=c("Hs vs NHP","Hs vs. Pt","Hs vs. Mm"),las=2,
        ylab="EAG")
axis(2,lwd=2,las=2)
Figure showing how many genuine gained enhancers are there per domain. First of all there are many domains that only feature gained enhancer and no upregulated EAG. There are few domains where I do not see a gained enhancer despite the presence of an upregulated EAG. We see both the up-regulated EAG and a gained DOR in 253 TADs.
getStats(ele_domains_anno,
which(colnames(ele_domains_anno)=="genuine_gained_enhancers"),
which(colnames(ele_domains_anno)=="up_prom"))
## FirstOnly SecondOnly Both
## 3420 162 253
hist(ele_domains_anno[ele_domains_anno$up_prom>0,"genuine_gained_enhancers"],n=14,
main="",col="green4",xlab="Number of gained putative enhancers",ylim=c(0,300))
axis(1,lwd=2)
axis(2,lwd=2)
The majority of TADs with an up-regulated EAG also contain a gained enhancer (253 of 415).
sum(ele_domains_anno$genuine_gained_enhancers>0)
## [1] 3673
sum(ele_domains_anno[ele_domains_anno$up_prom>0,"genuine_gained_enhancers"]>0)
## [1] 253
sum(ele_domains_anno$up_prom>0)
## [1] 415
## Peaks gained in human in at least one of the two comparisons.
gained_enhancers_in_any_comp = hs_atac_for_Deseq2_Hs_vs_NHP_filt[ which( (res_HS_NHP_filt$hs_pt_LFC>0 & res_HS_NHP_filt$hs_pt_Padj<pvalthr) | (res_HS_NHP_filt$hs_mm_LFC>0 & res_HS_NHP_filt$hs_mm_Padj<pvalthr) ) ]
## Drop peaks at H3K4me3 peaks or promoter windows; !overlapsAny() stays
## correct when nothing overlaps, unlike negative indexing with queryHits().
gained_enhancers_in_any_comp = gained_enhancers_in_any_comp[ !overlapsAny(gained_enhancers_in_any_comp,c(hs_me3,promoters_tss_gr)) ]
## Require absence of H3K27ac in both chimpanzee and macaque.
gained_enhancers_in_any_comp = gained_enhancers_in_any_comp[which(names(gained_enhancers_in_any_comp) %in% pt_atac_for_Deseq2_not_H3K27ac[pt_atac_for_Deseq2_not_H3K27ac %in% mm_atac_for_Deseq2_not_H3K27ac])]
sum( countOverlaps(ele_domains$TADs,gained_enhancers_in_any_comp)>0 & ele_domains_anno$up_prom>0 )
## [1] 358
sum( countOverlaps(ele_domains$TADs,gained_enhancers_in_any_comp)>0 & rowSums( ele_domains_anno[,c("up_vsPt", "up_vsMm")]>0)>0 )
## [1] 1607
Overall number of human gained enhancers compared with chimps and macaques: does it explain why the log fold change in the human lineage is more pronounced when compared with macaques?
## sanity check: DESeq2 result rows and ATAC regions are listed in the same order
all(rownames(res_HS_NHP) == names(hs_atac_for_Deseq2))
## [1] TRUE
## Split promoters_HITS_UP by the number of genuine gained enhancers in the
## overlapping TAD (only TADs with up_prom > 0): 0, 1, 2-3 and >3.
proms_in_tads_wo_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers==0)]))]
proms_in_tads_with_1_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers==1)]))]
proms_in_tads_with_many_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers>1 & ele_domains_anno$genuine_gained_enhancers<4)]))]
## ">3" group: the threshold was `>4`, which dropped TADs with exactly 4 gained
## enhancers from every group; `>3` matches the plot label and the lost-enhancer code.
proms_in_tads_with_very_many_DORs = promoters_HITS_UP[queryHits(findOverlaps(promoters_HITS_UP, ele_domains$TADs[which(ele_domains_anno$up_prom>0 & ele_domains_anno$genuine_gained_enhancers>3)]))]
## Regions with positive LFC and adjusted p-value < 0.1, per comparison.
DORs_gained_Hs_Pt = hs_atac_for_Deseq2[which(res_HS_NHP$hs_pt_LFC>0 & res_HS_NHP$hs_pt_Padj<0.1)]
DORs_gained_Hs_Mm = hs_atac_for_Deseq2[which(res_HS_NHP$hs_mm_LFC>0 & res_HS_NHP$hs_mm_Padj<0.1)]
## Remove regions overlapping promoter windows, hs_me3 or gained_ATAC_gr.
## setdiff() keeps every region when there are no overlaps, whereas
## x[-unique(queryHits(...))] would return an empty object in that case.
DORs_gained_Hs_Pt = DORs_gained_Hs_Pt[setdiff(seq_along(DORs_gained_Hs_Pt), queryHits(findOverlaps(DORs_gained_Hs_Pt,c(promoters_tss_gr,hs_me3,gained_ATAC_gr))))]
DORs_gained_Hs_Mm = DORs_gained_Hs_Mm[setdiff(seq_along(DORs_gained_Hs_Mm), queryHits(findOverlaps(DORs_gained_Hs_Mm,c(promoters_tss_gr,hs_me3,gained_ATAC_gr))))]
## Per-TAD counts of the remaining regions.
ele_domains_anno$DORs_gained_Hs_Pt = countOverlaps(ele_domains$TADs,DORs_gained_Hs_Pt)
ele_domains_anno$DORs_gained_Hs_Mm = countOverlaps(ele_domains$TADs,DORs_gained_Hs_Mm)
## Bin the genuine gained enhancer count into (-Inf,0], (0,1], (1,3], (3,1000].
ele_domains_anno$DORs_anno = cut(ele_domains_anno$genuine_gained_enhancers,c(-Inf,0,1,3,1000))
par(mfrow=c(2,2),mar=c(3,2,1,2))
## log2FoldChange.x of the promoters in each gained-enhancer group (0, 1, 2-3, >3).
## NOTE(review): which comparison the .x / .y columns of all_Deseqs hold is not visible here - confirm.
boxplot( all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_wo_DORs)],
         # all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_DORs)],
         all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_DORs)],
         all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_DORs)],
         all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_DORs)],
         notch=TRUE, ylim=c(0,12), col="white",border=colorRampPalette(c("steelblue","green4"))(4),
         names=c("0","1","2-3",">3"))
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
## Same groups, log2FoldChange.y.
boxplot( all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_wo_DORs)],
         all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_DORs)],
         all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_DORs)],
         all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_DORs)],
         notch=TRUE, ylim=c(0,12), col="white",border=colorRampPalette(c("steelblue","green4"))(4),
         names=c("0","1","2-3",">3"))
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
## Per-TAD counts of gained DORs (Hs vs Pt, then Hs vs Mm) split by the DORs_anno bin.
boxplot(split(ele_domains_anno$DORs_gained_Hs_Pt,ele_domains_anno$DORs_anno),ylim=c(0,30),notch=TRUE, col="white",border=colorRampPalette(c("steelblue","green4"))(4),names=c("0","1","2-3",">3"))
## Warning in (function (z, notch = FALSE, width = NULL, varwidth = FALSE, : some
## notches went outside hinges ('box'): maybe set notch=FALSE
boxplot(split(ele_domains_anno$DORs_gained_Hs_Mm,ele_domains_anno$DORs_anno),ylim=c(0,30),notch=TRUE, col="white",border=colorRampPalette(c("steelblue","green4"))(4),names=c("0","1","2-3",">3"))
## Regions with negative LFC and adjusted p-value < 0.1, per comparison.
DORs_lost_Hs_Pt = hs_atac_for_Deseq2[which(res_HS_NHP$hs_pt_LFC<0 & res_HS_NHP$hs_pt_Padj<0.1)]
DORs_lost_Hs_Mm = hs_atac_for_Deseq2[which(res_HS_NHP$hs_mm_LFC<0 & res_HS_NHP$hs_mm_Padj<0.1)]
## Remove regions overlapping promoter windows, hs_me3 or lost_ATAC_gr.
## setdiff() keeps every region when there are no overlaps, whereas
## x[-unique(queryHits(...))] would return an empty object in that case.
DORs_lost_Hs_Pt = DORs_lost_Hs_Pt[setdiff(seq_along(DORs_lost_Hs_Pt), queryHits(findOverlaps(DORs_lost_Hs_Pt,c(promoters_tss_gr,hs_me3,lost_ATAC_gr))))]
DORs_lost_Hs_Mm = DORs_lost_Hs_Mm[setdiff(seq_along(DORs_lost_Hs_Mm), queryHits(findOverlaps(DORs_lost_Hs_Mm,c(promoters_tss_gr,hs_me3,lost_ATAC_gr))))]
## Per-TAD counts of the remaining regions.
ele_domains_anno$DORs_lost_Hs_Pt = countOverlaps(ele_domains$TADs,DORs_lost_Hs_Pt)
ele_domains_anno$DORs_lost_Hs_Mm = countOverlaps(ele_domains$TADs,DORs_lost_Hs_Mm)
## Overwrites DORs_anno: it now bins the genuine LOST enhancer count.
ele_domains_anno$DORs_anno = cut(ele_domains_anno$genuine_lost_enhancers,c(-Inf,0,1,3,1000))
###
## Split promoters_HITS_DN by the number of genuine lost enhancers in the
## overlapping TAD (only TADs with dn_prom > 0): 0, 1, 2-3 and >3.
proms_in_tads_wo_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers==0)]))]
proms_in_tads_with_1_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers==1)]))]
## "2-3" group: the upper bound tested genuine_gained_enhancers (copy-paste from
## the gained-enhancer code); it must bound genuine_lost_enhancers.
proms_in_tads_with_many_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers>1 & ele_domains_anno$genuine_lost_enhancers<4)]))]
proms_in_tads_with_very_many_lost_DORs = promoters_HITS_DN[queryHits(findOverlaps(promoters_HITS_DN, ele_domains$TADs[which(ele_domains_anno$dn_prom>0 & ele_domains_anno$genuine_lost_enhancers>3)]))]
par(mfrow=c(2,2),mar=c(3,2,1,1))
## log2FoldChange.x of the promoters in each lost-enhancer group (0, 1, 2-3, >3).
boxplot( all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_wo_lost_DORs)],
         all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_lost_DORs)],
         all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_lost_DORs)],
         all_Deseqs$log2FoldChange.x[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_lost_DORs)],
         notch=FALSE, ylim=c(-12,2), col="white",border=colorRampPalette(c("black","red"))(4),
         names=c("0","1","2-3",">3"))
## Same groups, log2FoldChange.y.
boxplot( all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_wo_lost_DORs)],
         all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_1_lost_DORs)],
         all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_many_lost_DORs)],
         all_Deseqs$log2FoldChange.y[all_Deseqs$Row.names %in% names(proms_in_tads_with_very_many_lost_DORs)],
         notch=FALSE, ylim=c(-12,2), col="white",border=colorRampPalette(c("black","red"))(4),
         names=c("0","1","2-3",">3"))
## Per-TAD counts of lost DORs (Hs vs Pt, then Hs vs Mm) split by the DORs_anno bin.
boxplot(split(ele_domains_anno$DORs_lost_Hs_Pt,ele_domains_anno$DORs_anno),
        ylim=c(0,30),notch=TRUE, col="white",border=colorRampPalette(c("black","red"))(4),
        names=c("0","1","2-3",">3"))
boxplot(split(ele_domains_anno$DORs_lost_Hs_Mm,ele_domains_anno$DORs_anno),ylim=c(0,30),notch=TRUE,
        col="white",border=colorRampPalette(c("black","red"))(4),
        names=c("0","1","2-3",">3"))
## Matrix of phastCons signal; rows are matched to enhancer names via rownames()
## and the columns are plotted on a 200-point grid from -500 to 500.
phastCons = readRDS(paste0(objects_directory,'phastCons30way_signal_in_5bp_bins_for_all_ATAC_peaks_500Kb_around_summit.Rds'))
par(mfrow=c(1,1),mar=c(5,5,1,1))
## Column means per enhancer group; the first group sets up the axes.
plot( seq(-500,500,length.out=200),
      colMeans(phastCons[rownames(phastCons) %in% names(conserved_enhancers),]),
      ylab="PhastCons",
      type="l",col="black",lwd=3,ylim=c(0.0,0.4),xlim=c(-500,500),
      xlab="distance from ATAC-seq peak summit" )
lines( seq(-500,500,length.out=200),
       colMeans(phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation),]),type="l",col="turquoise4",lwd=3 )
lines( seq(-500,500,length.out=200),
       colMeans(phastCons[rownames(phastCons) %in% names(enhancers_not_linked_with_activation),]),type="l",col="gray80",lwd=3 )
lines( seq(-500,500,length.out=200),
       colMeans(phastCons[rownames(phastCons) %in% names(genuine_lost_enhancers_gr),]),type="l",col="red",lwd=3 )
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
abline(v=0,lwd=2,lty=2,col="gray")
## Welch t-test on column 95 of phastCons: conserved enhancers vs enhancers linked with activation.
## NOTE(review): column 95 is presumably the bin next to the summit - confirm.
t.test(phastCons[rownames(phastCons) %in% names(conserved_enhancers),95],
       phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation),95])
##
## Welch Two Sample t-test
##
## data: phastCons[rownames(phastCons) %in% names(conserved_enhancers), 95] and phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation), 95]
## t = 18.66, df = 3400.3, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1738891 0.2147229
## sample estimates:
## mean of x mean of y
## 0.3625148 0.1682088
## Welch t-test on column 95 of phastCons: enhancers not linked vs linked with activation.
t.test(phastCons[rownames(phastCons) %in% names(enhancers_not_linked_with_activation),95],
       phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation),95])
##
## Welch Two Sample t-test
##
## data: phastCons[rownames(phastCons) %in% names(enhancers_not_linked_with_activation), 95] and phastCons[rownames(phastCons) %in% names(enhancers_linked_with_activation), 95]
## t = 4.7247, df = 3272.1, p-value = 0.000002401
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02917625 0.07056904
## sample estimates:
## mean of x mean of y
## 0.2180814 0.1682088
## Join the two DESeq2 result tables by row name: columns suffixed .x come from
## res_HS_PT and columns suffixed .y from res_HS_MM.
DOR_Deseq2 = merge(as.data.frame(res_HS_PT),
                   as.data.frame(res_HS_MM),
                   by="row.names")
## all putative enhancers
seqlevelsStyle(hs_atac) = "UCSC"
## Drop ATAC regions overlapping promoter windows or hs_me3. setdiff() keeps all
## regions when nothing overlaps; x[-unique(queryHits(...))] would return none.
all_primate_enhancers = hs_atac[ setdiff( seq_along(hs_atac), queryHits(findOverlaps(hs_atac,c(promoters_tss_gr,hs_me3))) ) ]
changed_enhancers_ATAC_signal_change = DOR_Deseq2[DOR_Deseq2$Row.names %in% names(all_primate_enhancers),]
## Require an adjusted p-value in both comparisons.
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ !is.na(changed_enhancers_ATAC_signal_change$padj.x),]
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ !is.na(changed_enhancers_ATAC_signal_change$padj.y),]
## Keep rows below sqrt(0.1) in at least one comparison.
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ changed_enhancers_ATAC_signal_change$padj.x<sqrt(0.1) | changed_enhancers_ATAC_signal_change$padj.y<sqrt(0.1), ]
## Drop rows whose two log2 fold changes are exactly equal.
changed_enhancers_ATAC_signal_change = changed_enhancers_ATAC_signal_change[ changed_enhancers_ATAC_signal_change$log2FoldChange.x != changed_enhancers_ATAC_signal_change$log2FoldChange.y,]
par(mfrow=c(1,1),mar=c(4,4,1,1))
## Absolute log2 fold changes of the changed enhancers, one box per comparison.
boxplot(list(abs(changed_enhancers_ATAC_signal_change$log2FoldChange.x),
             abs(changed_enhancers_ATAC_signal_change$log2FoldChange.y)),
        names=c("Hs vs. Pt","Hs vs. Mm"),
        ylab=expression("Human/NHP [log"[2]*")]"),
        ylim=c(0,7),
        outline=FALSE,
        col="white",border=c("red","blue"),lwd=2)
## redraw both axes and the frame with a heavier line
axis(1,lwd=2, at=c(1,2),labels=c("Hs vs. Pt","Hs vs. Mm"))
axis(2,lwd=2)
box(col="black",lwd=2)
par(mfrow=c(1,1),pty='s')
## x = log2FoldChange.x (from res_HS_PT, human vs chimp),
## y = log2FoldChange.y (from res_HS_MM, human vs macaque).
## The axis labels were swapped relative to the plotted data; they now follow
## the merge order used to build the table.
heatscatter( changed_enhancers_ATAC_signal_change$log2FoldChange.x,
             changed_enhancers_ATAC_signal_change$log2FoldChange.y,
             colpal="blues",cex=0.5,
             xlab=expression("Human/chimp [log"[2]*")]"),
             ylab=expression("Human/macaque [log"[2]*")]"),
             ylim=c(-10,10),
             xlim=c(-10,10))
axis(1,lwd=2)
axis(2,lwd=2)
Regulomes are less correlated than transcriptomes
## Pearson correlation of the Hs-vs-Pt and Hs-vs-Mm LFCs over regions with
## adjusted p-value < 0.1 in both comparisons.
cor.test(res_HS_NHP$hs_pt_LFC[res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_mm_Padj<0.1],
         res_HS_NHP$hs_mm_LFC[res_HS_NHP$hs_pt_Padj<0.1 & res_HS_NHP$hs_mm_Padj<0.1] )
##
## Pearson's product-moment correlation
##
## data: res_HS_NHP$hs_pt_LFC[res_HS_NHP$hs_pt_Padj < 0.1 & res_HS_NHP$hs_mm_Padj < 0.1] and res_HS_NHP$hs_mm_LFC[res_HS_NHP$hs_pt_Padj < 0.1 & res_HS_NHP$hs_mm_Padj < 0.1]
## t = 241.55, df = 17201, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8753683 0.8821743
## sample estimates:
## cor
## 0.878816
## Pearson correlation of the two log2 fold changes in all_Deseqs.
## NOTE(review): the filter combines an adjusted p-value (padj.x) with a raw
## p-value (pvalue.y); padj.y may have been intended. Left unchanged because the
## recorded output below was produced with this filter - confirm.
cor.test(all_Deseqs$log2FoldChange.x[all_Deseqs$padj.x<0.01 & all_Deseqs$pvalue.y<0.01],
         all_Deseqs$log2FoldChange.y[all_Deseqs$padj.x<0.01 & all_Deseqs$pvalue.y<0.01] )
##
## Pearson's product-moment correlation
##
## data: all_Deseqs$log2FoldChange.x[all_Deseqs$padj.x < 0.01 & all_Deseqs$pvalue.y < 0.01] and all_Deseqs$log2FoldChange.y[all_Deseqs$padj.x < 0.01 & all_Deseqs$pvalue.y < 0.01]
## t = 49.93, df = 1680, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7529456 0.7914822
## sample estimates:
## cor
## 0.7729257
## Correlation (with 99% confidence interval) of the two fold-change vectors for
## gene expression and for the changed enhancers.
expressionRNA = cor.test(log_fold_dat$HSvPT_lfc_shrunk,log_fold_dat$HSvMM_lfc, conf.level = 0.99)
regulomeATAC = cor.test(changed_enhancers_ATAC_signal_change$log2FoldChange.x,
                        changed_enhancers_ATAC_signal_change$log2FoldChange.y, conf.level = 0.99)
par(pty="m")
## barplot() returns the bar midpoints; use them for the interval segments
## instead of hard-coding 0.7 and 1.9.
bar_mid = barplot( c(expressionRNA$estimate,
                     regulomeATAC$estimate),
                   col=c('green4','steelblue'),ylim=c(0,1),
                   names.arg=c("gene expression","ATAC"))
segments(bar_mid[1],expressionRNA$conf.int[[1]],bar_mid[1],expressionRNA$conf.int[[2]])
segments(bar_mid[2],regulomeATAC$conf.int[[1]],bar_mid[2],regulomeATAC$conf.int[[2]])
axis(2,lwd=2)
## Resize each enhancer to a 1,000,000 bp window around its centre, then count
## the windows that overlap at least one range of promoters_filtered_gr.
linked_500 = GenomicRanges::resize(enhancers_linked_with_activation,1000000,fix="center")
not_linked_500 = GenomicRanges::resize(enhancers_not_linked_with_activation,1000000,fix="center")
sum(countOverlaps(linked_500,promoters_filtered_gr)>0)
## [1] 1443
sum(countOverlaps(not_linked_500,promoters_filtered_gr)>0)
## [1] 2675
Enhancers that do something are more open
## Human ATAC-seq and H3K27ac ChIP-seq coverage tracks (bigWig), UCSC-style seqlevels.
atac_hs_bw = import.bw(paste0(outputs_directory,"ATAC_Seq_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_RPGC.bw"))
k27_hs_bw = import.bw(paste0(outputs_directory,"ChIP_Seq_H3K27ac_12-22_HomSap_i-Astro_ELE10-30_merged_hg38_RPGC.bw"))
seqlevelsStyle(atac_hs_bw) = "ucsc"
seqlevelsStyle(k27_hs_bw) = "ucsc"
## Enhancer sets read back from BED; the BED name column becomes the range names.
linked_GR = import.bed(paste0(outputs_directory,"enhancers_linked_with_activation.bed"))
names(linked_GR) = linked_GR$name
not_linked_GR = import.bed(paste0(outputs_directory,"enhancers_not_linked_with_activation.bed"))
names(not_linked_GR) = not_linked_GR$name
## Ranges passed to getSignalInBins(), derived by the project helper
## GetAPRangesForGenomicRangesObject() (defined elsewhere).
linked_GR_AP = GetAPRangesForGenomicRangesObject(linked_GR)
not_linked_GR_AP = GetAPRangesForGenomicRangesObject(not_linked_GR)
## Binned signal per range, for each track and each enhancer set.
linked_atac_hs = getSignalInBins( linked_GR_AP, atac_hs_bw, 1 )
linked_k27_hs = getSignalInBins( linked_GR_AP, k27_hs_bw, 1 )
not_linked_atac_hs = getSignalInBins( not_linked_GR_AP, atac_hs_bw, 1 )
not_linked_k27_hs = getSignalInBins( not_linked_GR_AP, k27_hs_bw, 1 )
par(mfrow=c(1,2),mar=c(5,5,1,1),pty="m")
## Mean ATAC-seq profile: linked (turquoise) vs not linked (gray) enhancers,
## drawn on a 200-point grid from -1000 to 1000.
plot(seq(-1000,1000,length.out=200),
     colMeans(linked_atac_hs),col="turquoise4",type='l',lwd=2,
     xlab="Distance from the DOR summit",ylab="ATAC-seq signal (RPGC)")
lines(seq(-1000,1000,length.out=200),
      colMeans(not_linked_atac_hs),col="gray",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
## Mean H3K27ac profile for the same two sets.
plot(seq(-1000,1000,length.out=200),
     colMeans(linked_k27_hs),col="turquoise4",type='l',lwd=2,
     xlab="Distance from the DOR summit",ylab="H3K27ac ChIP-seq signal (RPGC)",ylim=c(0,6))
lines(seq(-1000,1000,length.out=200),
      colMeans(not_linked_k27_hs),col="gray",lwd=2)
axis(1,lwd=2)
axis(2,lwd=2)
box(col="black",lwd=2)
Names of all the TFs in the HOCOMOCO database.
## File names of all TF motif BED files across the seven directories.
AllTFs = c(list.files('~/human_beds/A'),
           unlist( lapply( as.list(paste0("~/human_beds/A-kopia",c('',2,3,4,5,6))), function(x){list.files(x)} ) ) )
## Strip the ".bed" extension. The previous strsplit(AllTFs, '.bed') treated '.'
## as a regex wildcard and, once unlist()-ed, could return a vector of a
## different length than AllTFs; an anchored sub() is exact and length-preserving.
TFs = unique( sub("\\.bed$", "", AllTFs) )
TFsEnsemblG = read.delim( file=paste0(outputs_directory,'TFsymbol_fixed.txt'),as.is=TRUE )
## Ensembl gene id looked up through the HGNC symbol in column `Fixed`.
TFsEnsemblG$eig = genemapu$ensembl_gene_id[match(TFsEnsemblG$Fixed,genemapu$hgnc_symbol)]
## NOTE(review): these assignments assume TFsymbol_fixed.txt has one row per entry of AllTFs, in the same order - confirm.
TFsEnsemblG$names = AllTFs
TFsEnsemblG$names2 = sub("\\.bed$", "", AllTFs)
TFsEnsemblG$names3 = paste0( sub("\\.bed$", "", AllTFs), "_HG38.bed" )
save(TFsEnsemblG,file=paste0(objects_directory,"TFsEnsemblG.RData"))
Load objects
## Load tables and saved objects used by the TFBS analysis.
human_stripe_factors = read.delim(paste0(outputs_directory,"human_stripe_factors.txt"),header=FALSE,as.is=TRUE)
load(paste0(objects_directory,"TFsEnsemblG.RData"))
load( paste0(objects_directory,"enhancers_functional_groups.RData"))
load( paste0(objects_directory,"conserved_enhancers.RData" ))
## Write the conserved enhancers to BED (input of the liftOver step).
export.bed( conserved_enhancers, con=paste0(outputs_directory,"conserved_enhancers.bed" ))
genuine_gained_enhancers_gr = import.bed(paste0(outputs_directory,"genuine_gained_enhancers_ucsc.bed"))
load(paste0(objects_directory,"lost_enhancers_functional_groups.RData"))
genuine_lost_enhancers_that_do_something = import.bed(paste0(outputs_directory,"lost_enhancers_linked_with_activation.bed"))
genuine_lost_enhancers_that_do_nothing = import.bed(paste0(outputs_directory,"lost_enhancers_not_linked_with_activation.bed"))
enhancers_linked_with_activation_TADs = import.bed(paste0(outputs_directory,"enhancers_linked_with_activation_TADs.bed"))
enhancers_not_linked_with_activation_TADs = import.bed(paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs.bed"))
## Use the BED name column as range names so the sets can be matched by name later.
names(enhancers_linked_with_activation_TADs) = enhancers_linked_with_activation_TADs$name
names(enhancers_not_linked_with_activation_TADs) = enhancers_not_linked_with_activation_TADs$name
Align the chosen enhancer groups to chimp
cd ~/Documents/Tools/
## human to chimp
## Lift every enhancer BED file from hg38 to panTro6. For each set <name> the
## input is <name>.bed, the lifted output <name>_PanTro.bed and the unmapped
## records go to <name>_PanTro.unmapped.file, all in the outputs directory.
outputs=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs
chain=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/chain_files/hg38ToPanTro6.over.chain
for set in enhancers_linked_with_activation \
           enhancers_not_linked_with_activation \
           enhancers_linked_with_activation_TADs \
           enhancers_not_linked_with_activation_TADs \
           conserved_enhancers \
           lost_enhancers_linked_with_activation \
           lost_enhancers_not_linked_with_activation
do
  ./liftOver -minMatch=0.5 -bedPlus=6 -tab \
    "${outputs}/${set}.bed" \
    "${chain}" \
    "${outputs}/${set}_PanTro.bed" \
    "${outputs}/${set}_PanTro.unmapped.file"
done
Get sequences for the enhancers
## Read the lifted-over (panTro6) BED files with the project helper
## readBed_filterChroms(), restricted to the chromosomes listed below.
## NOTE(review): the meaning of the trailing positional argument 4 is defined by the helper - confirm.
pt_chroms = paste0('chr',c(1,'2A','2B', 3:22,'X'))
enhancers_linked_with_activation_pt = readBed_filterChroms(paste0(outputs_directory,'enhancers_linked_with_activation_PanTro.bed'),
                                                           chroms=pt_chroms,4)
enhancers_not_linked_with_activation_pt = readBed_filterChroms(paste0(outputs_directory,'enhancers_not_linked_with_activation_PanTro.bed'),
                                                               chroms=pt_chroms,4)
enhancers_linked_with_activation_TADs_pt = readBed_filterChroms(paste0(outputs_directory,'enhancers_linked_with_activation_TADs_PanTro.bed'),
                                                                chroms=pt_chroms,4)
enhancers_not_linked_with_activation_TADs_pt = readBed_filterChroms(paste0(outputs_directory,'enhancers_not_linked_with_activation_TADs_PanTro.bed'),
                                                                    chroms=pt_chroms,4)
conserved_enhancers_pt = readBed_filterChroms(paste0(outputs_directory,'conserved_enhancers_PanTro.bed'),
                                              chroms=pt_chroms,4)
lost_linked_pt = readBed_filterChroms(paste0(outputs_directory,'lost_enhancers_linked_with_activation_PanTro.bed'),
                                      chroms=pt_chroms,4)
lost_not_linked_pt = readBed_filterChroms(paste0(outputs_directory,'lost_enhancers_not_linked_with_activation_PanTro.bed'),
                                          chroms=pt_chroms,4)
## checks
## Reorder/subset each human set to the names present in its lifted-over chimp
## counterpart, then verify that both are in the same order.
enhancers_linked_with_activation = enhancers_linked_with_activation[match(names(enhancers_linked_with_activation_pt),names(enhancers_linked_with_activation))]
all(names(enhancers_linked_with_activation_pt)==names(enhancers_linked_with_activation))
## [1] TRUE
## NOTE(review): the not-linked set is only checked here, not re-matched like the others.
all(names(enhancers_not_linked_with_activation_pt)==names(enhancers_not_linked_with_activation))
## [1] TRUE
genuine_lost_enhancers_that_do_something = genuine_lost_enhancers_that_do_something[match(names(lost_linked_pt),genuine_lost_enhancers_that_do_something$name)]
genuine_lost_enhancers_that_do_nothing = genuine_lost_enhancers_that_do_nothing[match(names(lost_not_linked_pt),genuine_lost_enhancers_that_do_nothing$name)]
all(genuine_lost_enhancers_that_do_something$name==names(lost_linked_pt))
## [1] TRUE
all(genuine_lost_enhancers_that_do_nothing$name==names(lost_not_linked_pt))
## [1] TRUE
names(genuine_lost_enhancers_that_do_something) = genuine_lost_enhancers_that_do_something$name
names(genuine_lost_enhancers_that_do_nothing) = genuine_lost_enhancers_that_do_nothing$name
enhancers_linked_with_activation_TADs = enhancers_linked_with_activation_TADs[match(names(enhancers_linked_with_activation_TADs_pt),names(enhancers_linked_with_activation_TADs))]
enhancers_not_linked_with_activation_TADs = enhancers_not_linked_with_activation_TADs[match(names(enhancers_not_linked_with_activation_TADs_pt),names(enhancers_not_linked_with_activation_TADs))]
all(names(enhancers_linked_with_activation_TADs)==names(enhancers_linked_with_activation_TADs_pt))
## [1] TRUE
all(names(enhancers_not_linked_with_activation_TADs)==names(enhancers_not_linked_with_activation_TADs_pt))
## [1] TRUE
## Extract the human (hg38) and chimp (panTro6) sequence of every enhancer set.
enhancers_linked_with_activation_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,enhancers_linked_with_activation)
enhancers_linked_with_activation_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,enhancers_linked_with_activation_pt)
enhancers_not_linked_with_activation_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,enhancers_not_linked_with_activation)
enhancers_not_linked_with_activation_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,enhancers_not_linked_with_activation_pt)
enhancers_linked_with_activation_TADs_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,enhancers_linked_with_activation_TADs)
enhancers_linked_with_activation_TADs_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,enhancers_linked_with_activation_TADs_pt)
enhancers_not_linked_with_activation_TADs_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,enhancers_not_linked_with_activation_TADs)
enhancers_not_linked_with_activation_TADs_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,enhancers_not_linked_with_activation_TADs_pt)
## NOTE(review): conserved_enhancers is not re-matched to conserved_enhancers_pt
## in the visible checks, so the two sequence sets may differ in length/order - confirm.
conserved_enhancers_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,conserved_enhancers)
conserved_enhancers_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,conserved_enhancers_pt)
lost_enhancers_linked_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,genuine_lost_enhancers_that_do_something)
lost_enhancers_linked_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,lost_linked_pt)
lost_enhancers_not_linked_seq_Hs = Biostrings::getSeq(BSgenome.Hsapiens.UCSC.hg38,genuine_lost_enhancers_that_do_nothing)
lost_enhancers_not_linked_seq_Pt = Biostrings::getSeq(BSgenome.Ptroglodytes.UCSC.panTro6,lost_not_linked_pt)
Now let’s find the evolutionary mismatches between sequences. We compare human to chimp sequences.
## For every linked enhancer, pass its human range, human sequence, chimp range
## and chimp sequence (all selected by name) to the project helper
## Figure_out_mismatching_sequences(); run on 4 cores.
test = mclapply( as.list(names(enhancers_linked_with_activation)),
                 function(enh){ Figure_out_mismatching_sequences( enhancers_linked_with_activation[which(names(enhancers_linked_with_activation)==enh)],
                                                                  enhancers_linked_with_activation_seq_Hs[which(names(enhancers_linked_with_activation_seq_Hs)==enh)],
                                                                  enhancers_linked_with_activation_pt[which(names(enhancers_linked_with_activation_pt)==enh)],
                                                                  enhancers_linked_with_activation_seq_Pt[which(names(enhancers_linked_with_activation_seq_Pt)==enh)]) },
                 mc.cores = 4L )
## Stack the per-enhancer results into one table and save it.
enhancers_linked_with_activation_hs_vs_Pt = do.call("rbind",test)
save(enhancers_linked_with_activation_hs_vs_Pt,
     file=paste0(objects_directory,"enhancers_linked_with_activation_hs_vs_Pt.RData") )
## Turn the table into ranges; the `type` column is kept as metadata column `kind`.
enhancers_linked_with_activation_hs_vs_Pt_gr = GRanges( seqnames=Rle(enhancers_linked_with_activation_hs_vs_Pt$seqnames),
                                                        ranges = IRanges( enhancers_linked_with_activation_hs_vs_Pt$start,
                                                                          end=enhancers_linked_with_activation_hs_vs_Pt$end ),
                                                        kind=enhancers_linked_with_activation_hs_vs_Pt$type)
names(enhancers_linked_with_activation_hs_vs_Pt_gr) = enhancers_linked_with_activation_hs_vs_Pt$names
## Export once before and once after switching the seqlevels style to "ncbi".
export.bed( enhancers_linked_with_activation_hs_vs_Pt_gr, con=paste0(outputs_directory,"enhancers_linked_with_activation_hs_vs_Pt_ucsc.bed"))
seqlevelsStyle(enhancers_linked_with_activation_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_hs_vs_Pt.gtf"))
export.bed( enhancers_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_hs_vs_Pt.bed"))
Intersect with bedtools
cd ~/Documents/Tools/bedtools2/
## ------------
## Count overlaps (-C, with -filenames) between the linked-enhancer mismatch
## ranges and every human TFBS BED file, one output file per TFBS directory.
## Each pair is <TFBS directory>:<output suffix>.
query=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_hs_vs_Pt.bed
tfbs=/Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds
result=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation
for pair in A:A A-kopia:A1 A-kopia2:A2 A-kopia3:A3 A-kopia4:A4 A-kopia5:A5 A-kopia6:A6
do
  dir=${pair%%:*}
  tag=${pair##*:}
  ./bin/intersectBed -a "${query}" -b "${tfbs}/${dir}"/*.bed -C -filenames > "${result}/hs_specieis_enhancers_${tag}.bed"
done
Intersect with chimp TFBS lifted over to the Hg38 genome assembly
cd ~/Documents/Tools/bedtools2/
## ------------
## Count overlaps (-C, with -filenames) between the UCSC-style linked-enhancer
## mismatch ranges and every chimp TFBS BED file lifted over to hg38, one output
## file per TFBS directory. Each pair is <TFBS directory>:<output suffix>.
query=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_hs_vs_Pt_ucsc.bed
tfbs=/Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds
result=/Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TFBS_chimp
for pair in A:A A-kopia:A1 A-kopia2:A2 A-kopia3:A3 A-kopia4:A4 A-kopia5:A5 A-kopia6:A6 A-kopia7:A7
do
  dir=${pair%%:*}
  tag=${pair##*:}
  ./bin/intersectBed -a "${query}" -b "${tfbs}/${dir}"/*.bed -C -filenames > "${result}/hs_specieis_enhancers_${tag}.bed"
done
# Human-vs-chimp sequence mismatches for enhancers NOT linked with activation.
# For every enhancer name, the entries carrying that name are pulled from the
# four parallel objects (enhancer set, *_seq_Hs, *_pt, *_seq_Pt) and handed, in
# that order, to Figure_out_mismatching_sequences() (defined elsewhere in this
# file). Work is spread over 4 cores.
norole = mclapply( as.list(names(enhancers_not_linked_with_activation)),
                   function(enh){
                     # entries of `obj` whose name equals the current enhancer
                     pick = function(obj) obj[which(names(obj) == enh)]
                     Figure_out_mismatching_sequences(
                       pick(enhancers_not_linked_with_activation),
                       pick(enhancers_not_linked_with_activation_seq_Hs),
                       pick(enhancers_not_linked_with_activation_pt),
                       pick(enhancers_not_linked_with_activation_seq_Pt) )
                   },
                   mc.cores = 4L )

# Stack the per-enhancer results into one table and cache it.
enhancers_not_linked_with_activation_hs_vs_Pt = do.call("rbind", norole)
save( enhancers_not_linked_with_activation_hs_vs_Pt,
      file=paste0(objects_directory,"enhancers_not_linked_with_activation_hs_vs_Pt.RData"))

# Turn the table into a GRanges: the `type` column becomes the `kind` metadata
# column and the `names` column becomes the range names.
enhancers_not_linked_with_activation_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(enhancers_not_linked_with_activation_hs_vs_Pt$seqnames),
  ranges   = IRanges( enhancers_not_linked_with_activation_hs_vs_Pt$start,
                      end=enhancers_not_linked_with_activation_hs_vs_Pt$end ),
  kind     = enhancers_not_linked_with_activation_hs_vs_Pt$type )
names(enhancers_not_linked_with_activation_hs_vs_Pt_gr) = enhancers_not_linked_with_activation_hs_vs_Pt$names

# Switch to NCBI-style sequence names before writing the files used downstream.
seqlevelsStyle(enhancers_not_linked_with_activation_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_not_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_hs_vs_Pt.gtf"))
export.bed( enhancers_not_linked_with_activation_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_hs_vs_Pt.bed"))
Intersect with bedtools
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation/hs_specieis_enhancers_A6.bed
Now let’s find the evolutionary mismatches between sequences. We compare human to chimp sequences.
# Human-vs-chimp sequence mismatches for TAD-based enhancers linked with
# activation. For every enhancer name, the entries carrying that name are pulled
# from the four parallel objects (enhancer set, *_seq_Hs, *_pt, *_seq_Pt) and
# handed, in that order, to Figure_out_mismatching_sequences() (defined
# elsewhere in this file). Work is spread over 4 cores.
TADs_enh_linked = mclapply( as.list(names(enhancers_linked_with_activation_TADs)),
                            function(enh){
                              # entries of `obj` whose name equals the current enhancer
                              pick = function(obj) obj[which(names(obj) == enh)]
                              Figure_out_mismatching_sequences(
                                pick(enhancers_linked_with_activation_TADs),
                                pick(enhancers_linked_with_activation_TADs_seq_Hs),
                                pick(enhancers_linked_with_activation_TADs_pt),
                                pick(enhancers_linked_with_activation_TADs_seq_Pt) )
                            },
                            mc.cores = 4L )

# Stack the per-enhancer results into one table and cache it.
# NOTE(review): the file name starts with "eenhancers" (double "e"). It looks
# like a typo, but it is kept as-is because a later load() may rely on it.
enhancers_linked_with_activation_TADs_hs_vs_Pt = do.call("rbind", TADs_enh_linked)
save( enhancers_linked_with_activation_TADs_hs_vs_Pt,
      file=paste0(objects_directory,"eenhancers_linked_with_activation_TADs_hs_vs_Pt.RData") )

# Turn the table into a GRanges: the `type` column becomes the `kind` metadata
# column and the `names` column becomes the range names.
enhancers_linked_with_activation_TADs_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(enhancers_linked_with_activation_TADs_hs_vs_Pt$seqnames),
  ranges   = IRanges( enhancers_linked_with_activation_TADs_hs_vs_Pt$start,
                      end=enhancers_linked_with_activation_TADs_hs_vs_Pt$end ),
  kind     = enhancers_linked_with_activation_TADs_hs_vs_Pt$type )
names(enhancers_linked_with_activation_TADs_hs_vs_Pt_gr) = enhancers_linked_with_activation_TADs_hs_vs_Pt$names

# The "_ucsc" BED is written before the sequence-name style is changed; the
# .gtf and plain .bed are written after switching to NCBI-style names.
export.bed( enhancers_linked_with_activation_TADs_hs_vs_Pt_gr, con=paste0(outputs_directory,"enhancers_linked_with_activation_TADs_hs_vs_Pt_ucsc.bed"))
seqlevelsStyle(enhancers_linked_with_activation_TADs_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_TADs_hs_vs_Pt.gtf"))
export.bed( enhancers_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_linked_with_activation_TADs_hs_vs_Pt.bed"))
Intersect with bedtools
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_linked_with_activation_TADs/hs_specieis_enhancers_A6.bed
# Human-vs-chimp sequence mismatches for TAD-based enhancers NOT linked with
# activation. For every enhancer name, the entries carrying that name are pulled
# from the four parallel objects (enhancer set, *_seq_Hs, *_pt, *_seq_Pt) and
# handed, in that order, to Figure_out_mismatching_sequences() (defined
# elsewhere in this file). Work is spread over 4 cores.
TADs_enh__not_linked = mclapply( as.list(names(enhancers_not_linked_with_activation_TADs)),
                                 function(enh){
                                   # entries of `obj` whose name equals the current enhancer
                                   pick = function(obj) obj[which(names(obj) == enh)]
                                   Figure_out_mismatching_sequences(
                                     pick(enhancers_not_linked_with_activation_TADs),
                                     pick(enhancers_not_linked_with_activation_TADs_seq_Hs),
                                     pick(enhancers_not_linked_with_activation_TADs_pt),
                                     pick(enhancers_not_linked_with_activation_TADs_seq_Pt) )
                                 },
                                 mc.cores = 4L )

# Stack the per-enhancer results into one table and cache it.
enhancers_not_linked_with_activation_TADs_hs_vs_Pt = do.call("rbind", TADs_enh__not_linked)
save( enhancers_not_linked_with_activation_TADs_hs_vs_Pt,
      file=paste0(objects_directory,"enhancers_not_linked_with_activation_TADs_hs_vs_Pt.RData"))

# Turn the table into a GRanges: the `type` column becomes the `kind` metadata
# column and the `names` column becomes the range names.
enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(enhancers_not_linked_with_activation_TADs_hs_vs_Pt$seqnames),
  ranges   = IRanges( enhancers_not_linked_with_activation_TADs_hs_vs_Pt$start,
                      end=enhancers_not_linked_with_activation_TADs_hs_vs_Pt$end ),
  kind     = enhancers_not_linked_with_activation_TADs_hs_vs_Pt$type )
names(enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr) = enhancers_not_linked_with_activation_TADs_hs_vs_Pt$names

# Switch to NCBI-style sequence names before writing the files used downstream.
seqlevelsStyle(enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr) = "ncbi"
export.gff( enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs_hs_vs_Pt.gtf"))
export.bed( enhancers_not_linked_with_activation_TADs_hs_vs_Pt_gr, paste0(outputs_directory,"enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/enhancers_not_linked_with_activation_TADs_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/enhancers_not_linked_with_activation_TADs/hs_specieis_enhancers_A6.bed
# Human-vs-chimp sequence mismatches for conserved enhancers. For every
# enhancer name, the entries carrying that name are pulled from the four
# parallel objects (enhancer set, *_seq_Hs, *_pt, *_seq_Pt) and handed, in that
# order, to Figure_out_mismatching_sequences() (defined elsewhere in this
# file). Work is spread over 4 cores.
cons = mclapply( as.list(names(conserved_enhancers)),
                 function(enh){
                   # entries of `obj` whose name equals the current enhancer
                   pick = function(obj) obj[which(names(obj) == enh)]
                   Figure_out_mismatching_sequences(
                     pick(conserved_enhancers),
                     pick(conserved_enhancers_seq_Hs),
                     pick(conserved_enhancers_pt),
                     pick(conserved_enhancers_seq_Pt) )
                 },
                 mc.cores = 4L )

# Stack the per-enhancer results into one table and cache it.
conserved_enhancers_hs_vs_Pt = do.call("rbind", cons)
save( conserved_enhancers_hs_vs_Pt,
      file=paste0(objects_directory,"conserved_enhancers_hs_vs_Pt.RData"))

# Turn the table into a GRanges: the `type` column becomes the `kind` metadata
# column and the `names` column becomes the range names.
conserved_enhancers_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(conserved_enhancers_hs_vs_Pt$seqnames),
  ranges   = IRanges( conserved_enhancers_hs_vs_Pt$start,
                      end=conserved_enhancers_hs_vs_Pt$end ),
  kind     = conserved_enhancers_hs_vs_Pt$type )
names(conserved_enhancers_hs_vs_Pt_gr) = conserved_enhancers_hs_vs_Pt$names

# The "_ucsc" BED is written before the sequence-name style is changed; the
# .gtf and plain .bed are written after switching to NCBI-style names.
export.bed( conserved_enhancers_hs_vs_Pt_gr, con=paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt_ucsc.bed"))
seqlevelsStyle(conserved_enhancers_hs_vs_Pt_gr) = "ncbi"
export.gff( conserved_enhancers_hs_vs_Pt_gr, con=paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt.gtf"))
export.bed( conserved_enhancers_hs_vs_Pt_gr, con=paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt.bed"))
Intersect with bedtools.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt.bed -b /Volumes/T7/T7_backup_25_07_2023/TFBS/human_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Hs/hs_specieis_enhancers_A6.bed
Intersect with chimp TFBS lifted over to the Hg38 genome assembly
cd ~/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A6.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/conserved_enhancers_hs_vs_Pt_ucsc.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/conserved_enhancers_Pt/hs_specieis_enhancers_A7.bed
# Human-vs-chimp sequence mismatches for lost enhancers that are linked
# ("do something"). For every enhancer name, the entries carrying that name are
# pulled from the four parallel objects and handed, in the order shown, to
# Figure_out_mismatching_sequences() (defined elsewhere in this file). Work is
# spread over 4 cores.
lostE = mclapply( as.list(names(genuine_lost_enhancers_that_do_something)),
                  function(enh){
                    # entries of `obj` whose name equals the current enhancer
                    pick = function(obj) obj[which(names(obj) == enh)]
                    Figure_out_mismatching_sequences(
                      pick(genuine_lost_enhancers_that_do_something),
                      pick(lost_enhancers_linked_seq_Hs),
                      pick(lost_linked_pt),
                      pick(lost_enhancers_linked_seq_Pt) )
                  },
                  mc.cores = 4L )

# Stack the per-enhancer results into one table and cache it.
lost_enhancers_linked_hs_vs_Pt = do.call("rbind", lostE)
save( lost_enhancers_linked_hs_vs_Pt,
      file=paste0(objects_directory,"lost_enhancers_linked_hs_vs_Pt.RData"))

# Turn the table into a GRanges: the `type` column becomes the `kind` metadata
# column and the `names` column becomes the range names.
lost_enhancers_linked_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(lost_enhancers_linked_hs_vs_Pt$seqnames),
  ranges   = IRanges( lost_enhancers_linked_hs_vs_Pt$start,
                      end=lost_enhancers_linked_hs_vs_Pt$end ),
  kind     = lost_enhancers_linked_hs_vs_Pt$type )
names(lost_enhancers_linked_hs_vs_Pt_gr) = lost_enhancers_linked_hs_vs_Pt$names

# This BED is written before the sequence-name style is changed. The suffix is
# spelled "_UCSC" (upper case) because that is the exact file name the bedtools
# commands below pass to `-a`; a lower-case "_ucsc" only resolves on
# case-insensitive file systems.
export.bed( lost_enhancers_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed"))
seqlevelsStyle(lost_enhancers_linked_hs_vs_Pt_gr) = "ncbi"
export.gff( lost_enhancers_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_linked_hs_vs_Pt_gr.gtf"))
export.bed( lost_enhancers_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_linked_hs_vs_Pt_gr.bed"))
Intersect the positions of mismatches with TFBS inferred for Chimp and lifted over to human.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A6.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_linked_pt/hs_specieis_enhancers_A7.bed
# Human-vs-chimp sequence mismatches for lost enhancers that are not linked
# ("do nothing"). For every enhancer name, the entries carrying that name are
# pulled from the four parallel objects and handed, in the order shown, to
# Figure_out_mismatching_sequences() (defined elsewhere in this file). Work is
# spread over 4 cores.
lostI = mclapply( as.list(names(genuine_lost_enhancers_that_do_nothing)),
                  function(enh){
                    # entries of `obj` whose name equals the current enhancer
                    pick = function(obj) obj[which(names(obj) == enh)]
                    Figure_out_mismatching_sequences(
                      pick(genuine_lost_enhancers_that_do_nothing),
                      pick(lost_enhancers_not_linked_seq_Hs),
                      pick(lost_not_linked_pt),
                      pick(lost_enhancers_not_linked_seq_Pt) )
                  },
                  mc.cores = 4L )

# Stack the per-enhancer results into one table and cache it.
# NOTE(review): this .RData goes to outputs_directory, whereas the sibling
# blocks save to objects_directory. Kept as-is; confirm which one is intended.
lost_enhancers_not_linked_hs_vs_Pt = do.call("rbind", lostI)
save( lost_enhancers_not_linked_hs_vs_Pt,
      file=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt.RData"))

# Turn the table into a GRanges: the `type` column becomes the `kind` metadata
# column and the `names` column becomes the range names.
lost_enhancers_not_linked_hs_vs_Pt_gr = GRanges(
  seqnames = Rle(lost_enhancers_not_linked_hs_vs_Pt$seqnames),
  ranges   = IRanges( lost_enhancers_not_linked_hs_vs_Pt$start,
                      end=lost_enhancers_not_linked_hs_vs_Pt$end ),
  kind     = lost_enhancers_not_linked_hs_vs_Pt$type )
names(lost_enhancers_not_linked_hs_vs_Pt_gr) = lost_enhancers_not_linked_hs_vs_Pt$names

# The "_UCSC" BED is written before the sequence-name style is changed; the
# .gtf and plain .bed are written after switching to NCBI-style names.
export.bed( lost_enhancers_not_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed"))
seqlevelsStyle(lost_enhancers_not_linked_hs_vs_Pt_gr) = "ncbi"
export.gff( lost_enhancers_not_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt_gr.gtf"))
export.bed( lost_enhancers_not_linked_hs_vs_Pt_gr, con=paste0(outputs_directory,"lost_enhancers_not_linked_hs_vs_Pt_gr.bed"))
Again, intersect the positions of mismatches with TFBS inferred for Chimp and lifted over to human.
cd ~/Documents/Tools/bedtools2/
## ------------
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A1.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia2/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A2.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia3/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A3.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia4/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A4.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia5/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A5.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia6/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A6.bed
./bin/intersectBed -a /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/lost_enhancers_not_linked_hs_vs_Pt_gr_UCSC.bed -b /Volumes/T7/T7_backup_25_07_2023/Chimp_Motif_liftedOver_to_Humans_beds/A-kopia7/*.bed -C -filenames > /Volumes/Backup_4TB/Ciuba_et_all_data_package/data/outputs/TFBS_analysis/lost_enhancers_not_linked_pt/hs_specieis_enhancers_A7.bed
# Re-import the mismatch positions and the conserved enhancer set from BED.
conserved_enhancers_hs_vs_Pt = import.bed(paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt_ucsc.bed"))
conserved_enhancers = import.bed(paste0(outputs_directory,"conserved_enhancers.bed"))
names(conserved_enhancers) = conserved_enhancers$name
# Order by width (ascending) and then drop repeated names, so that for each
# name only the narrowest range is kept.
conserved_enhancers = conserved_enhancers[order(width(conserved_enhancers))]
conserved_enhancers = conserved_enhancers[which(!duplicated(names(conserved_enhancers))) ]
# Read the bedtools intersect results for each enhancer class, restricted to
# chr1-22, chrX and chrY. Result folders produced against the human TFBS beds
# are read with readBedtools_res(); folders produced against the chimp TFBS
# lifted over to human are read with readBedtools_UCSC(). Both readers are
# defined elsewhere in this file; the trailing positional arguments (4, 7) are
# passed through unchanged.
linked_with_activation_TFBSchange = readBedtools_res(
  filePath=paste0(outputs_directory,"TFBS_analysis/enhancers_linked_with_activation/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
linked_with_activation_TFBSchange_chimp = readBedtools_UCSC(
  filePath=paste0(outputs_directory,"TFBS_analysis/enhancers_linked_with_activation_TFBS_chimp/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
not_linked_with_activation_TFBSchange = readBedtools_res(
  filePath=paste0(outputs_directory,"TFBS_analysis/enhancers_not_linked_with_activation/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
conserved_TFBSchange_Hs = readBedtools_res(
  filePath=paste0(outputs_directory,"TFBS_analysis/conserved_enhancers_Hs/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
conserved_TFBSchange_Pt = readBedtools_UCSC(
  filePath=paste0(outputs_directory,"TFBS_analysis/conserved_enhancers_Pt/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
lost_linked = readBedtools_UCSC(
  filePath=paste0(outputs_directory,"TFBS_analysis/lost_enhancers_linked_pt/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
lost_not_linked = readBedtools_UCSC(
  filePath=paste0(outputs_directory,"TFBS_analysis/lost_enhancers_not_linked_pt/"),
  chroms = paste0("chr",c(1:22,'X','Y')),4,7)
#### --------------------------------------------------------------
# Post-process every bedtools result with processTFBSresult() (defined
# elsewhere in this file), annotating with TFsEnsemblG. Results read with
# readBedtools_res() use nameColumn "names"; results read with
# readBedtools_UCSC() use nameColumn "names3".
linked_with_activation_TFBSchange = processTFBSresult( linked_with_activation_TFBSchange,
                                                       tfanno=TFsEnsemblG,
                                                       nameColumn="names")
linked_with_activation_TFBSchange_chimp = processTFBSresult( linked_with_activation_TFBSchange_chimp,
                                                             tfanno=TFsEnsemblG,
                                                             nameColumn = "names3")
not_linked_with_activation_TFBSchange = processTFBSresult( not_linked_with_activation_TFBSchange,
                                                           tfanno=TFsEnsemblG,
                                                           nameColumn="names")
conserved_TFBSchange = processTFBSresult( conserved_TFBSchange_Hs,
                                          tfanno=TFsEnsemblG,
                                          nameColumn = "names")
conserved_TFBSchange_chimp = processTFBSresult( conserved_TFBSchange_Pt,
                                                tfanno=TFsEnsemblG,
                                                nameColumn = "names3")
lost_linked_TFBSchange = processTFBSresult( lost_linked,
                                            tfanno=TFsEnsemblG,
                                            nameColumn = "names3")
lost_not_linked_TFBSchange = processTFBSresult( lost_not_linked,
                                                tfanno=TFsEnsemblG,
                                                nameColumn = "names3")
# Cache all processed TFBS-change objects together in one .RData file.
save(
  linked_with_activation_TFBSchange,
  not_linked_with_activation_TFBSchange,
  linked_with_activation_TFBSchange_chimp,
  conserved_TFBSchange,
  conserved_TFBSchange_chimp,
  lost_linked_TFBSchange,
  lost_not_linked_TFBSchange,
  file = paste0(objects_directory, "evolutionary_changes_in_TFBS.RData")
)

# The two activation-linked objects are additionally stored on their own.
save(
  linked_with_activation_TFBSchange,
  file = paste0(objects_directory, "linked_with_activation_TFBSchange.RData")
)
save(
  linked_with_activation_TFBSchange_chimp,
  file = paste0(objects_directory, "linked_with_activation_TFBSchange_chimp.Rdata")
)

# Reload the combined cache.
load(paste0(objects_directory, "evolutionary_changes_in_TFBS.RData"))
# Re-import the mismatch positions and the conserved enhancer set from BED.
conserved_enhancers_hs_vs_Pt = import.bed(paste0(outputs_directory,"conserved_enhancers_hs_vs_Pt_ucsc.bed"))
conserved_enhancers = import.bed(paste0(outputs_directory,"conserved_enhancers.bed"))
names(conserved_enhancers) = conserved_enhancers$name
# Order by width (ascending) and then drop repeated names, so that for each
# name only the narrowest range is kept.
conserved_enhancers = conserved_enhancers[order(width(conserved_enhancers))]
conserved_enhancers = conserved_enhancers[which(!duplicated(names(conserved_enhancers))) ]
Overall conservation of TFBS - take conserved enhancers
# Per-TF counts of TFBS changes in conserved enhancers (human and chimp).
conserved_TFBSchange_table = table( conserved_TFBSchange$TF )
conserved_TFBSchange_chimp_table = table(conserved_TFBSchange_chimp$TF)
# Reorder the human counts to follow the TF order of the chimp table so the two
# can be divided element-wise later on.
conserved_TFBSchange_human_table = conserved_TFBSchange_table[ match(names(conserved_TFBSchange_chimp_table),names(conserved_TFBSchange_table))]

# Sanity check: both tables must list the same TFs in the same order.
all(names(conserved_TFBSchange_human_table)==names(conserved_TFBSchange_chimp_table))
## [1] TRUE
# Number of TFBS changes per TF in enhancers linked with activation, human vs chimp.
linked_TFBSchange_table <- table(linked_with_activation_TFBSchange$TF)
linked_with_activation_TFBSchange_chimp_table <- table(linked_with_activation_TFBSchange_chimp$TF)
# Re-order the human counts so they follow the TF order of the chimp table.
linked_with_activation_TFBSchange_human_table <- linked_TFBSchange_table[
  match(names(linked_with_activation_TFBSchange_chimp_table), names(linked_TFBSchange_table))
]
# Sanity check: compare the human names against the CHIMP names. The previous
# version compared the human table with itself, which is TRUE by construction
# and could not detect a misaligned table.
table(names(linked_with_activation_TFBSchange_human_table) ==
        names(linked_with_activation_TFBSchange_chimp_table))
##
## TRUE
## 674
# Per-TF log2(human/chimp) ratio of TFBS changes: conserved enhancers vs
# enhancers linked with activation.
# `ylab` used to be supplied twice (an empty string first), so R kept the empty
# one and discarded the descriptive label with a "Duplicated argument" warning.
# Only the descriptive label is passed now.
boxplot(
  log2(conserved_TFBSchange_human_table / conserved_TFBSchange_chimp_table),
  log2(linked_with_activation_TFBSchange_human_table / linked_with_activation_TFBSchange_chimp_table),
  col = "white", border = c("green4", "turquoise4"),
  notch = TRUE, outline = FALSE, ylim = c(-1.5, 1.5),
  ylab = "Change in TFBS [log2(human/Chimp)]"
)
axis(1, at = c(1, 2), lwd = 2)
axis(2, lwd = 2)
box(col = "black", lwd = 2)
# Welch two-sample t-test on the same two sets of log2 ratios.
t.test(log2(conserved_TFBSchange_human_table / conserved_TFBSchange_chimp_table),
       log2(linked_with_activation_TFBSchange_human_table / linked_with_activation_TFBSchange_chimp_table))
##
## Welch Two Sample t-test
##
## data: log2(conserved_TFBSchange_human_table/conserved_TFBSchange_chimp_table) and log2(linked_with_activation_TFBSchange_human_table/linked_with_activation_TFBSchange_chimp_table)
## t = -8.9693, df = 1006.3, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2721183 -0.1744232
## sample estimates:
## mean of x mean of y
## 0.06966084 0.29293161
Conserved and species-specific peaks frequently feature TFBS changes
#' Count distinct TFBS-change start positions per peak
#'
#' @param tfbsobj Object for which `start()` returns one position per entry and
#'   which exposes a `peak` column via `$` (one peak identifier per entry).
#' @return Named integer vector, one element per peak, giving the number of
#'   unique start positions recorded for that peak.
howManyChangesPerPeak <- function(tfbsobj) {
  starts_by_peak <- split(start(tfbsobj), tfbsobj$peak)
  # vapply keeps the result an integer vector even when there are no peaks
  vapply(starts_by_peak, function(positions) length(unique(positions)), integer(1))
}
# Distinct TFBS-change positions per enhancer for the three enhancer classes.
boxplot(
  howManyChangesPerPeak(linked_with_activation_TFBSchange),
  howManyChangesPerPeak(not_linked_with_activation_TFBSchange),
  howManyChangesPerPeak(conserved_TFBSchange),
  names = c('linked', 'not linked', 'conserved'),
  border = c('turquoise4', 'gray80', 'green4'),
  col = "white",
  outline = FALSE,
  las = 2,
  lwd = 2,
  ylab = "changes in TFBS per element"
)
axis(side = 2, lwd = 2, las = 2)
axis(side = 1, at = c(1, 2, 3), labels = c('linked', 'not linked', 'conserved'),
     lwd = 2, las = 2)
box(col = "black", lwd = 2)
# Welch t-test: linked vs not linked.
t.test(
  x = howManyChangesPerPeak(linked_with_activation_TFBSchange),
  y = howManyChangesPerPeak(not_linked_with_activation_TFBSchange)
)
##
## Welch Two Sample t-test
##
## data: howManyChangesPerPeak(linked_with_activation_TFBSchange) and howManyChangesPerPeak(not_linked_with_activation_TFBSchange)
## t = 7.0175, df = 1931.9, p-value = 0.000000000003114
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.7881839 1.3996136
## sample estimates:
## mean of x mean of y
## 6.746499 5.652600
# Welch t-test: conserved vs not linked.
t.test(
  x = howManyChangesPerPeak(conserved_TFBSchange),
  y = howManyChangesPerPeak(not_linked_with_activation_TFBSchange)
)
##
## Welch Two Sample t-test
##
## data: howManyChangesPerPeak(conserved_TFBSchange) and howManyChangesPerPeak(not_linked_with_activation_TFBSchange)
## t = -6.2941, df = 6123.5, p-value = 0.0000000003306
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6712331 -0.3524108
## sample estimates:
## mean of x mean of y
## 5.140778 5.652600
# Fraction of TFBS changes that are 1 bp wide ("MM") vs wider (up to 1000 bp),
# for conserved (A), linked (B) and not-linked (C) enhancers.
# NOTE(review): widths above 1000 fall outside the breaks and are dropped by
# table(), while the denominator still counts them.
A <- table(cut(width(conserved_TFBSchange), c(0, 1, 1000))) / length(conserved_TFBSchange)
B <- table(cut(width(linked_with_activation_TFBSchange), c(0, 1, 1000))) / length(linked_with_activation_TFBSchange)
C <- table(cut(width(not_linked_with_activation_TFBSchange), c(0, 1, 1000))) / length(not_linked_with_activation_TFBSchange)
ABC <- rbind(A, B, C)
# `names.arg` is spelled out; the original relied on partial matching of `names`.
barplot(100 * ABC, beside = TRUE, col = c("green4", "turquoise4", "gray80"), ylim = c(0, 100),
        names.arg = c("MM", "Changes>1bp"), ylab = "%")
axis(2, lwd = 2)
legend(x = 5, y = 90, legend = c("Conserved", "Linked", "Not linked"), cex = 1,
       pch = 15, col = c("green4", "turquoise4", "gray80"), bty = "n")
Any particular TFs? Preparations
## ------------------------------------------------------------
# matrix for each peak
# Build one peak-by-TF matrix per enhancer class. makeMatrixTFBS4peaks() is
# defined elsewhere in this file; every call uses the same TF universe.
TFmat_linked_with_activation <- makeMatrixTFBS4peaks(
  tfmut = linked_with_activation_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_linked_with_activation
)
TFmat_not_linked_with_activation <- makeMatrixTFBS4peaks(
  tfmut = not_linked_with_activation_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_not_linked_with_activation
)
TFmat_linked_with_activation_chimp <- makeMatrixTFBS4peaks(
  tfmut = linked_with_activation_TFBSchange_chimp,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_linked_with_activation
)
TFmat_conserved <- makeMatrixTFBS4peaks(
  tfmut = conserved_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = conserved_enhancers
)
TFmat_conserved_chimp <- makeMatrixTFBS4peaks(
  tfmut = conserved_TFBSchange_chimp,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = conserved_enhancers
)
TFmat_lost_linked <- makeMatrixTFBS4peaks(
  tfmut = lost_linked_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = genuine_lost_enhancers_that_do_something
)
TFmat_lost_not_linked <- makeMatrixTFBS4peaks(
  tfmut = lost_not_linked_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = genuine_lost_enhancers_that_do_nothing
)
save(
  TFmat_linked_with_activation, TFmat_not_linked_with_activation, TFmat_linked_with_activation_chimp,
  TFmat_conserved, TFmat_conserved_chimp, TFmat_lost_linked, TFmat_lost_not_linked,
  file = paste0(objects_directory, "TFmatrices_linked_not_linked.RData")
)
Assess the significance of the observed differences in frequency
load(paste0(objects_directory, "TFmatrices_linked_not_linked.RData"))

# Per TF: number of enhancers with at least one changed motif (Motif) and with
# none (noMotif), for linked (RES) and not-linked (SER) enhancers.
RES <- do.call("rbind",
               apply(TFmat_linked_with_activation, 2, function(x) {
                 data.frame(Motif = sum(x > 0), noMotif = sum(x == 0))
               }))
SER <- do.call("rbind",
               apply(TFmat_not_linked_with_activation, 2, function(x) {
                 data.frame(Motif = sum(x > 0), noMotif = sum(x == 0))
               }))

# One Fisher's exact test per TF on the 2x2 table (linked / not linked) x
# (Motif / noMotif). Each new row is prepended, so the final table lists the
# TFs in reverse column order.
TFs_FT <- data.frame()
for (i in colnames(TFmat_linked_with_activation)) {
  m <- rbind(linked = RES[rownames(RES) == i, ],
             notLinked = SER[rownames(SER) == i, ])
  tp <- fisher.test(m)
  tp <- data.frame(p_val = tp$p.value,
                   odds = tp$estimate,
                   number_in_linked = RES[rownames(RES) == i, 1],
                   number_in_not_linked = SER[rownames(SER) == i, 1],
                   fraction_in_linked = RES[rownames(RES) == i, 1] / rowSums(RES[rownames(RES) == i, ]),
                   fraction_in_not_linked = SER[rownames(SER) == i, 1] / rowSums(SER[rownames(SER) == i, ]),
                   tf = i)
  TFs_FT <- rbind(tp, TFs_FT)
}
# p.adjust() is called with its default method.
TFs_FT$p_adjust <- p.adjust(TFs_FT$p_val)
TFs_FT$p_adjust_bin <- cut(-log10(TFs_FT$p_adjust), c(-1, 0, 1, seq(2, 10, length.out = 252), 45))
# Fraction of enhancers carrying a changed motif, per TF: linked (x) vs not
# linked (y). TFs with adjusted p < 0.01 are highlighted; those that also exceed
# a linked fraction of 0.1 are labelled.
par(pty = "s")
plot(x = TFs_FT$fraction_in_linked,
     y = TFs_FT$fraction_in_not_linked,
     pch = 19, cex = 0.5,
     xlab = "Linked with activation",
     ylab = "Not linked with activation",
     xlim = c(0, 0.3), ylim = c(0, 0.3),
     col = ifelse(TFs_FT$p_adjust < 0.01, "blue3", "wheat2"))
abline(a = 0, b = 1, col = 'black')
axis(1, lwd = 2)
axis(2, lwd = 2)
box(col = 'black', lwd = 2)
text(x = TFs_FT$fraction_in_linked[TFs_FT$p_adjust < 0.01 & TFs_FT$fraction_in_linked > 0.1] + 0.005,
     y = TFs_FT$fraction_in_not_linked[TFs_FT$p_adjust < 0.01 & TFs_FT$fraction_in_linked > 0.1] + 0.005,
     labels = TFs_FT$tf[TFs_FT$p_adjust < 0.01 & TFs_FT$fraction_in_linked > 0.1],
     cex = 1)
Odds of seeing that many stripe TFs
TFs_FT_filt <- TFs_FT[TFs_FT$p_adjust < 0.01, ]
# Share of the significantly affected TFs that are stripe factors.
sum(TFs_FT_filt$tf %in% human_stripe_factors$V1) / nrow(TFs_FT_filt)
## [1] 0.8651685

# 2x2 contingency table: (affected / non-affected TF) x (stripe / non-stripe).
# The "non_affected" row used to be computed over ALL TFs, so the affected TFs
# were counted in both rows; Fisher's exact test needs disjoint rows. The second
# row is now restricted to TFs with adjusted p >= 0.01.
# NOTE(review): console output recorded after this chunk was produced with the
# previous, overlapping table and needs to be regenerated.
is_affected <- TFs_FT$p_adjust < 0.01
is_stripe <- TFs_FT$tf %in% human_stripe_factors$V1
m <- rbind(
  affected = c(stripe = sum(is_affected & is_stripe),
               non_stripe = sum(is_affected & !is_stripe)),
  non_affected = c(stripe = sum(!is_affected & is_stripe),
                   non_stripe = sum(!is_affected & !is_stripe))
)
m
## stripe non_stripe
## affected 77 12
## non_affected 199 476
# Fisher's exact test on the 2x2 table `m` (TF affected or not x stripe or not).
fisher.test(m)
##
## Fisher's Exact Test for Count Data
##
## data: m
## p-value < 0.00000000000000022
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 8.046439 31.566055
## sample estimates:
## odds ratio
## 15.28785
par(pty = "m", mar = c(5, 3, 3, 1))
# log2 odds ratio of the stripe-factor enrichment. The y-axis keeps its original
# upper limit of 4 and only grows when the estimate would not fit.
stripe_log2_odds <- log2(fisher.test(m)$estimate)
barplot(stripe_log2_odds, col = "blue3",
        ylim = c(0, if (is.finite(stripe_log2_odds)) max(4, ceiling(stripe_log2_odds)) else 4))
axis(2, lwd = 2)

# Key TFs: significantly affected TFs that are also stripe factors.
keyTFs <- TFs_FT_filt$tf
keyTFs <- keyTFs[keyTFs %in% human_stripe_factors$V1]

# Per-TF fraction of lost enhancers with a changed motif, with a pseudo-count
# of 1 (l: lost and linked, L: lost and not linked).
l <- (1 + colSums(TFmat_lost_linked > 0)) / nrow(TFmat_lost_linked)
L <- (1 + colSums(TFmat_lost_not_linked > 0)) / nrow(TFmat_lost_not_linked)
# Same per-TF Fisher analysis for lost enhancers. A pseudo-count of 1 is added
# to every cell of the 2x2 table.
LOS <- do.call("rbind",
               apply(TFmat_lost_linked, 2, function(x) {
                 data.frame(Motif = 1 + sum(x > 0), noMotif = 1 + sum(x == 0))
               }))
SOL <- do.call("rbind",
               apply(TFmat_lost_not_linked, 2, function(x) {
                 data.frame(Motif = 1 + sum(x > 0), noMotif = 1 + sum(x == 0))
               }))
lost_TFs_FT_chimp <- data.frame()
# NOTE(review): the TF names are taken from TFmat_linked_with_activation, not
# from the lost-enhancer matrices. All matrices are built with the same `theTFs`
# earlier in the file; confirm their columns really are identical.
for (i in colnames(TFmat_linked_with_activation)) {
  m <- rbind(linked = LOS[rownames(LOS) == i, ],
             notLinked = SOL[rownames(SOL) == i, ])
  tp <- fisher.test(m)
  tp <- data.frame(p_val = tp$p.value,
                   odds = tp$estimate,
                   number_in_linked = LOS[rownames(LOS) == i, 1],
                   number_in_not_linked = SOL[rownames(SOL) == i, 1],
                   fraction_in_linked = LOS[rownames(LOS) == i, 1] / rowSums(LOS[rownames(LOS) == i, ]),
                   fraction_in_not_linked = SOL[rownames(SOL) == i, 1] / rowSums(SOL[rownames(SOL) == i, ]),
                   tf = i)
  lost_TFs_FT_chimp <- rbind(tp, lost_TFs_FT_chimp)
}
# Odds ratios in lost enhancers for: key stripe TFs, all stripe TFs, non-stripe TFs.
par(mfrow = c(1, 1), mar = c(10, 4, 4, 4), pty = "m")
boxplot(lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% keyTFs],
        lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1],
        lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1],
        col = "white", border = c("blue3", "steelblue", "coral3"),
        ylim = c(0, 3), ylab = "Odds",
        outline = FALSE, axes = FALSE)
abline(h = 1)
# NOTE(review): the "77" in the first label is hard-coded; it matches the
# recorded number of affected stripe TFs.
axis(1, lwd = 2, at = c(1, 2, 3),
     labels = c("77 stripe TFs", "All stripe TFs", "non-stripe TFs"), las = 2)
axis(2, lwd = 2)
# Welch t-test: key stripe TFs vs non-stripe TFs.
t.test(lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% keyTFs],
       lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1])
##
## Welch Two Sample t-test
##
## data: lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% keyTFs] and lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1]
## t = 4.2022, df = 332.24, p-value = 0.00003402
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1255168 0.3464608
## sample estimates:
## mean of x mean of y
## 1.374035 1.138047
# Welch t-test: all stripe TFs vs non-stripe TFs.
t.test(lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1],
       lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1])
##
## Welch Two Sample t-test
##
## data: lost_TFs_FT_chimp$odds[lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1] and lost_TFs_FT_chimp$odds[!lost_TFs_FT_chimp$tf %in% human_stripe_factors$V1]
## t = 2.4189, df = 346.62, p-value = 0.01608
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.03773178 0.36604516
## sample estimates:
## mean of x mean of y
## 1.339935 1.138047
par(mfrow = c(1, 1), pty = "m")
# Fraction of enhancers by summed key-TF count, binned as 0, 1, 2, 3-4 and >4.
# drop = FALSE keeps the selection two-dimensional even if keyTFs has length one.
m <- rbind(
  linked = table(cut(rowSums(TFmat_linked_with_activation[, keyTFs, drop = FALSE]),
                     c(-Inf, 0, 1, 2, 4, Inf))) / nrow(TFmat_linked_with_activation),
  not_linked = table(cut(rowSums(TFmat_not_linked_with_activation[, keyTFs, drop = FALSE]),
                         c(-Inf, 0, 1, 2, 4, Inf))) / nrow(TFmat_not_linked_with_activation)
)
par(mfrow = c(2, 1), mar = c(4, 4, 1, 1), pty = "m")
# The (2,4] bin holds counts of 3 and 4, so it is labelled "3-4" (it was "4").
# The axis-label typo "Francion" is corrected to "Fraction".
barplot(m, beside = TRUE, col = c("turquoise4", "gray70"), ylim = c(0, 0.5),
        names.arg = c("0", "1", "2", "3-4", ">4"), ylab = "Fraction of sequences with TFs")
axis(2, lwd = 2)
# Astrocyte-related TFs of interest, restricted to those present in the matrix.
# "AFT3" is corrected to "ATF3": the misspelled symbol can never match a column
# and was silently removed by the %in% filter below.
astroTFs <- c("SOX9", "SOX2", "NFIA", "NFIB", "ATF3", "RUNX2", "NR1F2", "DBX2", "LHX2", "STAT3")
astroTFs <- astroTFs[astroTFs %in% colnames(TFmat_linked_with_activation)]
# par(mfrow=c(1,1))
# drop = FALSE keeps the selection two-dimensional even if one TF remains.
M <- rbind(
  linked = table(cut(rowSums(TFmat_linked_with_activation[, astroTFs, drop = FALSE]),
                     c(-Inf, 0, 1, 2, 4, Inf))) / nrow(TFmat_linked_with_activation),
  not_linked = table(cut(rowSums(TFmat_not_linked_with_activation[, astroTFs, drop = FALSE]),
                         c(-Inf, 0, 1, 2, 4, Inf))) / nrow(TFmat_not_linked_with_activation)
)
# The (2,4] bin holds counts of 3 and 4, so it is labelled "3-4" (it was "4").
# The axis-label typo "Francion" is corrected to "Fraction".
barplot(M, beside = TRUE, col = c("turquoise4", "gray70"), ylim = c(0, 1),
        names.arg = c("0", "1", "2", "3-4", ">4"), ylab = "Fraction of sequences with TFs")
axis(2, lwd = 2)
# Number of linked enhancers per key-TF count bin.
table(cut(rowSums(TFmat_linked_with_activation[, keyTFs, drop = FALSE]),
          c(-Inf, 0, 1, 2, 4, Inf)))
##
## (-Inf,0] (0,1] (1,2] (2,4] (4, Inf]
## 325 185 102 147 684
# Enhancers with at least one key-TF change. The total is taken from the matrix
# rather than the previously hard-coded 1443 (the sum of the counts above).
nrow(TFmat_linked_with_activation) -
  sum(rowSums(TFmat_linked_with_activation[, keyTFs, drop = FALSE]) == 0)
## [1] 1118
How many changes affect TFs that are not stripe factors?
# Columns for TFs that are not stripe factors, then the distribution of the
# number of such TFs with at least one change per enhancer.
not_stripeTFs_changes <- TFmat_linked_with_activation[
  , !colnames(TFmat_linked_with_activation) %in% human_stripe_factors$V1, drop = FALSE
]
table(rowSums(not_stripeTFs_changes > 0))
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 25 27 51 64 67 68 67 66 77 59 82 73 71 65 47 61 60 42 37 35 33 25 29 23 24 19
## 26 27 28 29 30 31 32 33 34 35 36 37 38 40 41 42 43 44 45 46 47 48 50 53 54 56
## 20 19 16 9 11 15 10 7 3 8 2 3 2 2 1 2 1 2 3 1 1 1 1 1 1 1
## 66 68 70
## 1 1 1
Export these enhancers
# Split the linked enhancers by whether any key stripe TF has a changed motif.
# drop = FALSE keeps the selection two-dimensional even if keyTFs has length one.
enhancers__stripe_factors <- TFmat_linked_with_activation[, keyTFs, drop = FALSE]
enhancers_with_stripe_factors <- TFmat_linked_with_activation[rowSums(enhancers__stripe_factors) > 0, ]
enhancers_wo_stripe_factors <- TFmat_linked_with_activation[rowSums(enhancers__stripe_factors) == 0, ]
save(enhancers_with_stripe_factors,
     enhancers_wo_stripe_factors,
     file = paste0(objects_directory, "enhancers_stripeTFs_no_stripeTFs.RData"))
#' Count TFBS changes per peak and transcription factor
#'
#' @param tfmut Object exposing `peak` and `TF` columns via `$`, one entry per
#'   TFBS change.
#' @param theTFs Vector of TF names; duplicates are removed and the remaining
#'   order defines the columns.
#' @param allPeaks Object whose `names()` are the peak identifiers and whose
#'   `length()` is the number of peaks; defines the rows.
#' @return A data.frame of integer counts with one row per peak and one column
#'   per TF. Entries of `tfmut` whose peak is not in `names(allPeaks)` or whose
#'   TF is not in `theTFs` are ignored.
numberTFperPeak <- function(tfmut, theTFs, allPeaks) {
  tf_names <- as.character(unique(theTFs))
  peak_names <- names(allPeaks)
  counts <- matrix(0L,
                   nrow = length(allPeaks),
                   ncol = length(tf_names),
                   dimnames = list(peak_names, tf_names))

  # Locate every change in the matrix; NA marks an unknown peak or TF.
  row_idx <- match(as.character(tfmut$peak), peak_names)
  col_idx <- match(as.character(tfmut$TF), tf_names)
  known <- !is.na(row_idx) & !is.na(col_idx)

  if (any(known)) {
    # Column-major linear index of each (peak, TF) pair, counted in one pass
    # instead of looping over TFs and assigning into a data.frame.
    linear_idx <- (col_idx[known] - 1L) * nrow(counts) + row_idx[known]
    counts[] <- tabulate(linear_idx, nbins = length(counts))
  }

  res <- as.data.frame(counts)
  # Make sure the TF names are used verbatim as column names.
  names(res) <- tf_names
  res
}
## -----------------------
# Species-specific TFBS changes: entries with no overlapping change in the other
# species. The original `x[-queryHits(findOverlaps(x, y))]` returns an EMPTY
# object when there is no overlap at all, because `x[-integer(0)]` selects
# nothing. Filtering on `countOverlaps(x, y) == 0` keeps the same entries
# otherwise and keeps everything in the no-overlap case.
linked_with_activation_TFBSchange_Hs_spe <- linked_with_activation_TFBSchange[
  countOverlaps(linked_with_activation_TFBSchange, linked_with_activation_TFBSchange_chimp) == 0
]
linked_with_activation_TFBSchange_Pt_spe <- linked_with_activation_TFBSchange_chimp[
  countOverlaps(linked_with_activation_TFBSchange_chimp, linked_with_activation_TFBSchange) == 0
]
TFmat_linked_with_activation_Hs <- numberTFperPeak(
  tfmut = linked_with_activation_TFBSchange_Hs_spe,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_linked_with_activation
)
TFmat_linked_with_activation_Pt <- numberTFperPeak(
  tfmut = linked_with_activation_TFBSchange_Pt_spe,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_linked_with_activation
)
conserved_TFBSchange_Hs_spe <- conserved_TFBSchange[
  countOverlaps(conserved_TFBSchange, conserved_TFBSchange_chimp) == 0
]
conserved_TFBSchange_Pt_spe <- conserved_TFBSchange_chimp[
  countOverlaps(conserved_TFBSchange_chimp, conserved_TFBSchange) == 0
]
TFmat_conserved_Hs <- numberTFperPeak(
  tfmut = conserved_TFBSchange_Hs_spe,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = conserved_enhancers
)
TFmat_conserved_Pt <- numberTFperPeak(
  tfmut = conserved_TFBSchange_Pt_spe,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = conserved_enhancers
)
all(rownames(TFmat_conserved_Hs) == rownames(TFmat_conserved_Pt))
## [1] TRUE
# Per enhancer and TF: does one species have strictly more specific sites?
net_TFBS_gain_conserved <- (TFmat_conserved_Hs > TFmat_conserved_Pt)
net_TFBS_loss_conserved <- (TFmat_conserved_Hs < TFmat_conserved_Pt)
net_TFBS_gain_linked <- (TFmat_linked_with_activation_Hs > TFmat_linked_with_activation_Pt)
net_TFBS_loss_linked <- (TFmat_linked_with_activation_Hs < TFmat_linked_with_activation_Pt)
## -----------------------
# Per TF: number of enhancers with a net TFBS gain (x) vs a net loss (y), for
# linked (left panel) and conserved (right panel) enhancers. Key TFs are blue.
par(mfrow = c(1, 2), pty = "s", mar = c(4, 4, 3, 3))

plot(
  colSums(net_TFBS_gain_linked),
  colSums(net_TFBS_loss_linked),
  main = "Linked", xlab = "Gain in TFBS", ylab = "TFBS loss",
  xlim = c(0, 60), ylim = c(0, 60),
  pch = 19, cex = 0.5,
  col = ifelse(names(colSums(net_TFBS_gain_linked)) %in% keyTFs, "blue", "gray")
)
abline(a = 0, b = 1)
axis(side = 1, lwd = 2)
axis(side = 2, lwd = 2)
box(col = "black", lwd = 2)

plot(
  colSums(net_TFBS_gain_conserved),
  colSums(net_TFBS_loss_conserved),
  main = "Conserved", xlab = "Gain in TFBS", ylab = "TFBS loss",
  xlim = c(0, 60), ylim = c(0, 60),
  pch = 19, cex = 0.5,
  col = ifelse(names(colSums(net_TFBS_gain_conserved)) %in% keyTFs, "blue", "gray")
)
abline(a = 0, b = 1)
axis(side = 1, lwd = 2)
axis(side = 2, lwd = 2)
box(col = "black", lwd = 2)
# Promoters of the genes in HS_UP_Genes, resized to 1,000,000 bp around their centre.
up_set <- HS_UP_Genes$ensembl_id
promoters_HITS_UP <- promoters_filtered_gr[which(promoters_filtered_gr$gene_id %in% up_set)]
promoters_HITS_UP_500 <- resize(promoters_HITS_UP, 1000000, fix = "center")

# Per window: number of overlapping linked enhancers with / without key stripe
# TF changes, and number of overlapping gained enhancers of any kind.
promoters_HITS_UP_500_counting <- data.frame(
  with_stripeTF = countOverlaps(
    promoters_HITS_UP_500,
    enhancers_linked_with_activation[which(names(enhancers_linked_with_activation) %in% rownames(enhancers_with_stripe_factors))]
  ),
  wo_stripeTF = countOverlaps(
    promoters_HITS_UP_500,
    enhancers_linked_with_activation[which(names(enhancers_linked_with_activation) %in% rownames(enhancers_wo_stripe_factors))]
  ),
  any = countOverlaps(promoters_HITS_UP_500, genuine_gained_enhancers_gr)
)
## stripe no stripe
prom_with_with <- rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF > 0 & promoters_HITS_UP_500_counting$wo_stripeTF > 0, ])
prom_with_wo <- rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF > 0 & promoters_HITS_UP_500_counting$wo_stripeTF == 0, ])
prom_wo_with <- rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF == 0 & promoters_HITS_UP_500_counting$wo_stripeTF > 0, ])
prom_wo_wo <- rownames(promoters_HITS_UP_500_counting[promoters_HITS_UP_500_counting$with_stripeTF == 0 & promoters_HITS_UP_500_counting$wo_stripeTF == 0, ])

# Keep windows with at least one linked enhancer, convert to presence/absence
# and sort the rows for display.
promoters_HITS_UP_500_counting_enh <- promoters_HITS_UP_500_counting[rowSums(promoters_HITS_UP_500_counting[, 1:2]) > 0, ]
m <- promoters_HITS_UP_500_counting_enh > 0
m <- m[order(m[, 1], m[, 2]), 1:2]
# Presence/absence image of the two enhancer categories per promoter window,
# followed by the number of windows in each combination.
par(mar = c(1, 1, 1, 1))
image(t(m), col = c("white", "coral2"), axes = FALSE)
box(col = "black", lwd = 2)
abline(v = 0.5, lwd = 2)
# only enhancers without stripe-TF changes
sum(m[, 1] == 0 & m[, 2] > 0)
## [1] 29
# both kinds of enhancers
sum(m[, 1] > 0 & m[, 2] > 0)
## [1] 258
# only enhancers with stripe-TF changes
sum(m[, 1] > 0 & m[, 2] == 0)
## [1] 299
#' Read footprint scores for every TF result folder in a directory
#'
#' Expects `TFdir` to contain one sub-directory per motif named
#' `<prefix>_FootPrints`, each holding `<prefix>.bed`. Other directory entries
#' are ignored (the previous `strsplit()` approach turned them into bogus
#' prefixes and failed on them).
#'
#' @param TFdir Directory holding the `*_FootPrints` folders.
#' @param SPECIES Label stored in the `species` column of the result.
#' @return data.frame with columns `score` (5th BED column), `TF` (the prefix up
#'   to its first underscore) and `species`; `NULL` when nothing was read.
readFootprintAnalysis_bed <- function(TFdir, SPECIES) {
  entries <- list.files(TFdir)
  entries <- entries[grepl("_FootPrints$", entries)]
  prefixes <- sub("_FootPrints$", "", entries)

  per_motif <- lapply(prefixes, function(prefix) {
    bed_path <- paste0(TFdir, "/", prefix, "_FootPrints/", prefix, ".bed")
    # read.delim() fails on a zero-byte file; skip it with a warning instead.
    if (isTRUE(file.size(bed_path) == 0)) {
      warning("Skipping empty footprint file: ", bed_path, call. = FALSE)
      return(NULL)
    }
    bed <- read.delim(bed_path, sep = "\t", header = FALSE)
    if (ncol(bed) < 5) {
      stop("Footprint file has fewer than 5 columns: ", bed_path, call. = FALSE)
    }
    if (nrow(bed) == 0) {
      return(NULL)
    }
    data.frame(score = bed$V5,
               TF = sub("_.*$", "", prefix),
               species = SPECIES)
  })

  do.call("rbind", per_motif)
}
# Footprint scores for the human and chimpanzee footprint result folders.
footprintHg <- readFootprintAnalysis_bed(paste0(outputs_directory, "footprint_analysis/Stripe_TF_HG38_Footprints_10bp/"), "Human")
footprintPt <- readFootprintAnalysis_bed(paste0(outputs_directory, "footprint_analysis/Stripe_TF_PT06_Footprints_10bp/"), "Chimpanzee")

# Subsets restricted to motifs annotated to the key TFs.
# NOTE(review): these two subsets are not used by the plot below, which is built
# from the full tables; confirm that is intended.
footprintHg_keyTFs <- footprintHg[footprintHg$TF %in% TFsEnsemblG[TFsEnsemblG$Fixed %in% keyTFs, 1], ]
footprintPt_keyTFs <- footprintPt[footprintPt$TF %in% TFsEnsemblG[TFsEnsemblG$Fixed %in% keyTFs, 1], ]

footprint_scores <- rbind(footprintHg, footprintPt)
footprint_scores$species <- factor(footprint_scores$species, levels = c("Human", "Chimpanzee"))
p1 <- ggboxplot(footprint_scores, x = "TF", y = "score", color = "species",
                palette = c("black", "red"), outlier.shape = NA, rotate = TRUE)
ggpar(p1, ylim = c(0, 500)) + rotate_x_text(90)
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.
# Repeat the TFBS analysis for the enhancer sets in the *_TADs BED files.
enhancers_linked_with_activation_TADs <- import.bed(paste0(outputs_directory, "enhancers_linked_with_activation_TADs.bed"))
names(enhancers_linked_with_activation_TADs) <- enhancers_linked_with_activation_TADs$name
enhancers_not_linked_with_activation_TADs <- import.bed(paste0(outputs_directory, "enhancers_not_linked_with_activation_TADs.bed"))
names(enhancers_not_linked_with_activation_TADs) <- enhancers_not_linked_with_activation_TADs$name

# readBedtools_res() and processTFBSresult() are defined elsewhere in this file.
linked_with_activation_TADs_TFBSchange <- readBedtools_res(
  filePath = paste0(outputs_directory, "/TFBS_analysis/enhancers_linked_with_activation_TADs/"),
  chroms = paste0("chr", c(1:22, 'X', 'Y')), 4, 7
)
not_linked_with_activation_TADs_TFBSchange <- readBedtools_res(
  filePath = paste0(outputs_directory, "/TFBS_analysis/enhancers_not_linked_with_activation_TADs/"),
  chroms = paste0("chr", c(1:22, 'X', 'Y')), 4, 7
)
linked_with_activation_TADs_TFBSchange <- processTFBSresult(
  linked_with_activation_TADs_TFBSchange,
  tfanno = TFsEnsemblG,
  nameColumn = "names"
)
not_linked_with_activation_TADs_TFBSchange <- processTFBSresult(
  not_linked_with_activation_TADs_TFBSchange,
  tfanno = TFsEnsemblG,
  nameColumn = "names"
)
save(linked_with_activation_TADs_TFBSchange,
     not_linked_with_activation_TADs_TFBSchange,
     file = paste0(objects_directory, "linked_or_not_with_activation_TADs_TFBSchange_chimp.RData"))

TFmat_linked_with_activation_TADs <- makeMatrixTFBS4peaks(
  tfmut = linked_with_activation_TADs_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_linked_with_activation_TADs
)
TFmat_not_linked_with_activation_TADs <- makeMatrixTFBS4peaks(
  tfmut = not_linked_with_activation_TADs_TFBSchange,
  theTFs = unique(TFsEnsemblG$Fixed),
  allPeaks = enhancers_not_linked_with_activation_TADs
)
save(TFmat_linked_with_activation_TADs, TFmat_not_linked_with_activation_TADs,
     file = paste0(objects_directory, "TFmatrices_linked_not_linked_TADs.RData"))
load(paste0(objects_directory, "TFmatrices_linked_not_linked_TADs.RData"))

# Per TF: number of enhancers with at least one changed motif (Motif) and with
# none (noMotif), for linked (TAL) and not-linked (LAT) enhancers.
TAL <- do.call("rbind",
               apply(TFmat_linked_with_activation_TADs, 2, function(x) {
                 data.frame(Motif = sum(x > 0), noMotif = sum(x == 0))
               }))
LAT <- do.call("rbind",
               apply(TFmat_not_linked_with_activation_TADs, 2, function(x) {
                 data.frame(Motif = sum(x > 0), noMotif = sum(x == 0))
               }))

# One Fisher's exact test per TF; each new row is prepended to the result.
TFs_TAD_FT <- data.frame()
for (i in colnames(TFmat_linked_with_activation_TADs)) {
  m <- rbind(linked = TAL[rownames(TAL) == i, ],
             notLinked = LAT[rownames(LAT) == i, ])
  tp <- fisher.test(m)
  tp <- data.frame(p_val = tp$p.value,
                   odds = tp$estimate,
                   number_in_linked = TAL[rownames(TAL) == i, 1],
                   number_in_not_linked = LAT[rownames(LAT) == i, 1],
                   fraction_in_linked = TAL[rownames(TAL) == i, 1] / rowSums(TAL[rownames(TAL) == i, ]),
                   fraction_in_not_linked = LAT[rownames(LAT) == i, 1] / rowSums(LAT[rownames(LAT) == i, ]),
                   tf = i)
  TFs_TAD_FT <- rbind(tp, TFs_TAD_FT)
}
# p.adjust() is called with its default method.
TFs_TAD_FT$p_adjust <- p.adjust(TFs_TAD_FT$p_val)
# Fraction of enhancers carrying a changed motif, per TF: linked (x) vs not
# linked (y). TFs with adjusted p < 0.05 are highlighted and labelled.
par(pty = "s", mfrow = c(1, 1))
plot(x = TFs_TAD_FT$fraction_in_linked,
     y = TFs_TAD_FT$fraction_in_not_linked,
     pch = 19, cex = 0.5,
     xlab = "Linked with activation",
     ylab = "Not linked with activation",
     xlim = c(0, 0.3), ylim = c(0, 0.3),
     col = ifelse(TFs_TAD_FT$p_adjust < 0.05, "blue3", "wheat2"))
abline(a = 0, b = 1, col = 'black')
axis(1, lwd = 2)
axis(2, lwd = 2)
box(col = 'black', lwd = 2)
text(x = TFs_TAD_FT$fraction_in_linked[TFs_TAD_FT$p_adjust < 0.05] + 0.005,
     y = TFs_TAD_FT$fraction_in_not_linked[TFs_TAD_FT$p_adjust < 0.05] + 0.005,
     labels = TFs_TAD_FT$tf[TFs_TAD_FT$p_adjust < 0.05],
     cex = 1)
# TFs with raw p < 0.01 that occur in more than 10% of the linked enhancers.
TFs_TAD_FT[TFs_TAD_FT$p_val < 0.01 & TFs_TAD_FT$fraction_in_linked > 0.1, ]
## p_val odds number_in_linked
## odds ratio673 0.00150988966405420457 1.608891 64
## odds ratio669 0.00061743780893750864 1.683180 63
## odds ratio664 0.00000001553881099418 1.845451 143
## odds ratio645 0.00002249720073059971 1.586955 135
## odds ratio639 0.00547001523869697359 1.401620 100
## odds ratio636 0.00001844604778766271 1.738707 91
## odds ratio635 0.00000177955637888710 1.693353 133
## odds ratio626 0.00051003649052152309 1.607932 79
## odds ratio623 0.00004148693474305783 1.610882 117
## odds ratio616 0.00020763714135284235 1.702754 72
## odds ratio603 0.00000000100972795250 2.345061 86
## odds ratio587 0.00044599744467552140 1.518042 109
## odds ratio584 0.00005385690669902456 1.799449 72
## odds ratio582 0.00000037117379643772 1.768726 130
## odds ratio579 0.00003393160944478522 1.537540 153
## odds ratio566 0.00000000003216698365 2.880740 68
## odds ratio538 0.00000410606699485287 1.786143 98
## odds ratio537 0.00000000049265940835 2.234227 101
## odds ratio514 0.00000028707333831881 1.828129 117
## odds ratio513 0.00000196034299164705 1.876367 89
## odds ratio512 0.00000000000004617375 2.398890 131
## odds ratio511 0.00000000010021448003 2.256986 106
## odds ratio478 0.00003385372076594007 1.704218 93
## odds ratio473 0.00003225347827537949 1.725282 90
## odds ratio458 0.00000033506023894474 1.969773 91
## odds ratio457 0.00000010818835810204 2.112920 80
## odds ratio408 0.00000000256319916115 1.926115 141
## odds ratio308 0.00000138201327477164 1.652499 155
## odds ratio287 0.00000002345307931912 2.008273 105
## odds ratio286 0.00000025777319341452 2.094659 77
## odds ratio284 0.00000000811899658359 2.205526 87
## odds ratio283 0.00000000007428138142 2.293758 103
## odds ratio282 0.00001458947286974133 1.698132 104
## odds ratio96 0.00000578088929590625 1.670073 124
## odds ratio95 0.00000000624475659928 2.205966 88
## odds ratio57 0.00001776370364941432 1.818494 78
## number_in_not_linked fraction_in_linked fraction_in_not_linked
## odds ratio673 316 0.1024 0.06620574
## odds ratio669 298 0.1008 0.06243453
## odds ratio664 661 0.2288 0.13848732
## odds ratio645 706 0.2160 0.14791536
## odds ratio639 571 0.1600 0.11963126
## odds ratio636 426 0.1456 0.08925204
## odds ratio635 657 0.2128 0.13764928
## odds ratio626 394 0.1264 0.08254766
## odds ratio623 597 0.1872 0.12507857
## odds ratio616 339 0.1152 0.07102451
## odds ratio603 304 0.1376 0.06369160
## odds ratio587 583 0.1744 0.12214540
## odds ratio584 322 0.1152 0.06746281
## odds ratio582 617 0.2080 0.12926880
## odds ratio579 831 0.2448 0.17410434
## odds ratio566 194 0.1088 0.04064530
## odds ratio538 450 0.1568 0.09428033
## odds ratio537 379 0.1616 0.07940499
## odds ratio514 534 0.1872 0.11187932
## odds ratio513 388 0.1424 0.08129059
## odds ratio512 475 0.2096 0.09951812
## odds ratio511 396 0.1696 0.08296669
## odds ratio478 444 0.1488 0.09302326
## odds ratio473 424 0.1440 0.08883302
## odds ratio458 380 0.1456 0.07961450
## odds ratio457 310 0.1280 0.06494867
## odds ratio408 627 0.2256 0.13136392
## odds ratio308 794 0.2480 0.16635240
## odds ratio287 436 0.1680 0.09134716
## odds ratio286 300 0.1232 0.06285355
## odds ratio284 326 0.1392 0.06830086
## odds ratio283 378 0.1648 0.07919547
## odds ratio282 502 0.1664 0.10517494
## odds ratio96 616 0.1984 0.12905929
## odds ratio95 330 0.1408 0.06913891
## odds ratio57 347 0.1248 0.07270061
## tf p_adjust
## odds ratio673 ZSCAN22 0.92254258473711903
## odds ratio669 ZNF76 0.38281144154125535
## odds ratio664 ZNF770 0.00001034884812212
## odds ratio645 ZNF467 0.01444320286904501
## odds ratio639 ZNF394 1.00000000000000000
## odds ratio636 ZNF350 0.01189770082304245
## odds ratio635 ZNF341 0.00116738898454994
## odds ratio626 ZNF281 0.31724269710438735
## odds ratio623 ZNF263 0.02638569049658478
## odds ratio616 ZNF148 0.13060376191093784
## odds ratio603 ZFX 0.00000067651772818
## odds ratio587 ZBTB17 0.27830240547752533
## odds ratio584 ZNF324 0.03419913575388060
## odds ratio582 WT1 0.00024460353185246
## odds ratio579 VEZF1 0.02168229843521776
## odds ratio566 THAP1 0.00000002168054698
## odds ratio538 TBX15 0.00267715568064407
## odds ratio537 TBX1 0.00000033057446300
## odds ratio514 SP4 0.00018975547662873
## odds ratio513 SP3 0.00128402465952881
## odds ratio512 SP2 0.00000000003116728
## odds ratio511 SP1 0.00000006734413058
## odds ratio478 RXRA 0.02166638129020165
## odds ratio473 RREB1 0.02067447957451825
## odds ratio458 RARA 0.00022113975770353
## odds ratio457 PURA 0.00007183706977975
## odds ratio408 PATZ1 0.00000171478023881
## odds ratio308 MAZ 0.00090798272152497
## odds ratio287 KLF6 0.00001559629774721
## odds ratio286 KLF5 0.00017064585404041
## odds ratio284 KLF3 0.00000541537072126
## odds ratio283 KLF16 0.00000004999136969
## odds ratio282 KLF15 0.00943938894672264
## odds ratio96 EGR2 0.00376335893163497
## odds ratio95 EGR1 0.00000417149740832
## odds ratio57 NR2F1 0.01147535255752165
# Set the `future.globals.maxSize` option to 64000 * 1024^2 bytes.
ms <- 64000 * 1024^2
options(future.globals.maxSize = ms)
# 10x raw_feature_bc_matrix directories of the 16 human prenatal samples.
# The shared prefix is factored out; the resulting paths are unchanged.
human_sc_root <- paste0(outputs_directory, '/scRNA_published_data/HSapiens_SingleCell_PRJNA899373/')
human1 <- paste0(human_sc_root, 'HS_Prenatal_F_667_S13_SingleCell/raw_feature_bc_matrix/')
human2 <- paste0(human_sc_root, 'HS_Prenatal_F_698_S15_SingleCell/raw_feature_bc_matrix/')
human3 <- paste0(human_sc_root, 'HS_Prenatal_M_700_S14_SingleCell/raw_feature_bc_matrix/')
human4 <- paste0(human_sc_root, 'HS_Prenatal_F_702_9C_SingleCell/raw_feature_bc_matrix/')
human5 <- paste0(human_sc_root, 'HS_Prenatal_F_667_60C_SingleCell/raw_feature_bc_matrix/')
human6 <- paste0(human_sc_root, 'HS_Prenatal_F_669_3C_SingleCell/raw_feature_bc_matrix/')
human7 <- paste0(human_sc_root, 'HS_Prenatal_F_671_64C_SingleCell/raw_feature_bc_matrix/')
human8 <- paste0(human_sc_root, 'HS_Prenatal_M_673_62C_SingleCell/raw_feature_bc_matrix/')
human9 <- paste0(human_sc_root, 'HS_Prenatal_M_675_23C_SingleCell/raw_feature_bc_matrix/')
human10 <- paste0(human_sc_root, 'HS_Prenatal_M_677_63C_SingleCell/raw_feature_bc_matrix/')
human11 <- paste0(human_sc_root, 'HS_Prenatal_M_679_30C_SingleCell/raw_feature_bc_matrix/')
human12 <- paste0(human_sc_root, 'HS_Prenatal_M_681_34C_SingleCell/raw_feature_bc_matrix/')
human13 <- paste0(human_sc_root, 'HS_Prenatal_F_686_56C_SingleCell/raw_feature_bc_matrix/')
human14 <- paste0(human_sc_root, 'HS_Prenatal_M_690_26C_SingleCell/raw_feature_bc_matrix/')
human15 <- paste0(human_sc_root, 'HS_Prenatal_F_688_11C_SingleCell/raw_feature_bc_matrix/')
human16 <- paste0(human_sc_root, 'HS_Prenatal_M_692_24C_SingleCell/raw_feature_bc_matrix/')
# 10x raw_feature_bc_matrix directories of the macaque samples.
# NOTE(review): only the first two paths start with "/" after outputs_directory;
# the others rely on outputs_directory ending in a slash.
macaque1 <- paste0(outputs_directory, '/scRNA_published_data/RhMacaque/scRNA_syn17093056_RMB683_DFC/multi/count/raw_feature_bc_matrix/')
macaque2 <- paste0(outputs_directory, '/scRNA_published_data/RhMacaque/scRNA_syn17093056_RMB691_DFC/multi/count/raw_feature_bc_matrix/')
macaque_Ch_78_1 <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/scRNA_SRR23687004_macaque/raw_feature_bc_matrix/')
macaque_Ch_110_DFC <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E110_SRR23687017_M_DFC_scRNA/raw_feature_bc_matrix/')
macaque_Ch_110_OFC <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E110_SRR23686999_M_OFC_scRNA/raw_feature_bc_matrix/')
macaque_Ch_93_DFC <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E93_SRR23687065_M_DFC_scRNA/raw_feature_bc_matrix/')
macaque_Ch_110_DFC_S2 <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E110_SRR23687057_M_DFC_scRNA/raw_feature_bc_matrix/')
macaque_Ch_110_VFC <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E110_SRR23687060_M_VFC_scRNA/raw_feature_bc_matrix/')
macaque_Ch_110_VFC_S2 <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E110_SRR23687012_M_VFC_scRNA/raw_feature_bc_matrix/')
macaque_Ch_77_Frontal <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E77_F_Frontal/raw_feature_bc_matrix/')
macaque_Ch_64_Frontal <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E64_F_Frontal/raw_feature_bc_matrix/')
macaque_Ch_62_Frontal <- paste0(outputs_directory, 'scRNA_published_data/RhMacaque/E62_F_frontal_scRNA/raw_feature_bc_matrix/')
# Published cell-level metadata for the human samples.
human_metadata <- read.csv(paste0(outputs_directory, '/scRNA_published_data/GSE217511_CorticalPlate_Seuratmetadata.csv'))
# NOTE(review): this only stores the path of the sample annotation table; the
# file is not read here.
human_sample_anno <- paste0(outputs_directory, '/scRNA_published_data/MetaTable.txt')
# UMI: the part of column X before its first underscore.
human_metadata$UMI <- vapply(strsplit(human_metadata$X, "_"), function(x) x[[1]], character(1))
Let’s consider the data from the foetal like cells
# Read the 10x matrices of every human sample.
human1_expression <- Read10X(human1)
human2_expression <- Read10X(human2)
human3_expression <- Read10X(human3)
human4_expression <- Read10X(human4)
human5_expression <- Read10X(human5)
human6_expression <- Read10X(human6)
human7_expression <- Read10X(human7)
human8_expression <- Read10X(human8)
human9_expression <- Read10X(human9)
human10_expression <- Read10X(human10)
human11_expression <- Read10X(human11)
human12_expression <- Read10X(human12)
human13_expression <- Read10X(human13)
human14_expression <- Read10X(human14)
human15_expression <- Read10X(human15)
human16_expression <- Read10X(human16)

# Read the 10x matrices of every macaque sample.
macaque1_expression <- Read10X(macaque1)
macaque2_expression <- Read10X(macaque2)
macaque_CN_78_1_expression <- Read10X(macaque_Ch_78_1)
macaque_DFC_110_1_expression <- Read10X(macaque_Ch_110_DFC)
macaque_OFC_110_1_expression <- Read10X(macaque_Ch_110_OFC)
macaque_93_DFC_expression <- Read10X(macaque_Ch_93_DFC)
macaque_110_DFC_S2_expression <- Read10X(macaque_Ch_110_DFC_S2)
macaque_110_VFC_expression <- Read10X(macaque_Ch_110_VFC)
macaque_110_VFC_S2_expression <- Read10X(macaque_Ch_110_VFC_S2)
macaque_77_Frontal_expression <- Read10X(macaque_Ch_77_Frontal)
macaque_64_Frontal_expression <- Read10X(macaque_Ch_64_Frontal)
macaque_62_Frontal_expression <- Read10X(macaque_Ch_62_Frontal)
## ---------------------------
## Replace every count matrix by a Seurat object (same variable name).
## All samples share the project label "AstroEvo_old" and the same creation
## filters (min.cells = 3, min.features = 200).

## Human samples
human1_expression = CreateSeuratObject(human1_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human2_expression = CreateSeuratObject(human2_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human3_expression = CreateSeuratObject(human3_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human4_expression = CreateSeuratObject(human4_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human5_expression = CreateSeuratObject(human5_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human6_expression = CreateSeuratObject(human6_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human7_expression = CreateSeuratObject(human7_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human8_expression = CreateSeuratObject(human8_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human9_expression = CreateSeuratObject(human9_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human10_expression = CreateSeuratObject(human10_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human11_expression = CreateSeuratObject(human11_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human12_expression = CreateSeuratObject(human12_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human13_expression = CreateSeuratObject(human13_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human14_expression = CreateSeuratObject(human14_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human15_expression = CreateSeuratObject(human15_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
human16_expression = CreateSeuratObject(human16_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)

## Macaque samples
macaque1_expression = CreateSeuratObject(macaque1_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque2_expression = CreateSeuratObject(macaque2_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_CN_78_1_expression = CreateSeuratObject(macaque_CN_78_1_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_DFC_110_1_expression = CreateSeuratObject(macaque_DFC_110_1_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_OFC_110_1_expression = CreateSeuratObject(macaque_OFC_110_1_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_93_DFC_expression = CreateSeuratObject(macaque_93_DFC_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_110_DFC_S2_expression = CreateSeuratObject(macaque_110_DFC_S2_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_110_VFC_expression = CreateSeuratObject(macaque_110_VFC_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_110_VFC_S2_expression = CreateSeuratObject(macaque_110_VFC_S2_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_77_Frontal_expression = CreateSeuratObject(macaque_77_Frontal_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_64_Frontal_expression = CreateSeuratObject(macaque_64_Frontal_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
macaque_62_Frontal_expression = CreateSeuratObject(macaque_62_Frontal_expression, project = "AstroEvo_old", min.cells = 3, min.features = 200)
## ----------------
## Store, for every sample, the percentage of counts coming from features whose
## name matches "^MT-" in the metadata column "percent.mt".
## NOTE(review): "^MT-" is the human-style mitochondrial gene prefix; confirm the
## macaque annotation uses the same prefix, otherwise percent.mt will be 0 there.

## Human samples
human1_expression[["percent.mt"]] <- PercentageFeatureSet(human1_expression, pattern = "^MT-")
human2_expression[["percent.mt"]] <- PercentageFeatureSet(human2_expression, pattern = "^MT-")
human3_expression[["percent.mt"]] <- PercentageFeatureSet(human3_expression, pattern = "^MT-")
human4_expression[["percent.mt"]] <- PercentageFeatureSet(human4_expression, pattern = "^MT-")
human5_expression[["percent.mt"]] <- PercentageFeatureSet(human5_expression, pattern = "^MT-")
human6_expression[["percent.mt"]] <- PercentageFeatureSet(human6_expression, pattern = "^MT-")
human7_expression[["percent.mt"]] <- PercentageFeatureSet(human7_expression, pattern = "^MT-")
human8_expression[["percent.mt"]] <- PercentageFeatureSet(human8_expression, pattern = "^MT-")
human9_expression[["percent.mt"]] <- PercentageFeatureSet(human9_expression, pattern = "^MT-")
human10_expression[["percent.mt"]] <- PercentageFeatureSet(human10_expression, pattern = "^MT-")
human11_expression[["percent.mt"]] <- PercentageFeatureSet(human11_expression, pattern = "^MT-")
human12_expression[["percent.mt"]] <- PercentageFeatureSet(human12_expression, pattern = "^MT-")
human13_expression[["percent.mt"]] <- PercentageFeatureSet(human13_expression, pattern = "^MT-")
human14_expression[["percent.mt"]] <- PercentageFeatureSet(human14_expression, pattern = "^MT-")
human15_expression[["percent.mt"]] <- PercentageFeatureSet(human15_expression, pattern = "^MT-")
human16_expression[["percent.mt"]] <- PercentageFeatureSet(human16_expression, pattern = "^MT-")

## Macaque samples
macaque1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque1_expression, pattern = "^MT-")
macaque2_expression[["percent.mt"]] <- PercentageFeatureSet(macaque2_expression, pattern = "^MT-")
macaque_CN_78_1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_CN_78_1_expression, pattern = "^MT-")
macaque_DFC_110_1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_DFC_110_1_expression, pattern = "^MT-")
macaque_OFC_110_1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_OFC_110_1_expression, pattern = "^MT-")
macaque_93_DFC_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_93_DFC_expression, pattern = "^MT-")
macaque_110_DFC_S2_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_110_DFC_S2_expression, pattern = "^MT-")
macaque_110_VFC_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_110_VFC_expression, pattern = "^MT-")
macaque_110_VFC_S2_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_110_VFC_S2_expression, pattern = "^MT-")
macaque_77_Frontal_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_77_Frontal_expression, pattern = "^MT-")
macaque_64_Frontal_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_64_Frontal_expression, pattern = "^MT-")
macaque_62_Frontal_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_62_Frontal_expression, pattern = "^MT-")
## --------------------------------------------
## Label every cell with its sample of origin. These labels are the values that
## `orig.ident` carries for each sample from here on.

## Human samples
human1_expression$orig.ident = 'human1'
human2_expression$orig.ident = 'human2'
human3_expression$orig.ident = 'human3'
human4_expression$orig.ident = 'human4'
human5_expression$orig.ident = 'human5'
human6_expression$orig.ident = 'human6'
human7_expression$orig.ident = 'human7'
human8_expression$orig.ident = 'human8'
human9_expression$orig.ident = 'human9'
human10_expression$orig.ident = 'human10'
human11_expression$orig.ident = 'human11'
human12_expression$orig.ident = 'human12'
human13_expression$orig.ident = 'human13'
human14_expression$orig.ident = 'human14'
human15_expression$orig.ident = 'human15'
human16_expression$orig.ident = 'human16'

## Macaque samples
macaque1_expression$orig.ident = 'Macaque1'
macaque2_expression$orig.ident = 'Macaque2'
macaque_CN_78_1_expression$orig.ident = 'Macaque3_78_1'
macaque_DFC_110_1_expression$orig.ident = 'Macaque3_110_DFC'
macaque_OFC_110_1_expression$orig.ident = 'Macaque3_110_OFC'
macaque_93_DFC_expression$orig.ident = 'Macaque3_93_DFC'
macaque_110_DFC_S2_expression$orig.ident = 'Macaque3_110_DFC_S2'
macaque_110_VFC_expression$orig.ident = 'Macaque3_110_VFC'
macaque_110_VFC_S2_expression$orig.ident = 'Macaque3_110_VFC_S2'
macaque_77_Frontal_expression$orig.ident = 'Macaque3_77_Frontal'
macaque_64_Frontal_expression$orig.ident = 'Macaque3_64_Frontal'
macaque_62_Frontal_expression$orig.ident = 'Macaque3_62_Frontal'
## ------------------------------------------
## Cell-cycle scoring for every sample, followed by a second percent.mt pass.
## The S and G2/M gene lists from `cc.genes` are restricted to genes present in
## `all_genes`; the restricted lists are identical for every call, so they are
## computed once here.
## NOTE(review): `all_genes` is taken from human1_expression only, yet the same
## lists are applied to every human and macaque object - confirm this is intended.
all_genes = rownames(human1_expression)
g2m_features = cc.genes$g2m.genes[ cc.genes$g2m.genes %in% all_genes ]
s_features = cc.genes$s.genes[ cc.genes$s.genes %in% all_genes ]

## Human samples (set.ident = FALSE keeps the current identities)
human1_expression = CellCycleScoring(human1_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human2_expression = CellCycleScoring(human2_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human3_expression = CellCycleScoring(human3_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human4_expression = CellCycleScoring(human4_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human5_expression = CellCycleScoring(human5_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human6_expression = CellCycleScoring(human6_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human7_expression = CellCycleScoring(human7_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human8_expression = CellCycleScoring(human8_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human9_expression = CellCycleScoring(human9_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human10_expression = CellCycleScoring(human10_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human11_expression = CellCycleScoring(human11_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human12_expression = CellCycleScoring(human12_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human13_expression = CellCycleScoring(human13_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human14_expression = CellCycleScoring(human14_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human15_expression = CellCycleScoring(human15_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
human16_expression = CellCycleScoring(human16_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)

## Macaque samples
macaque1_expression = CellCycleScoring(macaque1_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque2_expression = CellCycleScoring(macaque2_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_CN_78_1_expression = CellCycleScoring(macaque_CN_78_1_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_DFC_110_1_expression = CellCycleScoring(macaque_DFC_110_1_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_OFC_110_1_expression = CellCycleScoring(macaque_OFC_110_1_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_93_DFC_expression = CellCycleScoring(macaque_93_DFC_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_110_DFC_S2_expression = CellCycleScoring(macaque_110_DFC_S2_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_110_VFC_expression = CellCycleScoring(macaque_110_VFC_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_110_VFC_S2_expression = CellCycleScoring(macaque_110_VFC_S2_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_77_Frontal_expression = CellCycleScoring(macaque_77_Frontal_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_64_Frontal_expression = CellCycleScoring(macaque_64_Frontal_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)
macaque_62_Frontal_expression = CellCycleScoring(macaque_62_Frontal_expression, g2m.features = g2m_features, s.features = s_features, set.ident = FALSE)

## percent.mt is (re)computed here for every sample; the same computation also
## appears earlier in the script, before the orig.ident labelling.
## NOTE(review): "^MT-" is the human-style mitochondrial prefix - confirm it
## matches the macaque gene names.
human1_expression[["percent.mt"]] <- PercentageFeatureSet(human1_expression, pattern = "^MT-")
human2_expression[["percent.mt"]] <- PercentageFeatureSet(human2_expression, pattern = "^MT-")
human3_expression[["percent.mt"]] <- PercentageFeatureSet(human3_expression, pattern = "^MT-")
human4_expression[["percent.mt"]] <- PercentageFeatureSet(human4_expression, pattern = "^MT-")
human5_expression[["percent.mt"]] <- PercentageFeatureSet(human5_expression, pattern = "^MT-")
human6_expression[["percent.mt"]] <- PercentageFeatureSet(human6_expression, pattern = "^MT-")
human7_expression[["percent.mt"]] <- PercentageFeatureSet(human7_expression, pattern = "^MT-")
human8_expression[["percent.mt"]] <- PercentageFeatureSet(human8_expression, pattern = "^MT-")
human9_expression[["percent.mt"]] <- PercentageFeatureSet(human9_expression, pattern = "^MT-")
human10_expression[["percent.mt"]] <- PercentageFeatureSet(human10_expression, pattern = "^MT-")
human11_expression[["percent.mt"]] <- PercentageFeatureSet(human11_expression, pattern = "^MT-")
human12_expression[["percent.mt"]] <- PercentageFeatureSet(human12_expression, pattern = "^MT-")
human13_expression[["percent.mt"]] <- PercentageFeatureSet(human13_expression, pattern = "^MT-")
human14_expression[["percent.mt"]] <- PercentageFeatureSet(human14_expression, pattern = "^MT-")
human15_expression[["percent.mt"]] <- PercentageFeatureSet(human15_expression, pattern = "^MT-")
human16_expression[["percent.mt"]] <- PercentageFeatureSet(human16_expression, pattern = "^MT-")
macaque1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque1_expression, pattern = "^MT-")
macaque2_expression[["percent.mt"]] <- PercentageFeatureSet(macaque2_expression, pattern = "^MT-")
macaque_CN_78_1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_CN_78_1_expression, pattern = "^MT-")
macaque_DFC_110_1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_DFC_110_1_expression, pattern = "^MT-")
macaque_OFC_110_1_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_OFC_110_1_expression, pattern = "^MT-")
macaque_93_DFC_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_93_DFC_expression, pattern = "^MT-")
macaque_110_DFC_S2_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_110_DFC_S2_expression, pattern = "^MT-")
macaque_110_VFC_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_110_VFC_expression, pattern = "^MT-")
macaque_110_VFC_S2_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_110_VFC_S2_expression, pattern = "^MT-")
macaque_77_Frontal_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_77_Frontal_expression, pattern = "^MT-")
macaque_64_Frontal_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_64_Frontal_expression, pattern = "^MT-")
macaque_62_Frontal_expression[["percent.mt"]] <- PercentageFeatureSet(macaque_62_Frontal_expression, pattern = "^MT-")
## Cell-level QC filter, identical for every sample: keep cells with more than
## 200 and fewer than 10000 detected features and with percent.mt below 1.

## Human samples
human1_expression = subset(human1_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human2_expression = subset(human2_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human3_expression = subset(human3_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human4_expression = subset(human4_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human5_expression = subset(human5_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human6_expression = subset(human6_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human7_expression = subset(human7_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human8_expression = subset(human8_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human9_expression = subset(human9_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human10_expression = subset(human10_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human11_expression = subset(human11_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human12_expression = subset(human12_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human13_expression = subset(human13_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human14_expression = subset(human14_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human15_expression = subset(human15_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
human16_expression = subset(human16_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)

## Macaque samples
macaque1_expression = subset(macaque1_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque2_expression = subset(macaque2_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_CN_78_1_expression = subset(macaque_CN_78_1_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_DFC_110_1_expression = subset(macaque_DFC_110_1_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_OFC_110_1_expression = subset(macaque_OFC_110_1_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)

macaque_93_DFC_expression = subset(macaque_93_DFC_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_110_DFC_S2_expression = subset(macaque_110_DFC_S2_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_110_VFC_expression = subset(macaque_110_VFC_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_110_VFC_S2_expression = subset(macaque_110_VFC_S2_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_77_Frontal_expression = subset(macaque_77_Frontal_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_64_Frontal_expression = subset(macaque_64_Frontal_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
macaque_62_Frontal_expression = subset(macaque_62_Frontal_expression, subset = nFeature_RNA > 200 & nFeature_RNA < 10000 & percent.mt < 1)
## Merge all 28 filtered samples into a single object. `add.cell.ids` supplies
## one prefix per sample, in the same order as the objects
## (human1_expression first, then the list passed as second argument).
alldata = merge( human1_expression,
                 c(human2_expression,
                   human3_expression,
                   human4_expression,
                   human5_expression,
                   human6_expression,
                   human7_expression,
                   human8_expression,
                   human9_expression,
                   human10_expression,
                   human11_expression,
                   human12_expression,
                   human13_expression,
                   human14_expression,
                   human15_expression,
                   human16_expression,
                   macaque1_expression,
                   macaque2_expression,
                   macaque_CN_78_1_expression,
                   macaque_DFC_110_1_expression,
                   macaque_OFC_110_1_expression,
                   macaque_93_DFC_expression,
                   macaque_110_DFC_S2_expression,
                   macaque_110_VFC_expression,
                   macaque_110_VFC_S2_expression,
                   macaque_77_Frontal_expression,
                   macaque_64_Frontal_expression,
                   macaque_62_Frontal_expression),
                 add.cell.ids = c('human1','human2','human3','human4','human5','human6','human7','human8','human9','human10','human11','human12','human13','human14','human15','human16',
                                  "macaque1","macaque2","macaque_78_1","Macaque3_110_DFC","Macaque3_110_OFC","macaque_93_DFC_expression", "macaque_110_DFC_S2_expression", "macaque_110_VFC_expression", "macaque_110_VFC_S2_expression", "macaque_77_Frontal_expression", "macaque_64_Frontal_expression", "macaque_62_Frontal_expression"))
## Persist the merged object, reload it (so the script can be resumed from this
## point), and split it back into one object per `orig.ident` value.
save(alldata,file=paste0(objects_directory,"scRNA_published_foetal_samples.RData"))
load(paste0(objects_directory,"scRNA_published_foetal_samples.RData"))
split_seurat = SplitObject(alldata, split.by = "orig.ident")
## Run the clustering helper on every sample and save each result to its own
## .RData file.
## NOTE(review): human samples are picked from `split_seurat` by position, which
## relies on the split list following the merge order (human1 ... human16);
## macaque samples below are picked by their `orig.ident` label.
## NOTE(review): `human1` ... `human16`, `macaque1` and `macaque2` were passed to
## Read10X earlier in the script; they are overwritten with clustering results here.
human1 = perform_clustering_to_find_astrocytes(split_seurat[[1]])
human2 = perform_clustering_to_find_astrocytes(split_seurat[[2]])
human3 = perform_clustering_to_find_astrocytes(split_seurat[[3]])
human5 = perform_clustering_to_find_astrocytes(split_seurat[[5]])
human6 = perform_clustering_to_find_astrocytes(split_seurat[[6]])
human7 = perform_clustering_to_find_astrocytes(split_seurat[[7]])
human9 = perform_clustering_to_find_astrocytes(split_seurat[[9]])
human10 = perform_clustering_to_find_astrocytes(split_seurat[[10]])
human13 = perform_clustering_to_find_astrocytes(split_seurat[[13]])
human15 = perform_clustering_to_find_astrocytes(split_seurat[[15]])
human16 = perform_clustering_to_find_astrocytes(split_seurat[[16]])
## takes longer
human4 = perform_clustering_to_find_astrocytes(split_seurat[[4]]) # long
human8 = perform_clustering_to_find_astrocytes(split_seurat[[8]]) # long
human11 = perform_clustering_to_find_astrocytes(split_seurat[[11]]) # long
human12 = perform_clustering_to_find_astrocytes(split_seurat[[12]]) # long
human14 = perform_clustering_to_find_astrocytes(split_seurat[[14]]) # long

save( human1, file=paste0(objects_directory,"human1_scRNA.RData"))
save( human2, file=paste0(objects_directory,"human2_scRNA.RData"))
save( human3, file=paste0(objects_directory,"human3_scRNA.RData"))
save( human4, file=paste0(objects_directory,"human4_scRNA.RData"))
save( human5, file=paste0(objects_directory,"human5_scRNA.RData"))
save( human6, file=paste0(objects_directory,"human6_scRNA.RData"))
save( human7, file=paste0(objects_directory,"human7_scRNA.RData"))
save( human8, file=paste0(objects_directory,"human8_scRNA.RData"))
save( human9, file=paste0(objects_directory,"human9_scRNA.RData"))
save( human10, file=paste0(objects_directory,"human10_scRNA.RData"))
save( human11, file=paste0(objects_directory,"human11_scRNA.RData"))
save( human12, file=paste0(objects_directory,"human12_scRNA.RData"))
save( human13, file=paste0(objects_directory,"human13_scRNA.RData"))
save( human14, file=paste0(objects_directory,"human14_scRNA.RData"))
save( human15, file=paste0(objects_directory,"human15_scRNA.RData"))
save( human16, file=paste0(objects_directory,"human16_scRNA.RData"))

## NOTE(review): `objects_directory` is reassigned to a hard-coded, user-specific
## path from this point on.
objects_directory="~/Desktop/Ciuba_et_al_SM/data/objects/"

## Macaque samples: cluster and save immediately, one sample at a time.
macaque1 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque1"]])
save( macaque1, file=paste0(objects_directory,"macaque1_scRNA.RData"))
macaque2 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque2"]])
save( macaque2, file=paste0(objects_directory,"macaque2_scRNA.RData"))
macaque3 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_78_1"]])
save( macaque3, file=paste0(objects_directory,"macaque3_scRNA.RData"))
macaque4 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_DFC"]])
save( macaque4, file=paste0(objects_directory,"macaque4_scRNA.RData"))
macaque5 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_OFC"]])
save( macaque5, file=paste0(objects_directory,"macaque5_scRNA.RData"))
macaque6 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_93_DFC"]])
save( macaque6, file=paste0(objects_directory,"macaque6_scRNA.RData"))
macaque7 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_DFC_S2"]])
save( macaque7, file=paste0(objects_directory,"macaque7_scRNA.RData"))
macaque8 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_VFC"]])
save( macaque8, file=paste0(objects_directory,"macaque8_scRNA.RData"))
macaque9 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_110_VFC_S2"]])
save( macaque9, file=paste0(objects_directory,"macaque9_scRNA.RData"))
macaque10 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_77_Frontal"]])
save( macaque10, file=paste0(objects_directory,"macaque10_scRNA.RData"))
macaque11 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_64_Frontal"]])
save( macaque11, file=paste0(objects_directory,"macaque11_scRNA.RData"))
macaque12 = perform_clustering_to_find_astrocytes(split_seurat[["Macaque3_62_Frontal"]])
save( macaque12, file=paste0(objects_directory,"macaque12_scRNA.RData"))
## Reload the per-sample clustering results for the 16 human samples.
load(paste0(objects_directory,"human1_scRNA.RData"))
load(paste0(objects_directory,"human2_scRNA.RData"))
load(paste0(objects_directory,"human3_scRNA.RData"))
load(paste0(objects_directory,"human4_scRNA.RData"))
load(paste0(objects_directory,"human5_scRNA.RData"))
load(paste0(objects_directory,"human6_scRNA.RData"))
load(paste0(objects_directory,"human7_scRNA.RData"))
load(paste0(objects_directory,"human8_scRNA.RData"))
load(paste0(objects_directory,"human9_scRNA.RData"))
load(paste0(objects_directory,"human10_scRNA.RData"))
load(paste0(objects_directory,"human11_scRNA.RData"))
load(paste0(objects_directory,"human12_scRNA.RData"))
load(paste0(objects_directory,"human13_scRNA.RData"))
load(paste0(objects_directory,"human14_scRNA.RData"))
load(paste0(objects_directory,"human15_scRNA.RData"))
load(paste0(objects_directory,"human16_scRNA.RData"))

## Marker genes handed to findClusterCorrespondingToAstrocytes(); the same set
## is used for every sample, together with the "RNA_snn_res.2" clustering column.
astrocytic_markers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9")

human1_astrocyte_counts = findClusterCorrespondingToAstrocytes(human1, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human2_astrocyte_counts = findClusterCorrespondingToAstrocytes(human2, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human3_astrocyte_counts = findClusterCorrespondingToAstrocytes(human3, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human4_astrocyte_counts = findClusterCorrespondingToAstrocytes(human4, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human5_astrocyte_counts = findClusterCorrespondingToAstrocytes(human5, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human6_astrocyte_counts = findClusterCorrespondingToAstrocytes(human6, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human7_astrocyte_counts = findClusterCorrespondingToAstrocytes(human7, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human8_astrocyte_counts = findClusterCorrespondingToAstrocytes(human8, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human9_astrocyte_counts = findClusterCorrespondingToAstrocytes(human9, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human10_astrocyte_counts = findClusterCorrespondingToAstrocytes(human10, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human11_astrocyte_counts = findClusterCorrespondingToAstrocytes(human11, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human12_astrocyte_counts = findClusterCorrespondingToAstrocytes(human12, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human13_astrocyte_counts = findClusterCorrespondingToAstrocytes(human13, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human14_astrocyte_counts = findClusterCorrespondingToAstrocytes(human14, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human15_astrocyte_counts = findClusterCorrespondingToAstrocytes(human15, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)
human16_astrocyte_counts = findClusterCorrespondingToAstrocytes(human16, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers)

## Store all 16 results together in one file.
save( human1_astrocyte_counts, human2_astrocyte_counts, human3_astrocyte_counts, human4_astrocyte_counts, human5_astrocyte_counts,
      human6_astrocyte_counts, human7_astrocyte_counts, human8_astrocyte_counts, human9_astrocyte_counts, human10_astrocyte_counts, human11_astrocyte_counts, human12_astrocyte_counts, human13_astrocyte_counts, human14_astrocyte_counts, human15_astrocyte_counts, human16_astrocyte_counts,
      file=paste0(objects_directory,"human_scRNA_pseudobulk_data.RData"))
## Call getAstrocytes() on every human sample with the same clustering column
## ("RNA_snn_res.2") and the same marker set.
## The trailing numbers are the author's original annotations.
## NOTE(review): presumably astrocyte counts per sample; the repeated "153" for
## human10-human16 looks copy-pasted - verify.
astrocytic_markers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9")

human1_astrocyte = getAstrocytes(human1, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human2_astrocyte = getAstrocytes(human2, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 276
human3_astrocyte = getAstrocytes(human3, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 252
human4_astrocyte = getAstrocytes(human4, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 280
human5_astrocyte = getAstrocytes(human5, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human6_astrocyte = getAstrocytes(human6, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 85
human7_astrocyte = getAstrocytes(human7, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 41
human8_astrocyte = getAstrocytes(human8, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 205
human9_astrocyte = getAstrocytes(human9, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 40
human10_astrocyte = getAstrocytes(human10, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human11_astrocyte = getAstrocytes(human11, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human12_astrocyte = getAstrocytes(human12, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human13_astrocyte = getAstrocytes(human13, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human14_astrocyte = getAstrocytes(human14, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human15_astrocyte = getAstrocytes(human15, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
human16_astrocyte = getAstrocytes(human16, chosenClusterSet = "RNA_snn_res.2", astrocyticMarkers = astrocytic_markers) # 153
> human1_astrocyte = getAstrocytes(human1,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 26 found 153 astrocytes"
> human2_astrocyte = getAstrocytes(human2,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 9 found 276 astrocytes"
> human3_astrocyte = getAstrocytes(human3,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 10 found 252 astrocytes"
> human4_astrocyte = getAstrocytes(human4,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 22 found 280 astrocytes"
> human5_astrocyte = getAstrocytes(human5,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 26 found 153 astrocytes"
> human6_astrocyte = getAstrocytes(human6,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 20 found 85 astrocytes"
> human7_astrocyte = getAstrocytes(human7,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 23 found 41 astrocytes"
> human8_astrocyte = getAstrocytes(human8,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 21 found 205 astrocytes"
> human9_astrocyte = getAstrocytes(human9,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 22 found 40 astrocytes"
> human10_astrocyte = getAstrocytes(human10,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 21 found 82 astrocytes"
> human11_astrocyte = getAstrocytes(human11,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 31 found 259 astrocytes"
> human12_astrocyte = getAstrocytes(human12,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 27 found 96 astrocytes"
> human13_astrocyte = getAstrocytes(human13,chosenClusterSet = "RNA_snn_res.2",
+ astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
[1] "astrocytes are in cluster 12 found 112 astrocytes"
# Reload the saved per-sample human astrocyte counts and bind them into one
# table with one column per sample (human1..human16).
load(paste0(objects_directory,"human_scRNA_pseudobulk_data.RData"))
# Row names come from human1 only; assumes every sample vector has the same
# names in the same order -- TODO confirm.
human_astrocyte_counts = data.frame(human1=human1_astrocyte_counts,
                                    human2=human2_astrocyte_counts,
                                    human3=human3_astrocyte_counts,
                                    human4=human4_astrocyte_counts,
                                    human5=human5_astrocyte_counts,
                                    human6=human6_astrocyte_counts,
                                    human7=human7_astrocyte_counts,
                                    human8=human8_astrocyte_counts,
                                    human9=human9_astrocyte_counts,
                                    human10=human10_astrocyte_counts,
                                    human11=human11_astrocyte_counts,
                                    human12=human12_astrocyte_counts,
                                    human13=human13_astrocyte_counts,
                                    human14=human14_astrocyte_counts,
                                    human15=human15_astrocyte_counts,
                                    human16=human16_astrocyte_counts,
                                    row.names = names(human1_astrocyte_counts))
# Sample sheet: one row per column of human_astrocyte_counts, with constant
# Species / Human_NHP / study / stage labels.
human_astrocyte_counts_metadata = data.frame(Species=rep("Human",ncol(human_astrocyte_counts)),
                                             Human_NHP=rep("Human",ncol(human_astrocyte_counts)),
                                             study=rep("Mixed",ncol(human_astrocyte_counts)),
                                             stage=rep("Foetal",ncol(human_astrocyte_counts)),
                                             row.names=colnames(human_astrocyte_counts))
save( human_astrocyte_counts, human_astrocyte_counts_metadata,
      file=paste0(objects_directory,"human_astrocyte_counts.RData"))
# Load the twelve saved macaque single-cell objects from objects_directory,
# in sample order.
macaque_rdata_files = c("macaque1_scRNA.RData", "macaque2_scRNA.RData",
                        "macaque3_scRNA.RData", "macaque4_scRNA.RData",
                        "macaque5_scRNA.RData", "macaque6_scRNA.RData",
                        "macaque7_scRNA.RData", "macaque8_scRNA.RData",
                        "macaque9_scRNA.RData", "macaque10_scRNA.RData",
                        "macaque11_scRNA.RData", "macaque12_scRNA.RData")
# A for loop runs in the calling frame, so load() restores the objects into
# the same environment as the original one-call-per-file version.
for (rdata_file in macaque_rdata_files) {
  load(paste0(objects_directory, rdata_file))
}
# Astrocyte counts for the twelve macaque samples, computed in small batches.
# After each batch the results are saved, the large input objects are removed
# and gc() is called to release memory before the next batch.
# NOTE(review): the batch files are written to "~/Desktop", not to
# objects_directory, and nothing in this part of the script reads them back.

# Batch 1: macaque1-3
macaque1_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque1,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque2_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque2,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque3_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque3,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
save( macaque1_astrocyte_counts, macaque2_astrocyte_counts, macaque3_astrocyte_counts,
      file="~/Desktop/macaques123.RData")
rm(list=c("macaque1","macaque2","macaque3"))
gc()

# Batch 2: macaque4-5
macaque4_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque4,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque5_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque5,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
save( macaque4_astrocyte_counts, macaque5_astrocyte_counts,
      file="~/Desktop/macaques45.RData")
rm(list=c("macaque4","macaque5"))
gc()

# Batch 3: macaque6-7
macaque6_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque6,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque7_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque7,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
save( macaque6_astrocyte_counts, macaque7_astrocyte_counts,
      file="~/Desktop/macaques67.RData")
rm(list=c("macaque6","macaque7"))
gc()

# Batch 4: macaque8-9
macaque8_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque8,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque9_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque9,chosenClusterSet = "RNA_snn_res.2",
                                                                 astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
save( macaque8_astrocyte_counts,macaque9_astrocyte_counts,
      file="~/Desktop/macaques89.RData")
rm(list=c("macaque8","macaque9"))
gc()

# Batch 5: macaque10-11
macaque10_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque10,chosenClusterSet = "RNA_snn_res.2",
                                                                  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque11_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque11,chosenClusterSet = "RNA_snn_res.2",
                                                                  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
save( macaque10_astrocyte_counts,macaque11_astrocyte_counts,
      file="~/Desktop/macaques10_11.RData")
rm(list=c("macaque10","macaque11"))
gc()

# Batch 6: macaque12
macaque12_astrocyte_counts = findClusterCorrespondingToAstrocytes(macaque12,chosenClusterSet = "RNA_snn_res.2",
                                                                  astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
save( macaque12_astrocyte_counts,
      file="~/Desktop/macaques_12.RData")
rm(list=c("macaque12"))
gc()
# Extract the astrocytes of every macaque sample with getAstrocytes() (defined
# elsewhere in this file), using the same clustering column and marker panel
# as for the human samples.
# NOTE(review): macaque1..macaque12 are removed with rm() earlier in this
# script, after their counts are computed; the *_scRNA.RData objects have to
# be loaded again before these calls can run in a single top-to-bottom pass.
macaque1_astrocyte = getAstrocytes(macaque1,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque2_astrocyte = getAstrocytes(macaque2,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque3_astrocyte = getAstrocytes(macaque3,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque4_astrocyte = getAstrocytes(macaque4,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque5_astrocyte = getAstrocytes(macaque5,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque6_astrocyte = getAstrocytes(macaque6,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque7_astrocyte = getAstrocytes(macaque7,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque8_astrocyte = getAstrocytes(macaque8,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque9_astrocyte = getAstrocytes(macaque9,chosenClusterSet = "RNA_snn_res.2",
                                   astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque10_astrocyte = getAstrocytes(macaque10,chosenClusterSet = "RNA_snn_res.2",
                                    astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque11_astrocyte = getAstrocytes(macaque11,chosenClusterSet = "RNA_snn_res.2",
                                    astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
macaque12_astrocyte = getAstrocytes(macaque12,chosenClusterSet = "RNA_snn_res.2",
                                    astrocyticMarkers = c("SLC1A3","SOX9","GFAP","AQP4","ALDH1A1","ID4","APOE","S100A9"))
# Bind the per-sample macaque astrocyte counts into one table, one column per
# sample. The trailing numbers are the author's per-sample notes (presumably
# the number of astrocytes found -- TODO confirm).
# Row names come from macaque1 only; assumes every sample vector has the same
# names in the same order -- TODO confirm.
macaque_astrocyte_counts = data.frame(macaque1_late=macaque1_astrocyte_counts, # 807
                                      macaque2_late=macaque2_astrocyte_counts, # 36
                                      Macaque3_78_1=macaque3_astrocyte_counts, # 687
                                      Macaque3_110_DFC=macaque4_astrocyte_counts, # 783
                                      Macaque3_110_OFC=macaque5_astrocyte_counts, # 114
                                      Macaque3_93_DFC=macaque6_astrocyte_counts, # 613
                                      Macaque3_110_DFC_S2=macaque7_astrocyte_counts, # 37
                                      Macaque3_110_VFC=macaque8_astrocyte_counts, # 713
                                      Macaque3_110_VFC_S2=macaque9_astrocyte_counts, # 265
                                      Macaque3_77_Frontal=macaque10_astrocyte_counts, # 441
                                      Macaque3_64_Frontal=macaque11_astrocyte_counts, # 314
                                      Macaque3_62_Frontal=macaque12_astrocyte_counts, # 246
                                      row.names = names(macaque1_astrocyte_counts))
# Sample sheet: one row per column of macaque_astrocyte_counts, with constant
# Species / Human_NHP / study / stage labels.
macaque_astrocyte_counts_metadata = data.frame(Species=rep("Macaque",ncol(macaque_astrocyte_counts)),
                                               Human_NHP=rep("NHP",ncol(macaque_astrocyte_counts)),
                                               study=rep("Mixed",ncol(macaque_astrocyte_counts)),
                                               stage=rep("Foetal",ncol(macaque_astrocyte_counts)),
                                               row.names=colnames(macaque_astrocyte_counts))
save( macaque_astrocyte_counts, macaque_astrocyte_counts_metadata,
      file=paste0(objects_directory,"macaque_astrocyte_counts.RData"))
Only a few astrocytes were found in samples “human14”, “human7” and “human13”, so we remove these samples from the analysis.
# Reload the macaque and human count tables with their sample sheets.
load(paste0(objects_directory,"macaque_astrocyte_counts.RData"))
load(paste0(objects_directory,"human_astrocyte_counts.RData"))
# Both tables must list the same row names in the same order before their
# columns are placed side by side.
all(rownames(macaque_astrocyte_counts)==rownames(human_astrocyte_counts))
## [1] TRUE
# Counts: macaque columns first, then human; metadata rows in the same order.
stitched_counts = data.frame( macaque_astrocyte_counts, human_astrocyte_counts )
st_metadata = rbind( macaque_astrocyte_counts_metadata,human_astrocyte_counts_metadata )
# Columns of the count table must line up with rows of the sample sheet.
all(colnames(stitched_counts)==rownames(st_metadata))
## [1] TRUE
# Build the DESeq2 data set from the stitched counts, dropping samples
# human14, human7 and human13 from both the counts and the sample sheet.
# The design has a single factor, Human_NHP.
data = DESeqDataSetFromMatrix( countData = stitched_counts[,! colnames(stitched_counts) %in% c("human14","human7","human13")],
                               colData = st_metadata[! rownames(st_metadata) %in% c("human14","human7","human13"),],
                               design = ~ Human_NHP )
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
data = estimateSizeFactors(data)
data = estimateDispersions(data, fitType = "local")
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
# DESeq() keeps the size factors but re-estimates the dispersions (see the
# messages below), so the explicit estimateDispersions() call above is
# superseded by this step.
data = DESeq(data, fitType = 'local')
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 89 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
# Human vs NHP contrast: positive log2 fold changes mean higher in Human.
degs = results(data, contrast = c("Human_NHP","Human", "NHP") )
# Keep both the size-factor-normalised and the raw count matrices.
human_macaque_fetal_norm_counts = counts(data,normalized=TRUE)
human_macaque_fetal_unnorm_counts = counts(data,normalized=FALSE)
save(data,degs,
     human_macaque_fetal_norm_counts,
     human_macaque_fetal_unnorm_counts,file=paste0(objects_directory,"pseudobulk_published_scRNA_Foetal.RData"))
# Drop genes without an adjusted p-value, then keep those with padj < 0.1 and
# count how many go up / down in Human relative to NHP.
degs = degs[! is.na(degs$padj), ]
degs_01 = degs[ degs$padj < 0.1, ]
sum( degs_01$log2FoldChange>0 )
## [1] 5212
sum( degs_01$log2FoldChange<0 )
## [1] 6219
We confirm 37% (87/238) of the up-regulated genes and 35% (104/301) of the down-regulated genes.
# Read the Ensembl gene IDs of our own up- and down-regulated hits (one ID per
# line, no header, so the IDs end up in column V1).
up.hits.ensids = read.delim(paste0(outputs_directory,"up_engs.txt"),as.is=TRUE, header=FALSE)
dn.hits.ensids = read.delim(paste0(outputs_directory,"dn_engs.txt"),as.is=TRUE, header=FALSE)
# Translate the IDs to HGNC symbols through genemap.
up.hits.geneN = unique(genemap$hgnc_symbol[genemap$ensembl_gene_id %in% up.hits.ensids$V1])
dn.hits.geneN = unique(genemap$hgnc_symbol[genemap$ensembl_gene_id %in% dn.hits.ensids$V1])

# Keep only the hits that are present among the row names of degs.
up.hits.geneN_filt = up.hits.geneN[up.hits.geneN %in% rownames(degs)]
dn.hits.geneN_filt = dn.hits.geneN[dn.hits.geneN %in% rownames(degs)]
length(up.hits.geneN_filt)
## [1] 238
length(dn.hits.geneN_filt)
## [1] 301
# DESeq2 results restricted to our hits, separately for each direction.
degs_us_up = degs[rownames(degs) %in% up.hits.geneN_filt,]
degs_us_dn = degs[rownames(degs) %in% dn.hits.geneN_filt,]
# Our up-regulated hits that are also significantly up (padj < 0.1, LFC > 0) here.
degs_us_up[degs_us_up$padj<0.1 & degs_us_up$log2FoldChange>0,]
## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 87 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## SCNN1D 6.61796 5.252182 0.580160 9.05299 1.39105e-19 9.86892e-19
## CDK11A 35.76199 2.444782 0.184597 13.24392 4.89269e-40 9.54270e-39
## SLC35E2A 58.70908 2.730201 0.189547 14.40384 4.89453e-47 1.27041e-45
## H6PD 20.64759 1.287565 0.170613 7.54670 4.46417e-14 2.23169e-13
## DFFA 17.20583 0.636637 0.168038 3.78865 1.51468e-04 3.34767e-04
## ... ... ... ... ... ... ...
## ADA2 8.39128 2.439363 0.293316 8.31651 9.05931e-17 5.33378e-16
## LZTR1 25.92802 0.882482 0.168153 5.24808 1.53688e-07 4.68215e-07
## C1QTNF6 7.36232 1.673785 0.369422 4.53082 5.87541e-06 1.52153e-05
## MT-ATP8 24.80009 8.290334 0.666553 12.43761 1.63309e-35 2.54872e-34
## MT-ATP6 15.51741 7.188377 0.712925 10.08294 6.57311e-24 5.92171e-23
# Our down-regulated hits that are also significantly down (padj < 0.1, LFC < 0) here.
degs_us_dn[degs_us_dn$padj<0.1 & degs_us_dn$log2FoldChange<0,]
## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 104 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## SRSF10 104.4759 -0.932483 0.197947 -4.71077 0.000002467874236896
## RCAN3 15.2877 -2.830897 0.421129 -6.72216 0.000000000017904770
## PDIK1L 14.7172 -1.528401 0.215272 -7.09987 0.000000000001248702
## SRSF4 109.6152 -0.319722 0.169547 -1.88574 0.059329607370328094
## PRPF38A 32.0105 -1.528840 0.200637 -7.61992 0.000000000000025384
## ... ... ... ... ... ...
## ZNF776 24.28439 -2.44103 0.202366 -12.06248 1.66682e-33
## OLIG2 23.58954 -2.73026 0.728066 -3.75002 1.76820e-04
## DONSON 10.98414 -1.23557 0.311389 -3.96792 7.25017e-05
## MT-CO3 138.76363 -4.75420 0.436041 -10.90310 1.11399e-27
## C1GALT1C1L 1.77877 -3.83977 0.630962 -6.08558 1.16069e-09
## padj
## <numeric>
## SRSF10 0.000006656184798295
## RCAN3 0.000000000075424177
## PDIK1L 0.000000000005675825
## SRSF4 0.089430349211630344
## PRPF38A 0.000000000000128673
## ... ...
## ZNF776 2.37697e-32
## OLIG2 3.88128e-04
## DONSON 1.66126e-04
## MT-CO3 1.20738e-26
## C1GALT1C1L 4.24191e-09
# Names of the hits confirmed in this data set: same direction and padj < 0.1.
conf_up = rownames(degs_us_up[degs_us_up$padj<0.1 & degs_us_up$log2FoldChange>0,])
conf_down = rownames(degs_us_dn[degs_us_dn$padj<0.1 & degs_us_dn$log2FoldChange<0,])

# Fraction of confirmed hits per direction. The argument is spelled out as
# names.arg instead of relying on partial matching of "names".
barplot( c(length(conf_up)/length(up.hits.geneN_filt),
           length(conf_down)/length(dn.hits.geneN_filt)),
         col=c("green4","wheat3"), ylim=c(0,0.5),ylab="Fraction",
         names.arg=c("Up","Down"),xlab="EAGs")
axis(2,lwd=1)
Boxplots of chosen genes
# Sample sheet without the three excluded human samples; its rows are meant to
# line up with the columns of the count matrix passed to plotAGene().
sa=st_metadata[! rownames(st_metadata) %in% c("human14","human7","human13"),]

# Draw a boxplot of one gene's values, Human vs Macaque.
#   ct   : matrix/table with genes as row names and one column per sample
#   gene : row name of the gene to plot (also used as the plot title)
#   sa   : sample sheet with a Species column, one row per column of ct
#          (assumed to be in the same order as the columns of ct -- TODO confirm)
#   cols : border colours, in the order Human, Macaque
# Returns whatever boxplot() returns.
plotAGene = function( ct, gene, sa, cols ){
  # ct = human_macaque_fetal_norm_counts; gene="CTCF"
  # sa = st_metadata[! rownames(st_metadata) %in% c("human14","human7","human13"),]
  # cols = c("black","blue")
  # Split the gene's row by species and fix the group order to Human, Macaque.
  x = split( ct[rownames(ct)==gene,], sa$Species )[c("Human","Macaque")]
  boxplot(x,border=cols,main=gene,col="white")
}
degs["CTCF",]
## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue padj
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## CTCF 73.2475 -0.458664 0.186933 -2.45363 0.0141422 0.0240199
plotAGene( human_macaque_fetal_norm_counts, "CTCF", sa, c("black","blue")) # P=0.0147813
degs["TEAD3",]
## log2 fold change (MLE): Human_NHP Human vs NHP
## Wald test p-value: Human_NHP Human vs NHP
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## TEAD3 11.8658 1.36589 0.24607 5.55085 0.0000000284291
## padj
## <numeric>
## TEAD3 0.0000000923959
plotAGene( human_macaque_fetal_norm_counts, "TEAD3", sa, c("black","blue")) # P=3.20079e-08
# Kanton et al. human DE table: split by which species has the higher average
# expression, then keep only the gene symbols.
kanton_hits = read.delim(paste0(outputs_directory,"Supplementary_Table_15_human_DE.txt"))
kanton_hits_up = kanton_hits[kanton_hits$Average.expression..human.>kanton_hits$Average.expression..chimp.,]
kanton_hits_dn = kanton_hits[kanton_hits$Average.expression..human.<kanton_hits$Average.expression..chimp.,]
kanton_hits_up = kanton_hits_up$Symbol
kanton_hits_dn = kanton_hits_dn$Symbol
# Jorstad et al. astrocyte tables, human vs each non-human primate
# (comma-separated files): chimp, gorilla, rhesus, marmoset.
hs_pt = read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_chimp_sig_genes.txt'),sep=",")
hs_pp = read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_gorilla_sig_genes.txt'),sep=",")
hs_rm = read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_rhesus_sig_genes.txt'), sep=",")
hs_cj = read.delim( paste0(outputs_directory,'Jorstad_tables/Astro_human_vs_marmoset_sig_genes.txt'),sep=",")

# Drop rows without an adjusted p-value.
hs_pt = hs_pt[!is.na(hs_pt$padj),]
hs_pp = hs_pp[!is.na(hs_pp$padj),]
hs_rm = hs_rm[!is.na(hs_rm$padj),]
hs_cj = hs_cj[!is.na(hs_cj$padj),]
# identify downregulated and up regulated genes
# thr is the absolute log2 fold-change cutoff; 0 means direction only.
thr=0

# Per comparison (1 = chimp, 2 = gorilla, 3 = rhesus, 4 = marmoset):
# genes with padj < 0.1 that are lower (down) or higher (up) in human.
down1 = hs_pt$gene[hs_pt$log2FoldChange<(-1*thr) & hs_pt$padj<0.1]
down2 = hs_pp$gene[hs_pp$log2FoldChange<(-1*thr) & hs_pp$padj<0.1]
down3 = hs_rm$gene[hs_rm$log2FoldChange<(-1*thr) & hs_rm$padj<0.1]
down4 = hs_cj$gene[hs_cj$log2FoldChange<(-1*thr) & hs_cj$padj<0.1]

up1 = hs_pt$gene[hs_pt$log2FoldChange>thr & hs_pt$padj<0.1]
up2 = hs_pp$gene[hs_pp$log2FoldChange>thr & hs_pp$padj<0.1]
up3 = hs_rm$gene[hs_rm$log2FoldChange>thr & hs_rm$padj<0.1]
up4 = hs_cj$gene[hs_cj$log2FoldChange>thr & hs_cj$padj<0.1]

# Genes shared by all four comparisons.
down1234=down1[down1 %in% down2[down2 %in% down3[down3 %in% down4]]]
up1234=up1[up1 %in% up2[up2 %in% up3[up3 %in% up4]]]

# Genes shared by the first two comparisons.
down12=down1[down1 %in% down2]
up12=up1[up1 %in% up2]

# Genes shared by the first three comparisons.
down123=down1[down1 %in% down2[down2 %in% down3]]
up123=up1[up1 %in% up2[up2 %in% up3]]
These are the tables I obtained directly from Shaojie Ma.
# Load the raw Wilcoxon DEG results (provides deg_species) and keep the
# astrocyte cluster rows with adjusted p-value < 0.01.
load(paste0(outputs_directory,'/Wilcox_DEG_results_raw.Rdata'))
deg_species_filt = deg_species[deg_species$cluster=="Astro" & deg_species$p_val_adj<0.01,]

#############################
# Up in human: ratio_fc > 1, restricted to Human vs Chimpanzee / Rhesus rows.
deg_species_filt_astro_hs = deg_species_filt[log2(deg_species_filt$ratio_fc)>0,]
deg_species_filt_astro_hs = deg_species_filt_astro_hs[deg_species_filt_astro_hs$species1=="Human" & deg_species_filt_astro_hs$species2 %in% c("Chimpanzee","Rhesus"),]

# Keep genes that occur in more than one of the remaining rows (presumably:
# significant against both Chimpanzee and Rhesus -- assumes one row per gene
# and species pair, TODO confirm).
up_genes = table(deg_species_filt_astro_hs$gene)
up_genes = names(up_genes[up_genes>1])

#############################
# Down in human: same procedure with ratio_fc < 1.
deg_species_filt_astro_hs = deg_species_filt[log2(deg_species_filt$ratio_fc)<(0),]
deg_species_filt_astro_hs = deg_species_filt_astro_hs[deg_species_filt_astro_hs$species1=="Human" & deg_species_filt_astro_hs$species2 %in% c("Chimpanzee","Rhesus"),]

dn_genes = table(deg_species_filt_astro_hs$gene)
dn_genes = names(dn_genes[dn_genes>1])

# Sanity check: no gene may be called both up and down.
any( up_genes %in% dn_genes )
## [1] FALSE
length(up_genes)
## [1] 1429
length(dn_genes)
## [1] 1123
# Union of all up-regulated gene lists (published sets, our hits, and the
# hits confirmed in the foetal pseudobulk analysis).
all_up_all = unique( c(kanton_hits_up,up123,up_genes,up.hits.geneN,conf_up))

# Membership table: one row per gene, one logical column per source.
all_up_all = data.frame( Kanton = all_up_all %in% kanton_hits_up,
                         Jorstad = all_up_all %in% up123,
                         Ma = all_up_all %in% up_genes,
                         Foetal = all_up_all %in% conf_up,
                         Ciuba = all_up_all %in% up.hits.geneN,
                         row.names = all_up_all)
# Keep our own hits (Ciuba) supported by at least one of the other four
# sources (columns 1:4 = Kanton, Jorstad, Ma, Foetal), then print the table.
all_up_all = all_up_all[all_up_all$Ciuba & rowSums(all_up_all[,1:4])>0,]
all_up_all
## Kanton Jorstad Ma Foetal Ciuba
## NBPF11 TRUE FALSE FALSE TRUE TRUE
## NBPF14 TRUE FALSE FALSE TRUE TRUE
## PABPC1L TRUE FALSE FALSE TRUE TRUE
## PALLD TRUE TRUE TRUE TRUE TRUE
## PCAT6 TRUE FALSE FALSE FALSE TRUE
## PIGZ TRUE TRUE FALSE TRUE TRUE
## SCNN1D TRUE FALSE FALSE TRUE TRUE
## SCRG1 TRUE TRUE TRUE FALSE TRUE
## THBS4 TRUE FALSE TRUE TRUE TRUE
## PAGR1 FALSE TRUE FALSE TRUE TRUE
## STK33 FALSE TRUE TRUE TRUE TRUE
## VKORC1 FALSE TRUE FALSE FALSE TRUE
## AQP1 FALSE TRUE TRUE FALSE TRUE
## MTCH1 FALSE TRUE TRUE FALSE TRUE
## PRDX6 FALSE TRUE TRUE FALSE TRUE
## RMDN1 FALSE TRUE FALSE FALSE TRUE
## RANGRF FALSE TRUE FALSE FALSE TRUE
## GUK1 FALSE TRUE TRUE FALSE TRUE
## ATP6V1E2 FALSE TRUE TRUE TRUE TRUE
## S100A13 FALSE TRUE FALSE FALSE TRUE
## FAM228B FALSE TRUE FALSE FALSE TRUE
## LIN7A FALSE TRUE FALSE TRUE TRUE
## ACACA FALSE FALSE TRUE TRUE TRUE
## BAIAP3 FALSE FALSE TRUE TRUE TRUE
## C1orf54 FALSE FALSE TRUE TRUE TRUE
## C1QTNF6 FALSE FALSE TRUE TRUE TRUE
## C22orf46 FALSE FALSE TRUE FALSE TRUE
## CPS1 FALSE FALSE TRUE FALSE TRUE
## DGCR6L FALSE FALSE TRUE FALSE TRUE
## EFHD1 FALSE FALSE TRUE TRUE TRUE
## GFPT2 FALSE FALSE TRUE TRUE TRUE
## GTF3C5 FALSE FALSE TRUE TRUE TRUE
## HSPB1 FALSE FALSE TRUE FALSE TRUE
## MLH1 FALSE FALSE TRUE TRUE TRUE
## MMP19 FALSE FALSE TRUE FALSE TRUE
## MOV10 FALSE FALSE TRUE TRUE TRUE
## NDUFV1 FALSE FALSE TRUE FALSE TRUE
## NR1H3 FALSE FALSE TRUE TRUE TRUE
## PDLIM7 FALSE FALSE TRUE FALSE TRUE
## RHOBTB3 FALSE FALSE TRUE FALSE TRUE
## SIRT3 FALSE FALSE TRUE TRUE TRUE
## STYXL1 FALSE FALSE TRUE TRUE TRUE
## TCF25 FALSE FALSE TRUE TRUE TRUE
## TCTN3 FALSE FALSE TRUE TRUE TRUE
## TMEM9B-AS1 FALSE FALSE TRUE FALSE TRUE
## TRIP6 FALSE FALSE TRUE FALSE TRUE
## TSR3 FALSE FALSE TRUE FALSE TRUE
## VIM FALSE FALSE TRUE FALSE TRUE
## ZNF266 FALSE FALSE TRUE TRUE TRUE
## ZNHIT3 FALSE FALSE TRUE FALSE TRUE
## CDK11A FALSE FALSE FALSE TRUE TRUE
## H6PD FALSE FALSE FALSE TRUE TRUE
## DFFA FALSE FALSE FALSE TRUE TRUE
## PAQR7 FALSE FALSE FALSE TRUE TRUE
## SRGAP2B FALSE FALSE FALSE TRUE TRUE
## HHLA3 FALSE FALSE FALSE TRUE TRUE
## SLC35E2A FALSE FALSE FALSE TRUE TRUE
## NBPF1 FALSE FALSE FALSE TRUE TRUE
## NBPF15 FALSE FALSE FALSE TRUE TRUE
## NBPF9 FALSE FALSE FALSE TRUE TRUE
## NBPF19 FALSE FALSE FALSE TRUE TRUE
## NBPF26 FALSE FALSE FALSE TRUE TRUE
## ACOX3 FALSE FALSE FALSE TRUE TRUE
## CBR4 FALSE FALSE FALSE TRUE TRUE
## TMEM129 FALSE FALSE FALSE TRUE TRUE
## SULT1C4 FALSE FALSE FALSE TRUE TRUE
## TEAD3 FALSE FALSE FALSE TRUE TRUE
## MAN2B2 FALSE FALSE FALSE TRUE TRUE
## RIPK1 FALSE FALSE FALSE TRUE TRUE
## SRD5A1 FALSE FALSE FALSE TRUE TRUE
## WDR27 FALSE FALSE FALSE TRUE TRUE
## INSYN2B FALSE FALSE FALSE TRUE TRUE
## C1QTNF3-AMACR FALSE FALSE FALSE TRUE TRUE
## ABCB4 FALSE FALSE FALSE TRUE TRUE
## CCND3 FALSE FALSE FALSE TRUE TRUE
## MAPKAP1 FALSE FALSE FALSE TRUE TRUE
## NUP43 FALSE FALSE FALSE TRUE TRUE
## PDE1C FALSE FALSE FALSE TRUE TRUE
## POLR2J3 FALSE FALSE FALSE TRUE TRUE
## ADAM9 FALSE FALSE FALSE TRUE TRUE
## EPHB4 FALSE FALSE FALSE TRUE TRUE
## COL27A1 FALSE FALSE FALSE TRUE TRUE
## SPDYE3 FALSE FALSE FALSE TRUE TRUE
## OSBPL5 FALSE FALSE FALSE TRUE TRUE
## CUBN FALSE FALSE FALSE TRUE TRUE
## ELMOD1 FALSE FALSE FALSE TRUE TRUE
## SHLD2 FALSE FALSE FALSE TRUE TRUE
## EML3 FALSE FALSE FALSE TRUE TRUE
## TIMM23B-AGAP6 FALSE FALSE FALSE TRUE TRUE
## AGAP4 FALSE FALSE FALSE TRUE TRUE
## FAM111B FALSE FALSE FALSE TRUE TRUE
## TIMM23B FALSE FALSE FALSE TRUE TRUE
## AGAP9 FALSE FALSE FALSE TRUE TRUE
## DGKA FALSE FALSE FALSE TRUE TRUE
## DHRS12 FALSE FALSE FALSE TRUE TRUE
## RFLNA FALSE FALSE FALSE TRUE TRUE
## LTB4R FALSE FALSE FALSE TRUE TRUE
## LPCAT2 FALSE FALSE FALSE TRUE TRUE
## CNTNAP1 FALSE FALSE FALSE TRUE TRUE
## ADCY9 FALSE FALSE FALSE TRUE TRUE
## SLCO3A1 FALSE FALSE FALSE TRUE TRUE
## NPIPA1 FALSE FALSE FALSE TRUE TRUE
## ADA2 FALSE FALSE FALSE TRUE TRUE
## MAN2B1 FALSE FALSE FALSE TRUE TRUE
## CARD8 FALSE FALSE FALSE TRUE TRUE
## SLC66A2 FALSE FALSE FALSE TRUE TRUE
## ZNF486 FALSE FALSE FALSE TRUE TRUE
## GYG2 FALSE FALSE FALSE TRUE TRUE
## LZTR1 FALSE FALSE FALSE TRUE TRUE
## MT-ATP6 FALSE FALSE FALSE TRUE TRUE
## MT-ATP8 FALSE FALSE FALSE TRUE TRUE
# Union of all down-regulated gene lists (published sets, our hits, and the
# hits confirmed in the foetal pseudobulk analysis).
all_dn_all = unique( c(kanton_hits_dn,down123,dn_genes,dn.hits.geneN,conf_down))

# Membership table: one row per gene, one logical column per source.
all_dn_all = data.frame( Kanton = all_dn_all %in% kanton_hits_dn,
                         Jorstad = all_dn_all %in% down123,
                         Ma = all_dn_all %in% dn_genes,
                         foetal = all_dn_all %in% conf_down,
                         Ciuba = all_dn_all %in% dn.hits.geneN,
                         row.names = all_dn_all)
# Keep our own hits (Ciuba) supported by at least one of the other four
# sources (columns 1:4 = Kanton, Jorstad, Ma, foetal), then print the table.
all_dn_all = all_dn_all[all_dn_all$Ciuba & rowSums(all_dn_all[,1:4])>0,]
all_dn_all
## Kanton Jorstad Ma foetal Ciuba
## CELF4 TRUE FALSE FALSE FALSE TRUE
## FGF13 TRUE FALSE FALSE FALSE TRUE
## SYN1 TRUE FALSE TRUE TRUE TRUE
## PDZRN4 FALSE TRUE TRUE TRUE TRUE
## PLCL2 FALSE TRUE TRUE FALSE TRUE
## SRSF4 FALSE TRUE FALSE TRUE TRUE
## PBLD FALSE TRUE TRUE TRUE TRUE
## GABPB1 FALSE TRUE TRUE FALSE TRUE
## UNC5D FALSE TRUE TRUE TRUE TRUE
## RAPGEF5 FALSE TRUE FALSE FALSE TRUE
## DCC FALSE TRUE TRUE FALSE TRUE
## ATP8A2 FALSE TRUE TRUE FALSE TRUE
## PANK3 FALSE TRUE FALSE TRUE TRUE
## RCAN3 FALSE TRUE TRUE TRUE TRUE
## MAP3K2 FALSE TRUE FALSE FALSE TRUE
## NUDT4 FALSE TRUE FALSE TRUE TRUE
## RND3 FALSE TRUE TRUE TRUE TRUE
## SPAST FALSE TRUE TRUE FALSE TRUE
## FBXO11 FALSE TRUE FALSE FALSE TRUE
## ACIN1 FALSE FALSE TRUE FALSE TRUE
## AHCTF1 FALSE FALSE TRUE FALSE TRUE
## ATAD2B FALSE FALSE TRUE FALSE TRUE
## CECR2 FALSE FALSE TRUE FALSE TRUE
## CEP104 FALSE FALSE TRUE FALSE TRUE
## CREBRF FALSE FALSE TRUE FALSE TRUE
## CSPP1 FALSE FALSE TRUE FALSE TRUE
## DONSON FALSE FALSE TRUE TRUE TRUE
## DYRK2 FALSE FALSE TRUE TRUE TRUE
## EED FALSE FALSE TRUE TRUE TRUE
## EFL1 FALSE FALSE TRUE TRUE TRUE
## ERCC6L2 FALSE FALSE TRUE FALSE TRUE
## FBXW7 FALSE FALSE TRUE FALSE TRUE
## GRK4 FALSE FALSE TRUE FALSE TRUE
## INA FALSE FALSE TRUE TRUE TRUE
## INSR FALSE FALSE TRUE FALSE TRUE
## KAT6A FALSE FALSE TRUE FALSE TRUE
## KLHL24 FALSE FALSE TRUE FALSE TRUE
## MBTD1 FALSE FALSE TRUE FALSE TRUE
## MIB1 FALSE FALSE TRUE FALSE TRUE
## MLLT10 FALSE FALSE TRUE FALSE TRUE
## PGBD2 FALSE FALSE TRUE FALSE TRUE
## POLR1B FALSE FALSE TRUE TRUE TRUE
## PPM1A FALSE FALSE TRUE TRUE TRUE
## PPP4R3B FALSE FALSE TRUE TRUE TRUE
## PTPN4 FALSE FALSE TRUE FALSE TRUE
## RAB3A FALSE FALSE TRUE TRUE TRUE
## RPRD2 FALSE FALSE TRUE FALSE TRUE
## STRN3 FALSE FALSE TRUE FALSE TRUE
## STXBP1 FALSE FALSE TRUE FALSE TRUE
## SYT16 FALSE FALSE TRUE FALSE TRUE
## TERF1 FALSE FALSE TRUE TRUE TRUE
## TFDP2 FALSE FALSE TRUE FALSE TRUE
## TRIM2 FALSE FALSE TRUE TRUE TRUE
## TRIM23 FALSE FALSE TRUE TRUE TRUE
## TTC33 FALSE FALSE TRUE TRUE TRUE
## TUBB4A FALSE FALSE TRUE TRUE TRUE
## UBN2 FALSE FALSE TRUE FALSE TRUE
## ZNF148 FALSE FALSE TRUE FALSE TRUE
## ZNF595 FALSE FALSE TRUE FALSE TRUE
## ZRANB3 FALSE FALSE TRUE FALSE TRUE
## NUP133 FALSE FALSE FALSE TRUE TRUE
## RSBN1 FALSE FALSE FALSE TRUE TRUE
## CCDC181 FALSE FALSE FALSE TRUE TRUE
## PRPF38A FALSE FALSE FALSE TRUE TRUE
## ETAA1 FALSE FALSE FALSE TRUE TRUE
## PDIK1L FALSE FALSE FALSE TRUE TRUE
## SOX11 FALSE FALSE FALSE TRUE TRUE
## AIDA FALSE FALSE FALSE TRUE TRUE
## SRSF10 FALSE FALSE FALSE TRUE TRUE
## GDAP2 FALSE FALSE FALSE TRUE TRUE
## ARL6 FALSE FALSE FALSE TRUE TRUE
## PHOSPHO2 FALSE FALSE FALSE TRUE TRUE
## SMARCA5 FALSE FALSE FALSE TRUE TRUE
## KCNH7 FALSE FALSE FALSE TRUE TRUE
## C1GALT1C1L FALSE FALSE FALSE TRUE TRUE
## KIF2A FALSE FALSE FALSE TRUE TRUE
## CDC5L FALSE FALSE FALSE TRUE TRUE
## PRPF4B FALSE FALSE FALSE TRUE TRUE
## CLK4 FALSE FALSE FALSE TRUE TRUE
## OARD1 FALSE FALSE FALSE TRUE TRUE
## KIF3A FALSE FALSE FALSE TRUE TRUE
## CEP162 FALSE FALSE FALSE TRUE TRUE
## EIF4E FALSE FALSE FALSE TRUE TRUE
## ZUP1 FALSE FALSE FALSE TRUE TRUE
## ZCCHC10 FALSE FALSE FALSE TRUE TRUE
## PGM2 FALSE FALSE FALSE TRUE TRUE
## HDAC2 FALSE FALSE FALSE TRUE TRUE
## ZKSCAN8 FALSE FALSE FALSE TRUE TRUE
## BRD2 FALSE FALSE FALSE TRUE TRUE
## CFAP69 FALSE FALSE FALSE TRUE TRUE
## CBLL1 FALSE FALSE FALSE TRUE TRUE
## RANBP6 FALSE FALSE FALSE TRUE TRUE
## ZNF92 FALSE FALSE FALSE TRUE TRUE
## C9orf72 FALSE FALSE FALSE TRUE TRUE
## TMEM196 FALSE FALSE FALSE TRUE TRUE
## ZBTB10 FALSE FALSE FALSE TRUE TRUE
## UBXN2B FALSE FALSE FALSE TRUE TRUE
## RPAP3 FALSE FALSE FALSE TRUE TRUE
## FAM76B FALSE FALSE FALSE TRUE TRUE
## FOLH1 FALSE FALSE FALSE TRUE TRUE
## IKZF5 FALSE FALSE FALSE TRUE TRUE
## SMC3 FALSE FALSE FALSE TRUE TRUE
## KMT5B FALSE FALSE FALSE TRUE TRUE
## DPF2 FALSE FALSE FALSE TRUE TRUE
## LIN7C FALSE FALSE FALSE TRUE TRUE
## DCDC1 FALSE FALSE FALSE TRUE TRUE
## GVQW3 FALSE FALSE FALSE TRUE TRUE
## HSPA14 FALSE FALSE FALSE TRUE TRUE
## C10orf143 FALSE FALSE FALSE TRUE TRUE
## YAF2 FALSE FALSE FALSE TRUE TRUE
## PKP2 FALSE FALSE FALSE TRUE TRUE
## ATP2B1 FALSE FALSE FALSE TRUE TRUE
## VCPKMT FALSE FALSE FALSE TRUE TRUE
## CAND1 FALSE FALSE FALSE TRUE TRUE
## ZC2HC1C FALSE FALSE FALSE TRUE TRUE
## RBM26 FALSE FALSE FALSE TRUE TRUE
## THTPA FALSE FALSE FALSE TRUE TRUE
## ZNF200 FALSE FALSE FALSE TRUE TRUE
## CTCF FALSE FALSE FALSE TRUE TRUE
## AKTIP FALSE FALSE FALSE TRUE TRUE
## NRG4 FALSE FALSE FALSE TRUE TRUE
## ADAP2 FALSE FALSE FALSE TRUE TRUE
## DLL3 FALSE FALSE FALSE TRUE TRUE
## ZNF175 FALSE FALSE FALSE TRUE TRUE
## APOE FALSE FALSE FALSE TRUE TRUE
## OSBPL2 FALSE FALSE FALSE TRUE TRUE
## ZNF304 FALSE FALSE FALSE TRUE TRUE
## ZNF776 FALSE FALSE FALSE TRUE TRUE
## EID2B FALSE FALSE FALSE TRUE TRUE
## MEX3C FALSE FALSE FALSE TRUE TRUE
## ZNF17 FALSE FALSE FALSE TRUE TRUE
## ZNF600 FALSE FALSE FALSE TRUE TRUE
## ZNF181 FALSE FALSE FALSE TRUE TRUE
## PEG3 FALSE FALSE FALSE TRUE TRUE
## OLIG2 FALSE FALSE FALSE TRUE TRUE
## ZNF134 FALSE FALSE FALSE TRUE TRUE
## RBMX FALSE FALSE FALSE TRUE TRUE
## PHF6 FALSE FALSE FALSE TRUE TRUE
## MT-CO3 FALSE FALSE FALSE TRUE TRUE
## PGAM4 FALSE FALSE FALSE TRUE TRUE
## RTL5 FALSE FALSE FALSE TRUE TRUE
# Print the session details for this run: R version, platform, locale,
# and the versions of attached and namespace-loaded packages
# (the echoed output follows below as `##` lines).
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] pl_PL.UTF-8/pl_PL.UTF-8/pl_PL.UTF-8/C/pl_PL.UTF-8/pl_PL.UTF-8
##
## attached base packages:
## [1] tools grid stats4 parallel stats graphics grDevices
## [8] utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.6.0
## [2] readr_2.1.4
## [3] SeuratObject_4.1.3
## [4] Seurat_4.3.0
## [5] ggVennDiagram_1.2.2
## [6] rBLAST_0.99.2
## [7] Rsubread_2.6.4
## [8] BSgenome.Ptroglodytes.UCSC.panTro6_1.4.2
## [9] beeswarm_0.4.0
## [10] VennDiagram_1.7.3
## [11] futile.logger_1.4.3
## [12] scuttle_1.2.1
## [13] SingleCellExperiment_1.14.1
## [14] forcats_1.0.0
## [15] RColorBrewer_1.1-3
## [16] glmGamPoi_1.4.0
## [17] reshape2_1.4.4
## [18] kableExtra_1.3.4
## [19] plotly_4.10.1
## [20] dplyr_1.1.2
## [21] ggrepel_0.9.3
## [22] data.table_1.14.8
## [23] pheatmap_1.0.12
## [24] LSD_4.1-0
## [25] BSgenome.Hsapiens.UCSC.hg38_1.4.3
## [26] BSgenome_1.60.0
## [27] colorspace_2.1-0
## [28] rtracklayer_1.52.1
## [29] Rsamtools_2.8.0
## [30] Biostrings_2.60.2
## [31] XVector_0.32.0
## [32] GenomicFeatures_1.44.2
## [33] biomaRt_2.48.3
## [34] Gviz_1.36.2
## [35] st_1.2.7
## [36] sda_1.3.8
## [37] fdrtool_1.2.17
## [38] corpcor_1.6.10
## [39] entropy_1.3.1
## [40] smoothmest_0.1-3
## [41] MASS_7.3-58.3
## [42] genefilter_1.74.1
## [43] edgeR_3.34.1
## [44] limma_3.48.3
## [45] DESeq2_1.32.0
## [46] SummarizedExperiment_1.22.0
## [47] MatrixGenerics_1.4.3
## [48] matrixStats_0.63.0
## [49] GenomicRanges_1.44.0
## [50] GenomeInfoDb_1.28.4
## [51] geneplotter_1.70.0
## [52] annotate_1.70.0
## [53] XML_3.99-0.14
## [54] AnnotationDbi_1.54.1
## [55] IRanges_2.26.0
## [56] S4Vectors_0.30.2
## [57] lattice_0.21-8
## [58] locfit_1.5-9.7
## [59] Biobase_2.52.0
## [60] BiocGenerics_0.38.0
## [61] plyr_1.8.8
## [62] ggplot2_3.4.2
## [63] Matrix_1.5-4
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 scattermore_0.8
## [3] tidyr_1.3.0 bit64_4.0.5
## [5] knitr_1.42 irlba_2.3.5.1
## [7] DelayedArray_0.18.0 rpart_4.1.19
## [9] KEGGREST_1.32.0 RCurl_1.98-1.12
## [11] AnnotationFilter_1.16.0 generics_0.1.3
## [13] cowplot_1.1.1 lambda.r_1.2.4
## [15] RSQLite_2.3.1 RANN_2.6.1
## [17] proxy_0.4-27 future_1.32.0
## [19] tzdb_0.3.0 bit_4.0.5
## [21] spatstat.data_3.0-1 webshot_0.5.4
## [23] xml2_1.3.3 httpuv_1.6.9
## [25] xfun_0.38 hms_1.1.3
## [27] jquerylib_0.1.4 evaluate_0.20
## [29] promises_1.2.0.1 fansi_1.0.4
## [31] restfulr_0.0.15 progress_1.2.2
## [33] dbplyr_2.3.2 igraph_1.4.2
## [35] DBI_1.1.3 htmlwidgets_1.6.2
## [37] spatstat.geom_3.1-0 purrr_1.0.1
## [39] ellipsis_0.3.2 backports_1.4.1
## [41] deldir_1.0-6 sparseMatrixStats_1.4.2
## [43] vctrs_0.6.1 ensembldb_2.16.4
## [45] ROCR_1.0-11 abind_1.4-5
## [47] cachem_1.0.7 withr_2.5.0
## [49] RVenn_1.1.0 progressr_0.13.0
## [51] checkmate_2.1.0 sctransform_0.3.5
## [53] GenomicAlignments_1.28.0 prettyunits_1.1.1
## [55] goftest_1.2-3 svglite_2.1.1
## [57] cluster_2.1.4 lazyeval_0.2.2
## [59] crayon_1.5.2 spatstat.explore_3.1-0
## [61] units_0.8-1 labeling_0.4.2
## [63] pkgconfig_2.0.3 nlme_3.1-162
## [65] ProtGenerics_1.24.0 nnet_7.3-18
## [67] rlang_1.1.0 globals_0.16.2
## [69] lifecycle_1.0.3 miniUI_0.1.1.1
## [71] filelock_1.0.2 BiocFileCache_2.0.0
## [73] dichromat_2.0-0.1 invgamma_1.1
## [75] polyclip_1.10-4 lmtest_0.9-40
## [77] ashr_2.2-54 carData_3.0-5
## [79] zoo_1.8-11 base64enc_0.1-3
## [81] ggridges_0.5.4 png_0.1-8
## [83] viridisLite_0.4.1 rjson_0.2.21
## [85] bitops_1.0-7 KernSmooth_2.23-20
## [87] blob_1.2.4 DelayedMatrixStats_1.14.3
## [89] classInt_0.4-9 mixsqp_0.3-48
## [91] SQUAREM_2021.1 stringr_1.5.0
## [93] spatstat.random_3.1-4 parallelly_1.35.0
## [95] rstatix_0.7.2 jpeg_0.1-10
## [97] ggsignif_0.6.4 beachmat_2.8.1
## [99] scales_1.2.1 memoise_2.0.1
## [101] magrittr_2.0.3 ica_1.0-3
## [103] zlibbioc_1.38.0 compiler_4.1.0
## [105] BiocIO_1.2.0 fitdistrplus_1.1-8
## [107] cli_3.6.1 listenv_0.9.0
## [109] patchwork_1.1.2 pbapply_1.7-0
## [111] htmlTable_2.4.1 formatR_1.14
## [113] Formula_1.2-5 tidyselect_1.2.0
## [115] stringi_1.7.12 highr_0.10
## [117] yaml_2.3.7 latticeExtra_0.6-30
## [119] sass_0.4.5 VariantAnnotation_1.38.0
## [121] future.apply_1.10.0 rstudioapi_0.14
## [123] foreign_0.8-84 gridExtra_2.3
## [125] farver_2.1.1 Rtsne_0.16
## [127] digest_0.6.31 shiny_1.7.4
## [129] Rcpp_1.0.10 car_3.1-2
## [131] broom_1.0.4 later_1.3.0
## [133] RcppAnnoy_0.0.20 httr_1.4.5
## [135] biovizBase_1.40.0 sf_1.0-12
## [137] tensor_1.5 rvest_1.0.3
## [139] reticulate_1.28 truncnorm_1.0-9
## [141] splines_4.1.0 uwot_0.1.14
## [143] spatstat.utils_3.0-2 sp_1.6-0
## [145] systemfonts_1.0.4 xtable_1.8-4
## [147] jsonlite_1.8.4 futile.options_1.0.1
## [149] R6_2.5.1 Hmisc_5.0-1
## [151] pillar_1.9.0 htmltools_0.5.5
## [153] mime_0.12 glue_1.6.2
## [155] fastmap_1.1.1 BiocParallel_1.26.2
## [157] class_7.3-21 codetools_0.2-19
## [159] utf8_1.2.3 spatstat.sparse_3.0-1
## [161] bslib_0.4.2 tibble_3.2.1
## [163] curl_5.0.0 leiden_0.4.3
## [165] interp_1.1-4 survival_3.5-5
## [167] rmarkdown_2.21 munsell_0.5.0
## [169] e1071_1.7-13 GenomeInfoDbData_1.2.6
## [171] gtable_0.3.3