diff --git a/Betty_Figure5_DGE.R b/Betty_Figure5_DGE.R
deleted file mode 100644
index 71ba56f5181d02b9a9766e7415ae42f53dfe7f5d..0000000000000000000000000000000000000000
--- a/Betty_Figure5_DGE.R
+++ /dev/null
@@ -1,165 +0,0 @@
-#reading count tables
-library(reshape2)
-library(plyr)
-library(dplyr)
-##read in all tables
-tbl<- (1:58278)
-#used 58278 bc I know there are 58278 features from featuresCount
-for (x in list.files(pattern="*.txt")) {
-  u<-read.table(x, header=T,stringsAsFactors = F)
-  tbl <- cbind(tbl,u)
-}
-
-#get rid of tbl row numbers after if you want
-tbl <- tbl[ ,-1]
-
-#truncate column names, or switch to Betty's names
-names <- t(read.table('Betty_sampleNames.txt', header = F, stringsAsFactors = F))
-colnames(tbl) <- names
-tbl$Gene_id<-row.names(tbl)
-
-tbl_mod <- read.table("BettyGeneMatrix.txt", header = F, stringsAsFactors = F)
-setwd('/Users/erica/Desktop/Betty_DGEs/')
-colnames(tbl_mod)<-tbl_mod[2,]
-row.names(tbl_mod) <- tbl_mod[ ,1]
-Betty_data_wt <-tbl_mod[4:58281,c(16:29,32:33)]
-#convert the matrix data from character to integer values, need to for umap input
-Betty_data_wt_N<- type.convert(Betty_data_wt,na.strings = "NA", as.is = FALSE, dec = ".")
-
-#extract just non-invading
-Betty_data_wt_NI <- Betty_data_wt_N[ , grepl( "N" , names( Betty_data_wt_N ) ) ] 
-#extract just invading
-Betty_data_wt_I <- Betty_data_wt_N[ , grepl( "I" , names( Betty_data_wt_N ) ) ] 
-#remove controls for now
-Betty_data_wt_noC <- Betty_data_wt_N[ ,-(15:16)]
-
-#get list of Gene Ids to convert into gene names
-library("biomaRt")
-#convertign gene IDs using biomart
-Betty_gene_IDs<-rownames(Betty_data_wt_noC)
-ensembl=useMart("ensembl",dataset="hsapiens_gene_ensembl")
-E_Ids=Betty_gene_IDs#list of entrez gene ids
-Betty_Gene_names <- getBM(attributes=c('ensembl_gene_id_version', 'ensembl_gene_id','external_gene_name'), 
-                          filters = 'ensembl_gene_id_version', 
-                          values = E_Ids, 
-                          mart = ensembl)
-annot <- Betty_Gene_names[,c("ensembl_gene_id_version","external_gene_name")]
-
-#remove N1,N2,N4 (due to how they cluster on tsne and umap)
-Betty_data_wt_NI_red <- Betty_data_wt_NI[ ,c(3,5,6,7)]
-Betty_data_wt_I_red <- Betty_data_wt_I[ ,c(3,5,6,7)]
-Betty_data_wt_noC_red <- Betty_data_wt_N[ ,-c(1:4,7,8,15:16,17)]
-
-library(edgeR)
-#calculate normalization factors using TMM
-Betty_NI_norm = calcNormFactors(Betty_data_wt_NI_red, method='TMM')
-Betty_I_norm = calcNormFactors(Betty_data_wt_I_red, method='TMM')
-Betty_norm = calcNormFactors(Betty_data_wt_noC_red, method='TMM')
-# calculate dispersion factors using:
-Betty_NI_disp<- estimateCommonDisp(Betty_data_wt_NI_red, group=NULL, lib.size=NULL, tol=1e-06,
-                                   rowsum.filter=5, verbose=FALSE)
-Betty_I_disp<- estimateCommonDisp(Betty_data_wt_I_red, group=NULL, lib.size=NULL, tol=1e-06,
-                                  rowsum.filter=5, verbose=FALSE)
-
-Betty_disp<- estimateCommonDisp(Betty_data_wt_noC_red, group=NULL, lib.size=NULL, tol=1e-06,
-                                rowsum.filter=5, verbose=FALSE)
-
-d <- DGEList(counts=Betty_data_wt_noC_red, group=c(1,2,1,2,1,2,1,2), lib.size=Betty_norm)
-Betty_norm = calcNormFactors(Betty_data_wt_noC_red, method='TMM')
-Betty_disp<- estimateCommonDisp(Betty_data_wt_noC_red, group=c(1,2,1,2,1,2,1,2), lib.size=Betty_norm, tol=1e-06,
-                                rowsum.filter=5, verbose=FALSE)
-dim(d) #58278
-
-#filter out lowly expressed genes
-keep <- rowSums(cpm(d)>1) >= 0.1
-d <- d[keep, , keep.lib.sizes=FALSE]
-#test with Fisher exact test
-de <- exactTest(d, dispersion=Betty_disp)
-de_et_topTags <- topTags(de, n=58278)
-#fisher exact test table:
-write.table(de_et_topTags, file="ALL_TopTags_et_wt_red.txt", row.names=TRUE, col.names=TRUE, sep="\t")
-
-#testing using GLM
-design <- model.matrix(~factor(c(1,2,1,2,1,2,1,2)))
-y <- estimateDisp(d,design)
-fit <- glmQLFit(y,design)
-qlf <- glmQLFTest(fit,coef=2)
-qlf_topTags <- topTags(qlf, n=58278)
-
-library(reshape2)
-library(plyr)
-library(dplyr)
-library(reshape)
-
-#get individualcpm normalized values for each sample to make a heatmap
-qlf_topTags_ind <-rownames(topTags(qlf, n=58278)$table)
-tagsbysample_qlf<-as.data.frame(cpm(d)[qlf_topTags_ind, order(d$samples$group)])
-tagsbysample_qlf$Gene_id <-rownames(tagsbysample_qlf)
-
-#get gene names
-annot <- read.table("ALL_TopTags_qlf_wt_red_GeneNames.txt", header = T, stringsAsFactors = F)
-
-#retrieve genes with a significant pvalue
-qlf_topTags_id<-subset(qlf_topTags$table, FDR < 0.05) #244
-qlf_topTags_id$Gene_id <-rownames(qlf_topTags_id)
-qlf_heatmap_mat <- merge(qlf_topTags_id,annot, by.x="Gene_id",by.y="GenestableIDversion") #now 216
-qlf_heatmap_mat <- qlf_heatmap_mat[ ,c(1,7)]
-qlf_heatmap_mat_ind <- merge(qlf_heatmap_mat,tagsbysample_qlf,by="Gene_id")
-
-##facetted heatmap
-qlf_heatmap_mat_ind <- qlf_heatmap_mat_ind[ ,-1]
-qlf_heatmap_mat_ind_melt <- melt(qlf_heatmap_mat_ind,id.vars="Genename") 
-qlf_heatmap_mat_ind_melt_split <- t(data.frame(strsplit(as.character(qlf_heatmap_mat_ind_melt$variable),"",fixed = T)))
-qlf_heatmap_mat_ind_mod <- cbind(qlf_heatmap_mat_ind_melt,qlf_heatmap_mat_ind_melt_split)
-#qlf_heatmap_mat_ind_mod$gene_name<-qlf_heatmap_mat_ind_mod$external_gene_name
-#rownames(qlf_heatmap_mat_ind_mod) <- qlf_heatmap_mat_ind_mod$gene_name
-qlf_heatmap_mat_ind_mod_red <- qlf_heatmap_mat_ind_mod[ ,c(1,3,4,5)]
-colnames(qlf_heatmap_mat_ind_mod_red) <- c('gene','value','state','exp')
-
-##making a gene clustered, viridis, facetted heat map for qlf signficant genes
-library(ggplot2)
-library(viridis)
-library(ggExtra)
-col_breaks <- c(1,5,10,20,50,100,250,500,750,1000)
-
-##cluster data manually to get order on the geom_tile()
-rownames(qlf_heatmap_mat_ind) <- qlf_heatmap_mat_ind$Genename
-qlf_heatmap_mat_ind <- qlf_heatmap_mat_ind[ ,-1]
-hr_qlf <- hclust(as.dist(1-cor(t(qlf_heatmap_mat_ind), method="pearson")),
-                 method="average")
-
-gene_order <- hr_qlf$labels
-
-pdf(file="Betty_facet_qlf_heat_mod.pdf",width=4, height=10)
-par(mar=c(7,4,4,2)+0.1) 
-ggplot(qlf_heatmap_mat_ind_mod_red,aes(state,gene,fill=value))+
-  geom_tile(color= "white",size=0.1) + 
-  scale_fill_viridis_c(name="Gene \nExpression (CPM)",limits = c(0,50), breaks =col_breaks,option="viridis",
-                       na.value = "yellow") + 
-  scale_y_discrete(limits=gene_order, labels=gene_order) +
-  scale_x_discrete(limits=c("N","I"), labels=c("N", "NI")) +
-  facet_grid(.~exp, switch = "x", scales = "free_x", space = "free_x") +
-  labs(x="State", y="Gene") +
-  theme(legend.position = "bottom")+
-  theme(plot.title=element_text(size = 16, face="bold"))+
-  theme(axis.text.y=element_text(size=5)) +
-  theme(strip.background = element_rect(colour="white"))+
-  theme(plot.title=element_text(hjust=0))+
-  theme(axis.ticks=element_blank())+
-  theme(axis.text.y=element_text(size=3))+
-  theme(axis.text.x=element_text(size=12))+
-  theme(legend.title=element_text(size=8))+
-  theme(legend.text=element_text(size=6))+
-  theme(strip.text.x = element_text(size=0))
-  guides(fill=guide_colorbar(nbin=20)) +
-  removeGrid() 
-dev.off()
-
-hmap_order <- data.frame(qlf_heatmap_mat_ind[(hr_qlf$labels), ])
-write.csv(hmap_order ,"Betty_qlfheatmap.csv")
-
-
-
-
-
-