library(qiime2R)
library(CoDaSeq)
library(phyloseq)
library(ggplot2)
library(tidyr)
library("dplyr")
library("gridExtra")
#library(RCircos)
#library(circlize)
library(viridis)
#library(UpSetR)
#library(knitr)
library(tidyr)
library(magrittr)
library(vegan)
library(stringr)
library(patchwork)
`%ni%` = Negate(`%in%`)
source("~/Desktop/PhaeoPhycosphere/functions/legendplot.R")
colors=c('#e9e9e9','#C14450','#f0b08f','#c2aba6','#60555f','#3c6481','#9fd6e6','#256F64','#63a375')
red<- c("#EB8B8E","#FBD5E2","#E7B6AF","#AC6873", "#D82354")
orange <- c("#FAAA6D","#FECF92")
yellow <- c("#FFC317","#F7F4B7", "#CC9C3C")
green <- c("#16866F","#1E3F1C","#99A339","#516A65","#8BC89F")
blue <- c("#005694","#B7E1DD","#66879E","#1BAAE2","#5FC8D8")
purple <- c("#E7D7CE","#A699A9","#434582","#81347D", "#B5218E")
colors30 <- c(blue, purple, red, yellow, orange, green, "black")
Import Additional functions:
scripts <- c("graphical_methods.R",
"tree_methods.R",
"plot_merged_trees.R",
"specificity_methods.R",
"ternary_plot.R",
"richness.R",
"edgePCA.R",
"copy_number_correction.R",
"import_frogs.R",
"prevalence.R",
"compute_niche.R")
urls <- paste0("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/", scripts)
for (url in urls) {
source(url)
}
MERGED run analysis all 16S data from two runs (includes Antarctica single colonies (original and after 1 yr) and Antarctica environmental samples)
Import and format feature table (ASV counts in each sample) and meta date (sample names, )
ps<-qza_to_phyloseq(features="MERGEDtable99.qza")
meta<- read.csv("MERGEDsampleMetaData.txt", sep = "\t", header = TRUE)
row.names(meta)<- meta$SampleID
meta$Year <- as.character(meta$Year)
meta$Station_Year <- paste(meta$Station, meta$Year)
meta<- meta %>% separate(Name, sep="-", into = c("type", "number
"), remove = FALSE)
META <- sample_data(meta)
Import and format SILVA taxonomy:
SILVA 138-99 taxonomy trimmed to match the primers used on samples
#taxonomy <- read.csv("MERGED_taxonomy.csv", stringsAsFactors = FALSE)
taxonomy <- read.csv("silva_primered_MERGED_taxonomy.csv", stringsAsFactors = FALSE)
names(taxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(taxonomy) <-taxonomy[[1]] #move the feature ID column to become the row names
taxonomy <- taxonomy[,(-1)] #delete the feature ID column
taxonomy <- separate(taxonomy, tax, c("Domain","Phylum", "Class", "Order", "Family", "Genus", "Species", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
taxonomy <- taxonomy[,c(1:7)]
taxonomy$D0 <- with(taxonomy, ifelse(Order == " o__Chloroplast", "Chloroplast", "Bacteria"))
col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
taxonomy<- taxonomy[, col_order]
taxmat <- as.matrix(taxonomy)
TAX = tax_table(taxmat)
Import and format PhytoREF taxonomy:
PhytoREF trained with same primers
phytoR <- read.table("PhytoRef_tax_str.txt", header = FALSE)
library(splitstackshape)
phytoR_split<- cSplit(phytoR, "V1", "|")
phytoR_split$taxa <- paste(phytoR_split$V1_03, phytoR_split$V1_04, phytoR_split$V1_05, phytoR_split$V1_06 , phytoR_split$V1_07, phytoR_split$V1_08, phytoR_split$V1_09, phytoR_split$V1_10, phytoR_split$V1_11, phytoR_split$V1_12, sep = "; ")
phytoR_split$taxonomy <- paste(phytoR_split$taxa, phytoR_split$V2, sep = " ")
phytoR <- phytoR_split %>% dplyr::select(V1_01, taxonomy)
write.table(phytoR, "phytoRef_tax_final.txt", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)
PRtaxonomy <- read.csv("phytoRef_MERGED_taxonomy.csv", stringsAsFactors = FALSE)
names(PRtaxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(PRtaxonomy) <-PRtaxonomy[[1]] #move the feature ID column to become the row names
PRtaxonomy <- PRtaxonomy[,(-1)] #delete the feature ID column
PRtaxonomy <- separate(PRtaxonomy, tax, c("Domain","Phylum", "Class", "Order", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13"), sep = ";", fill = "right")
#taxonomy <- taxonomy[,c(1:7)]
#col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
#taxonomy<- taxonomy[, col_order]
PRtaxmat <- as.matrix(PRtaxonomy)
PRTAX = tax_table(PRtaxmat)
Make phyloseq object:
ps = merge_phyloseq(ps, TAX, META)
Plot relative abundance as stacked bar plot (all samples including chloroplast sequences) to visualize the relative abundance of chloroplast and bacterial sequences in each sample:
ps<- subset_taxa(ps, D0 %in% c("Bacteria", "Chloroplast") )
ps <- subset_samples(ps, Dataset %in% c("PantCol", "Antarctica"))
psra<- transform_sample_counts(ps, function(OTU) 100* OTU/sum(OTU))
glomD1<- tax_glom(psra, 'D0')
taxabarplot<-plot_bar(glomD1, x= "Name", fill = "D0") + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + theme(legend.title=element_blank()) + geom_bar(aes(fill=D0), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values=c("lightgrey", "#7BB03B"), name = "") + xlab("") + facet_grid(~Dataset + Year, scales = "free")
taxabarplot
Supplemental Figure 2 ggsave(“chloroplast_barplot.pdf”, width = 10, height = 5)
Get summary statistics about the amount of chloroplast sequences in single-colony samples (these are all Phaeocystis/host and not data that are particularly useful)”
pantcol_chloro_stats <- subset_samples(glomD1, Dataset == "PantCol")
chloro_stat_melt <- psmelt(pantcol_chloro_stats) %>% filter(D0 == "Chloroplast")
min(chloro_stat_melt$Abundance)
## [1] 52.67964
mean(chloro_stat_melt$Abundance)
## [1] 72.86961
max(chloro_stat_melt$Abundance)
## [1] 96.43276
Create a phyloseq object with only chloroplast sequences and switch out taxonomy from SILVA to PhytoRef:
ps_chloro <- subset_taxa(ps, Order == " o__Chloroplast" )
ps_chlorotax <- merge_phyloseq(ps_chloro, PRTAX)
Create a phyloseq object with only bacterial sequences:
ps_nochloro <- subset_taxa(ps, Order != " o__Chloroplast" & Domain != "Unassigned")
ggrare(ps_nochloro, step = 1000, color = "Dataset", label = "Name", se = FALSE)
## rarefying sample A1-A2-4971
## rarefying sample A10-A14-4971
## rarefying sample A11-A15-4971
## rarefying sample A12-A20-4971
## rarefying sample A2-A5-4971
## rarefying sample A3-A6-4971
## rarefying sample A4-A8-4971
## rarefying sample A5-A9-4971
## rarefying sample A6-A10-4971
## rarefying sample A7-A11-4971
## rarefying sample A8-A12-4971
## rarefying sample A9-A13-4971
## rarefying sample B1-A21-4971
## rarefying sample B10-A30-4971
## rarefying sample B11-A31-4971
## rarefying sample B12-A32-4971
## rarefying sample B2-A22-4971
## rarefying sample B3-A23-4971
## rarefying sample B4-A24-4971
## rarefying sample B5-A25-4971
## rarefying sample B6-A26-4971
## rarefying sample B7-A27-4971
## rarefying sample B8-A28-4971
## rarefying sample B9-A29-4971
## rarefying sample C1-A33-4971
## rarefying sample C2-A34-4971
## rarefying sample C3-A35-4971
## rarefying sample Ant1
## rarefying sample Ant10
## rarefying sample Ant11
## rarefying sample Ant12
## rarefying sample Ant13
## rarefying sample Ant14
## rarefying sample Ant15
## rarefying sample Ant16
## rarefying sample Ant17
## rarefying sample Ant2
## rarefying sample Ant3
## rarefying sample Ant4
## rarefying sample Ant5
## rarefying sample Ant6
## rarefying sample Ant7
## rarefying sample Ant8
## rarefying sample Ant9
## rarefying sample col1
## rarefying sample col10
## rarefying sample col11
## rarefying sample col12
## rarefying sample col13
## rarefying sample col14
## rarefying sample col2
## rarefying sample col3
## rarefying sample col4
## rarefying sample col5
## rarefying sample col6
## rarefying sample col7
## rarefying sample col8
## rarefying sample col9
Rarefaction for Antarctica colonies:
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30")
colony_bacteria_rare<- ggrare(PSColony, step = 1000, color = "Station_Year", se = FALSE) + theme_test() + scale_color_manual(values= c("#005694","#FFC317", "#5FC8D8" ))
## rarefying sample A1-A2-4971
## rarefying sample A10-A14-4971
## rarefying sample A11-A15-4971
## rarefying sample A12-A20-4971
## rarefying sample A2-A5-4971
## rarefying sample A3-A6-4971
## rarefying sample A4-A8-4971
## rarefying sample A5-A9-4971
## rarefying sample A6-A10-4971
## rarefying sample A7-A11-4971
## rarefying sample A8-A12-4971
## rarefying sample A9-A13-4971
## rarefying sample B1-A21-4971
## rarefying sample B11-A31-4971
## rarefying sample B12-A32-4971
## rarefying sample B2-A22-4971
## rarefying sample B3-A23-4971
## rarefying sample B4-A24-4971
## rarefying sample B5-A25-4971
## rarefying sample B6-A26-4971
## rarefying sample B7-A27-4971
## rarefying sample B8-A28-4971
## rarefying sample B9-A29-4971
## rarefying sample C1-A33-4971
## rarefying sample C2-A34-4971
## rarefying sample C3-A35-4971
## rarefying sample col1
## rarefying sample col10
## rarefying sample col11
## rarefying sample col12
## rarefying sample col13
## rarefying sample col14
## rarefying sample col2
## rarefying sample col3
## rarefying sample col4
## rarefying sample col5
## rarefying sample col6
## rarefying sample col7
## rarefying sample col8
## rarefying sample col9
colony_bacteria_rare
ant_enviro<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Year == "2021")
field_bacteria_rare<-ggrare( ant_enviro, step = 1000, color = "type", se = FALSE) + theme_test() + scale_color_manual(values= c("#1E3F1C","#8BC89F"))
## rarefying sample Ant1
## rarefying sample Ant10
## rarefying sample Ant11
## rarefying sample Ant12
## rarefying sample Ant13
## rarefying sample Ant14
## rarefying sample Ant15
## rarefying sample Ant16
## rarefying sample Ant17
## rarefying sample Ant2
## rarefying sample Ant3
## rarefying sample Ant4
## rarefying sample Ant5
## rarefying sample Ant6
## rarefying sample Ant7
## rarefying sample Ant8
## rarefying sample Ant9
field_bacteria_rare
colony_bacteria_rare + field_bacteria_rare
Supplemental Figure 3
ASV Richness
totalOTU <- data.frame(otu_table(ps_nochloro))
totalOTU$rowsu <- rowSums(totalOTU)
totalOTUnotzero <- totalOTU %>% filter(rowsu >1)
dim(totalOTUnotzero)
## [1] 1429 59
plugin <- ps_nochloro %>%
estimate_richness(measures = "Observed") %$% Observed
Dataset <- ps_nochloro %>% sample_data %$% Dataset
Station <- ps_nochloro %>% sample_data %$% Station
Replicate <- ps_nochloro %>% sample_data %$% Rep
Month <- ps_nochloro %>% sample_data %$% Month
Year <- ps_nochloro %>% sample_data %$% Year
richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")
richness %>%group_by(Dataset, Year) %>% summarize(mean = mean(richness), min = min(richness), max = max(richness))
## # A tibble: 3 × 5
## # Groups: Dataset [2]
## Dataset Year mean min max
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Antarctica 2021 199. 140 317
## 2 PantCol 2021 44.1 28 67
## 3 PantCol 2023 24.9 18 31
RichPlot<- richness %>% filter(Dataset == "PantCol" ) %>% ggplot( aes(x=Station, y=richness, fill = Year))+ facet_grid(~ Year + Station, scales = "free") +geom_boxplot() + theme_bw() + scale_fill_manual(values = c( "#75D1B7", "#256F64", "black", "grey")) + theme(text = element_text(size=14)) +ylab("Observed Richness") +ggtitle("")+ theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +xlab("")#+ limy(0, 90)
RichPlot
ggsave(“richness_antarctica_colonies.pdf”, width = 5, height =3)
Figure 2A
Richness summary stats for manuescript: - need total unique ASVs for each sample type plus the mean and range per samples
PSColony_field<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" &Year == 2021)
PSColony_field = filter_taxa(PSColony_field, function(x) sum(x) > 1, TRUE)
PSColony_field
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 241 taxa and 26 samples ]
## sample_data() Sample Data: [ 26 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 241 taxa by 8 taxonomic ranks ]
-> 241 unique ASVs across all field-collected colony microbiomes
plugin <- PSColony_field %>%
estimate_richness(measures = "Observed") %$% Observed
Dataset <- PSColony_field %>% sample_data %$% Dataset
Station <- PSColony_field %>% sample_data %$% Station
Replicate <- PSColony_field %>% sample_data %$% Rep
Month <- PSColony_field %>% sample_data %$% Month
Year <- PSColony_field %>% sample_data %$% Year
richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")
min(richness$richness)
## [1] 28
mean(richness$richness)
## [1] 43.80769
max(richness$richness)
## [1] 67
^ min, mean, max per field-collected colony
Cultured-colonies:
PSColony_culture<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" & Year == 2023)
PSColony_culture = filter_taxa(PSColony_culture, function(x) sum(x) > 1, TRUE)
PSColony_culture
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 74 taxa and 14 samples ]
## sample_data() Sample Data: [ 14 samples by 13 sample variables ]
## tax_table() Taxonomy Table: [ 74 taxa by 8 taxonomic ranks ]
-> 74 ASVs total across all cultured colonies
plugin <- PSColony_culture %>%
estimate_richness(measures = "Observed") %$% Observed
Dataset <- PSColony_culture %>% sample_data %$% Dataset
Station <- PSColony_culture %>% sample_data %$% Station
Replicate <- PSColony_culture %>% sample_data %$% Rep
Month <- PSColony_culture %>% sample_data %$% Month
Year <- PSColony_culture %>% sample_data %$% Year
richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")
min(richness$richness)
## [1] 18
mean(richness$richness)
## [1] 24.92857
max(richness$richness)
## [1] 31
^ min, mean, and max ASVs counts in cultured-colony microbiomes
First look Barplot (at Order level):
ps_nochloro_RA<- transform_sample_counts(ps_nochloro, function(OTU) 100* OTU/sum(OTU))
ps_nochloro_RA_glomO<- tax_glom(ps_nochloro_RA, 'Order')
taxabarplot<-plot_bar(ps_nochloro_RA_glomO, x= "Name", fill = "Order") + scale_y_continuous(expand = c(0, 0)) + ggtitle("") + theme(legend.title=element_blank()) + geom_bar(aes(fill=Order), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values= rep( colors, 30))+ xlab("") + facet_grid(~Dataset, scales= "free_x")
taxabarplot+ theme(legend.position="none")
Colonies barplot
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30")
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')
ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)
mergemelt<- psmelt(ps_colony_RA_glomO_F)
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."
spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Order)) + facet_grid(~ Year + Station, scales = "free")
spatial_plot + geom_bar(aes(), stat="identity", position="stack") +
theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())
ggsave(“antarctica_colonies.pdf”, width = 5, height =5)
Figure 2B
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30")
OTU4clr<- data.frame(t(data.frame(otu_table(PSColony))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)
psCLR <- phyloseq(OTU2,TAX,META)
ordu = ordinate(psCLR, "PCoA", "euclidean")
p<-plot_ordination(psCLR, ordu)+theme_bw() + theme(text = element_text(size=14)) + geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") + geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Year, shape = Station), size =3) + scale_shape_manual(values= c(21,24)) +scale_fill_manual(values=colors[-1]) +guides(fill = guide_legend(override.aes=list(shape=21)))
p
ggsave(“ant_col_PCoA.pdf”, width = 5, height =3)
Supplemental Figure 4
PERMANOVA by year (field-collected v. cultured colony microbiomes)
OTUs<- data.frame(otu_table(psCLR))
meta4year <- meta[row.names(meta) %in% row.names(OTUs),]
meta4year$Sample.Type <- factor(meta4year$Year , levels = c("2021", "2023"))
set.seed(1)
adonis2(vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4year)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4year)
## Df SumOfSqs R2 F Pr(>F)
## Sample.Type 1 3714.3 0.53776 44.209 0.001 ***
## Residual 38 3192.6 0.46224
## Total 39 6906.9 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Field-collected and cultured colony microbiomes are significantly different
PCoA of just field-collected colonies to check for differences between stations:
AntColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" & Year == "2021")
OTU4clr<- data.frame(t(data.frame(otu_table(AntColony))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)
psCLR <- phyloseq(OTU2,TAX,META)
ordu = ordinate(psCLR, "PCoA", "euclidean")
p<-plot_ordination(psCLR, ordu)+theme_bw() + theme(text = element_text(size=14)) + geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") + geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Station, shape = Dataset), size =3) + scale_shape_manual(values= c(21,21)) +scale_fill_manual(values=colors[-1]) +guides(fill = guide_legend(override.aes=list(shape=21)))
p
PERMANOVA by station :
OTUs<- data.frame(otu_table(psCLR))
meta4stat <- meta[row.names(meta) %in% row.names(OTUs),]
meta4stat$Sample.Type <- factor(meta4stat$Station , levels = c("400", "600"))
set.seed(1)
adonis2(vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4stat)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4stat)
## Df SumOfSqs R2 F Pr(>F)
## Sample.Type 1 158.22 0.05458 1.3855 0.058 .
## Residual 24 2740.78 0.94542
## Total 25 2899.00 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Stations are not significantly different
Prepare data frame:
AntColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Region == "Antarctica" & Name != "A30")
nchF = filter_taxa(AntColony, function(x) sum(x) > 1, TRUE)
nchfffra<- transform_sample_counts(nchF, function(OTU) 100* OTU/sum(OTU))
a_ra <- psmelt(nchfffra)
altero<- subset_taxa(nchfffra, Order == " o__Alteromonadales")
sw_ra <- psmelt(altero)
sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station)
sw_long<- sw_long[rev(order(sw_long$Genus)),]
factord<- unique(sw_long$OTU)
sw_long$OTU <- factor(sw_long$OTU, levels = factord)
sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()
heat_plot
Supplemental Figure 5
oceano<- subset_taxa(nchfffra, Order == " o__Oceanospirillales")
sw_ra <- psmelt(oceano)
sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)
sw_long<- sw_long[rev(order(sw_long$Genus)),]
factord<- unique(sw_long$OTU)
sw_long$OTU <- factor(sw_long$OTU, levels = factord)
sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()
heat_plot
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()
heat_plot
ggsave(“oceano_heatplot.pdf”, width = 7, height = 5)
Supplemental FIgure 8
rhodo<- subset_taxa(nchfffra, Order == " o__Rhodobacterales")
sw_ra <- psmelt(rhodo)
sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)
sw_long<- sw_long[rev(order(sw_long$Genus)),]
factord<- unique(sw_long$OTU)
sw_long$OTU <- factor(sw_long$OTU, levels = factord)
sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()
heat_plot
ggsave(“Rhodo_heatplot.pdf”, width = 7, height = 5) Supplemental Figure 7
sulfito <- sw_long %>% filter(Year == "2023") %>% filter(OTU == "8ee2658ac7dd8e0f20d396c3d3e1a539")
min(sulfito$Abundance)
## [1] 12.24997
mean(sulfito$Abundance)
## [1] 22.04735
max(sulfito$Abundance)
## [1] 30.79332
^ summary stats for sulfitobacter rel abunandance in cultured-colony microbiomes
cellv<- subset_taxa(nchfffra, Order == " o__Cellvibrionales")
sw_ra <- psmelt(cellv)
sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)
sw_long<- sw_long[rev(order(sw_long$Genus)),]
factord<- unique(sw_long$OTU)
sw_long$OTU <- factor(sw_long$OTU, levels = factord)
sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()
heat_plot
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()
heat_plot
bc13a8f6afa703399c40d5879cc68221 = Sar92 clade ASV <- get summary stats
sw_long %>% filter(OTU == "bc13a8f6afa703399c40d5879cc68221") %>% summarize(mean = mean(Abundance), min = min(Abundance), max = max(Abundance))
## mean min max
## 1 2.78122 0 18.11352
caulo<- subset_taxa(nchfffra, Order == " o__Caulobacterales")
sw_ra <- psmelt(caulo)
sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)
sw_long<- sw_long[rev(order(sw_long$Genus)),]
factord<- unique(sw_long$OTU)
sw_long$OTU <- factor(sw_long$OTU, levels = factord)
sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")
heat_plot
algimonas <- sw_long %>% filter(Year =="2023") %>% filter(OTU %in% c("e7f391400f0a3e23b1c732d60e33016c", "715318dadbd602bb7863947527c67e3e")) %>% group_by(Name) %>% summarize(total = sum(Abundance))
min(algimonas$total)
## [1] 6.431744
mean(algimonas$total)
## [1] 18.20647
max(algimonas$total)
## [1] 29.7241
sd(algimonas$total)
## [1] 6.349521
^ summary stats for algimonas in cultured-colony microbiomes
sphingo<- subset_taxa(nchfffra, Order == " o__Sphingomonadales")
sw_ra <- psmelt(sphingo)
sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)
sw_long<- sw_long[rev(order(sw_long$Genus)),]
factord<- unique(sw_long$OTU)
sw_long$OTU <- factor(sw_long$OTU, levels = factord)
sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")
heat_plot
heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
geom_tile(aes( fill=Abundance)) +
theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
panel.background = element_blank()) +
ylab("") +
xlab("") +
scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")
heat_plot
Blast results for the the Sphingomonas ASV abundant in field-collected colony microbiomes:
f1ea98d49baf3dd5dd4b2456017e5d48 GCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCAGGGATGATAATGACAGTACCTGGAGAATAAGCTCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTTCAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCCTTGAAACTGGATGGCTAGAATACTGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGACTCACTGGACAGTTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATAC
Erythrobacter - previously found in marine sediment
OTU4chloro<- (data.frame(otu_table(ps_chloro)))
names(OTU4chloro) <- gsub("\\.", "-", names(OTU4chloro))
PRtaxdf <- PRtaxonomy %>% filter(row.names(PRtaxonomy) %in% row.names(OTU4chloro))
PRtaxmat <- as.matrix(PRtaxdf)
PRTAX = tax_table(PRtaxmat)
OTU2 <- otu_table(as.matrix(OTU4chloro), taxa_are_rows = TRUE)
ps_chlorotax <- phyloseq(OTU2,PRTAX,META)
ps_chloro_RA<- transform_sample_counts(ps_chlorotax, function(OTU) 100* OTU/sum(OTU))
ps_chloro_RA_glomO<- tax_glom(ps_chloro_RA, 'Order')
pouch_col_chloro <- subset_samples(ps_chloro_RA_glomO, Dataset %in% c("Antarctica") & Year == "2021")
nchF = filter_taxa(pouch_col_chloro, function(x) sum(x) > 1, TRUE)
a_ra <- psmelt(nchF)
a4long<- a_ra %>% dplyr::select(OTU, Sample, Name, Order, Class, Abundance, Station, Year)
a4long<- a4long[rev(order(a4long$Order)),]
factord<- unique(a4long$OTU)
a4long$OTU <- factor(a4long$OTU, levels = factord)
extraction_log<- read.csv("antRNA_extractions .csv")
FTextractions <- extraction_log %>% filter(SampleType =="FT")
CTDextractions <- extraction_log %>% filter(SampleType =="CTD")
ftlog <- read.csv("ftlog.csv")
ftlog$Name<- paste0("FT-", ftlog$Sample)
ctdlog <- read.csv("ctdlog.csv") %>% filter(Depth == 0)
ctdlog$Name <- paste0("CTD-", ctdlog$Sample_ID)
ftlog_notprocessed<- ftlog %>% select(Name, start_lat_degdec, start_long_degdec) %>% filter(Name %ni% a4long$Name)
names(ftlog_notprocessed)<- c("Name", "Lat", "Long")
ftlog_vols <- ftlog %>% select(Name, start_lat_degdec, start_long_degdec, vol_start, vol_end) %>% filter(Name %in% a4long$Name)
ftlog_vols$vol <- ftlog_vols$vol_end - ftlog_vols$vol_start
min(ftlog_vols$vol)
## [1] 5.3
mean(ftlog_vols$vol)
## [1] 11.13333
max(ftlog_vols$vol)
## [1] 18
ftlog<- ftlog %>% select(Name, start_lat_degdec, start_long_degdec) %>% filter(Name %in% a4long$Name)
names(ftlog)<- c("Name", "Lat", "Long")
ctdlog<- ctdlog %>% select(Name, Lat, Long) %>% filter(Name %in% a4long$Name)
sample_locs <- rbind(ftlog, ctdlog)
a4long_locs<- left_join(a4long, sample_locs, by = "Name")
wider_a4long_locs<- a4long_locs %>% select(Name, Order, Abundance, Lat, Long) %>% pivot_wider( names_from = c("Order"), values_from = c("Abundance"))
^ volumes filtered per flow-through/underway sample
library(ggspatial)
library(rnaturalearth)
library(rnaturalearthdata)
##
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
##
## countries110
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(scatterpie)
scatter_colors <- c("#ef3c23", "#cfe5cc", "#fac92c" ,"darkgrey", "#215F38", "#0181BB")
world <- ne_countries(scale = "medium", returnclass = "sf")
strainlist <- unique(a4long_locs$Order)
rna <- read.csv("ctdlog.csv")
FT_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=wider_a4long_locs %>% filter(str_detect(Name, "FT")),aes(x=Long, y=Lat, group = Name, r=0.2), cols = strainlist, color=NA, alpha=.8 ) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("Underway") +
scale_fill_manual(values = scatter_colors) +
coord_sf(xlim = c(-70, -65), ylim = c(-66, -62), expand = TRUE)+ theme(legend.position = "bottom") +
geom_point(data = ftlog_notprocessed, aes (x= Long, y= Lat), color = "red", size = 0.1) +
geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.2)
CTD_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=wider_a4long_locs %>% filter(str_detect(Name, "CTD")),aes(x=Long, y=Lat, group = Name, r=0.5), cols = strainlist, color=NA, alpha=.8 ) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("CTD/Niskin") + scale_fill_manual(values = scatter_colors) + coord_sf(xlim = c(-73, -56), ylim = c(-68, -60), expand = TRUE) + theme(legend.position = "none") + geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.1)
CTD_scatterpie_plot + FT_scatterpie_plot +plot_layout(guides = "collect") &
theme(legend.position='bottom')
Figure 3 A-B
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Name != "A30" & Year !=2023)
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')
ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)
mergemelt<- psmelt(ps_colony_RA_glomO_F)
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."
bact_a4long_locs<- left_join(mergemelt, sample_locs, by = "Name")
bact_wider_a4long_locs<- bact_a4long_locs %>% select(Name, Order, Abundance, Lat, Long) %>% group_by(Name, Order, Lat, Long) %>% summarize(Abundance = sum(Abundance)) %>% ungroup() %>% pivot_wider( names_from = c("Order"), values_from = c("Abundance"), values_fill = 0)
scatter_colors <- c(colors30[1:3], "#F95700", colors30[7:8], colors30[12], colors30[15:16], colors30[21])
strainlist <- c("Flavobacteriales", "Oceanospirillales" ,"Alteromonadales" , "SAR11_clade", "Thiomicrospirales", "Cellvibrionales" , "Rhodobacterales" , "SAR86_clade","Burkholderiales", "Chitinophagales" )
bact_wider_a4long_locs<- data.frame(bact_wider_a4long_locs)
FT_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=bact_wider_a4long_locs %>% filter(str_detect(Name, "FT")),aes(x=Long, y=Lat, group = Name, r=0.2), cols = strainlist, color=NA, alpha=.8 ) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("Flow-through") +
scale_fill_manual(values = scatter_colors) +
coord_sf(xlim = c(-70, -65), ylim = c(-66, -62), expand = TRUE) + theme(legend.position = "bottom") +
geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.2)
CTD_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=bact_wider_a4long_locs %>% filter(str_detect(Name, "CTD")),aes(x=Long, y=Lat, group = Name, r=0.5), cols = strainlist, color=NA, alpha=.8 ) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("CTD/Niskin") + scale_fill_manual(values = scatter_colors) + coord_sf(xlim = c(-73, -56), ylim = c(-68, -60), expand = TRUE) + theme(legend.position = "none") + geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.1)
CTD_scatterpie_plot + FT_scatterpie_plot + plot_layout(guides = "collect") &
theme(legend.position='bottom')
Figure 3 C-D
PSColony4corr<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" & Year !=2023)
ps_colony_RA4corr<- transform_sample_counts(PSColony4corr, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA4corr <- filter_taxa(ps_colony_RA4corr, function(x) sum(x) > 1, TRUE)
melted_Colony4Corr <- psmelt(ps_colony_RA4corr)
colAlteromonas4orr <- melted_Colony4Corr %>% filter(Order == " o__Alteromonadales")
AlteromonasASVs <- unique(colAlteromonas4orr$OTU)
Ant4corr<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Name != "A30" & Year !=2023)
Ant4corr<- transform_sample_counts(Ant4corr, function(OTU) 100* OTU/sum(OTU))
melted_Ant4corrr <- psmelt(Ant4corr)
altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)
#pivot wider
wider_altero<- altero_Ant4corrr %>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_altero) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer_altero)[2] <- "Prymnesiophyceae"
Correlation between all ALteromonadales ASVs in both colony and environmental samples to phaeocystis ASVs in environmental samples:
longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)
The most abundant colony-associated ASV:
longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(bact_ASV =="6c1096cb4d53196f3b70850af15c7b35") %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)
####Paraglaciecola Sum up all paraglaciecola ASVs and plot:
altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)
summed_genera <- altero_Ant4corrr %>% group_by(Name, Genus) %>% summarize(total_abund = sum(Abundance)) %>% filter( Genus == " g__Paraglaciecola")
## `summarise()` has grouped output by 'Name'. You can override using the
## `.groups` argument.
#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(summed_genera)
## Joining with `by = join_by(Name)`
names(longer_altero)[2] <- "Prymnesiophyceae"
Paraglaciecola <-longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=total_abund, fill = source, shape = source)) + geom_point(size = 3) + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Paraglaciecola sp.") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))
Paraglaciecola
## `geom_smooth()` using formula = 'y ~ x'
CTD lm and significance:
data = longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "CTD")
summary(lm(formula = total_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = total_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## 1 2 3 4 5
## 0.09972 -0.27477 1.87663 -2.75799 1.05640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.18753 1.69346 -0.701 0.534
## Prymnesiophyceae 0.14951 0.04697 3.183 0.050 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.027 on 3 degrees of freedom
## Multiple R-squared: 0.7716, Adjusted R-squared: 0.6954
## F-statistic: 10.13 on 1 and 3 DF, p-value: 0.04997
Flowthrough lm and significance:
data = longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "FT")
summary(lm(formula = total_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = total_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.149 -3.813 -0.034 1.679 8.334
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.3266 4.1028 -1.542 0.1541
## Prymnesiophyceae 0.3645 0.1091 3.339 0.0075 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.098 on 10 degrees of freedom
## Multiple R-squared: 0.5272, Adjusted R-squared: 0.4799
## F-statistic: 11.15 on 1 and 10 DF, p-value: 0.007501
Sum up Colwellia ASVs and plot:
altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)
summed_genera <- altero_Ant4corrr %>% group_by(Name, Genus) %>% summarize(total_abund = sum(Abundance)) %>% filter( Genus == " g__Colwellia")
## `summarise()` has grouped output by 'Name'. You can override using the
## `.groups` argument.
#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(summed_genera)
## Joining with `by = join_by(Name)`
names(longer_altero)[2] <- "Prymnesiophyceae"
Colwellia <- longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=total_abund, fill = source, shape = source)) + geom_point(size = 3) + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Colwellia sp.") + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))
Colwellia
FT-10 53.369710 g__Colwellia 7.29 % (end of 600 line) FT-18 16.081232 g__Colwellia 13.48 % (landward end of flowthroughs on 400 line) FT-7 16.412716 g__Colwellia 19.24 % (landward end of flowthroughs on 600 line)
REMEMBER - field is RNA = activity - Colwellia is more ACTIVE at lower phaeo sites?
Visualize %Paraglaciecola/Colwellia in each colony - Plot rel abund of all alteronmonadales genera for field colonies:
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" &Year == 2021)
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Alteromonadales")
ps_colony_RA_glomG<- tax_glom(ps_colony_RA, 'Genus')
ps_colony_RA_glomG_F = filter_taxa(ps_colony_RA_glomG , function(x) sum(x) > 1, TRUE)
mergemelt<- psmelt(ps_colony_RA_glomG_F)
mergemelt$Genus<-str_sub(mergemelt$Genus, 5, str_length(mergemelt$Genus))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."
spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Genus)) + facet_grid(~ Station, scales = "free")
spatial_plot + geom_bar(aes(), stat="identity", position="stack") +
theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "#262626", "#d0cef3","#e99eb2", "#ffc100", "#fafff1" )) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank())
ggsave(“Alteromonas_barplot.pdf”, width = 7, height = 4) Supplemental Figure 6
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" &Year == 2021)
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Oceanospirillales")
ps_colony_RA_glomF<- tax_glom(ps_colony_RA, 'Family')
ps_colony_RA_glomG_F = filter_taxa(ps_colony_RA_glomF , function(x) sum(x) > 1, TRUE)
mergemelt<- psmelt(ps_colony_RA_glomG_F)
mergemelt$Family<-str_sub(mergemelt$Family, 5, str_length(mergemelt$Family))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."
spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Family)) + facet_grid(~ Station, scales = "free")
spatial_plot + geom_bar(aes(), stat="identity", position="stack") +
theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "#262626", "#d0cef3","#e99eb2", "#ffc100", "#fafff1" )) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank())
ggsave(“Oceano_barplot.pdf”, width = 7, height = 4) Supplemental Figure 9A
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Name != "A30" &Year == 2021)
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Oceanospirillales")
#ps_colony_RA_glomF<- tax_glom(ps_colony_RA, 'Family')
ps_colony_RA_F = filter_taxa(ps_colony_RA , function(x) sum(x) > 1, TRUE)
mergemelt<- psmelt(ps_colony_RA_F)
mergemelt$Family<-str_sub(mergemelt$Family, 5, str_length(mergemelt$Family))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."
mergemelt <- mergemelt %>% filter(OTU %in% c("90fd832464c5fbc417533484304f566f", "3f64882f4221742c1cfa89d89cfe7975"))
spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=OTU)) + facet_grid(~ Station, scales = "free")
spatial_plot + geom_bar(aes(), stat="identity", position="stack") +
theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "hotpink" ,"pink")) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank())
ggsave(“Oceano_barplot_ASVs.pdf”, width = 7, height = 4) Supplemental Figure 9B
col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales")
oceanoASVs <- unique(col4corr_oceano$OTU)
oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"
longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)
mostly driven by 90fd … and d536 90fd832464c5fbc417533484304f566f d536522dff81586b788ee4acdbadb881
The colony-associated Oceanospirillales was dominated by two ASVs 90fd832464c5fbc417533484304f566f (uncultured) & 3f64882f4221742c1cfa89d89cfe7975 (Profundimonas sp., family: Nitrincolaceae)
Environmentally abundant = 90fd832464c5fbc417533484304f566f (uncultured, family: Nitrincolaceae) - most abundant by a long shot (~5-25% of enviro samples) &
d536522dff81586b788ee4acdbadb881 (0-5.5ish of enviro samples) (uncultured, family: Nitrincolaceae)
col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales") %>% filter(OTU %in% c("90fd832464c5fbc417533484304f566f","3f64882f4221742c1cfa89d89cfe7975","d536522dff81586b788ee4acdbadb881" ) )
oceanoASVs <- unique(col4corr_oceano$OTU)
oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"
longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source)) +facet_wrap(~bact_ASV) + geom_point(size = 3, shape = 21) + theme_test() + ggtitle("Oceanospirillales") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("#00A4CC", "#F95700" )) + scale_color_manual(values = c("#00A4CC", "#F95700" ))
## `geom_smooth()` using formula = 'y ~ x'
ggsave(“oceano_phaeo_correlations.pdf”, width =8.5, height = 3.5) Supplemental Figure 10
filter(OTU %in% c(“90fd832464c5fbc417533484304f566f”,“3f64882f4221742c1cfa89d89cfe7975”,“d536522dff81586b788ee4acdbadb881” ) )
CTD
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("3f64882f4221742c1cfa89d89cfe7975"))
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## 1 2 3 4 5
## -0.03678 -0.07714 0.27464 -0.06177 -0.09894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.080436 0.149688 -0.537 0.6283
## Prymnesiophyceae 0.016111 0.004152 3.881 0.0303 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1792 on 3 degrees of freedom
## Multiple R-squared: 0.8339, Adjusted R-squared: 0.7785
## F-statistic: 15.06 on 1 and 3 DF, p-value: 0.03031
FT
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "FT") %>% filter(bact_ASV %in% c("3f64882f4221742c1cfa89d89cfe7975"))
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39245 -0.19911 -0.02003 0.12273 0.64741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.496254 0.225084 -2.205 0.05202 .
## Prymnesiophyceae 0.022874 0.005988 3.820 0.00337 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2797 on 10 degrees of freedom
## Multiple R-squared: 0.5934, Adjusted R-squared: 0.5527
## F-statistic: 14.59 on 1 and 10 DF, p-value: 0.003374
CTD
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("90fd832464c5fbc417533484304f566f"))
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## 1 2 3 4 5
## 1.1352 -2.4046 4.2002 -3.0602 0.1293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.3459 2.8160 2.609 0.0798 .
## Prymnesiophyceae 0.2891 0.0781 3.701 0.0343 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.371 on 3 degrees of freedom
## Multiple R-squared: 0.8203, Adjusted R-squared: 0.7605
## F-statistic: 13.7 on 1 and 3 DF, p-value: 0.03425
FT
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "FT") %>% filter(bact_ASV %in% c("90fd832464c5fbc417533484304f566f"))
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.1933 -1.6592 0.6824 2.1373 3.4966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.17810 2.48608 2.083 0.0639 .
## Prymnesiophyceae 0.19804 0.06614 2.994 0.0135 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.089 on 10 degrees of freedom
## Multiple R-squared: 0.4727, Adjusted R-squared: 0.42
## F-statistic: 8.966 on 1 and 10 DF, p-value: 0.01347
CTD
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("d536522dff81586b788ee4acdbadb881"))
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## 1 2 3 4 5
## -0.5064 -0.4827 0.6102 2.7111 -2.3323
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005792 1.781907 0.003 0.998
## Prymnesiophyceae 0.075820 0.049420 1.534 0.223
##
## Residual standard error: 2.133 on 3 degrees of freedom
## Multiple R-squared: 0.4396, Adjusted R-squared: 0.2529
## F-statistic: 2.354 on 1 and 3 DF, p-value: 0.2225
FT
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "FT") %>% filter(bact_ASV %in% c("d536522dff81586b788ee4acdbadb881"))
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6077 -0.3321 -0.1682 0.3708 2.4802
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.72098 0.85415 -0.844 0.4184
## Prymnesiophyceae 0.06464 0.02272 2.845 0.0174 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.061 on 10 degrees of freedom
## Multiple R-squared: 0.4473, Adjusted R-squared: 0.392
## F-statistic: 8.093 on 1 and 10 DF, p-value: 0.0174
sum_up
col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales")
oceanoASVs <- unique(col4corr_oceano$OTU)
oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs) %>% group_by(Name) %>% summarize(total_abund = sum(Abundance))
#pivot wider
#wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(oceano_Ant4corrr) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"
Oceanospirillales<- longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source, shape = source)) + geom_point(size = 3) + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Oceanospirillales") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))
Oceanospirillales
## `geom_smooth()` using formula = 'y ~ x'
CTD
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "CTD")
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## 1 2 3 4 5
## 0.6289 -2.9977 5.0767 -0.4163 -2.2916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.30982 3.07224 2.379 0.0977 .
## Prymnesiophyceae 0.38046 0.08521 4.465 0.0209 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.678 on 3 degrees of freedom
## Multiple R-squared: 0.8692, Adjusted R-squared: 0.8256
## F-statistic: 19.94 on 1 and 3 DF, p-value: 0.02092
FT
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "FT")
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.5743 -1.1521 0.0937 0.6093 5.1538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.84218 2.59413 3.794 0.00352 **
## Prymnesiophyceae 0.16548 0.06901 2.398 0.03746 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.223 on 10 degrees of freedom
## Multiple R-squared: 0.3651, Adjusted R-squared: 0.3016
## F-statistic: 5.749 on 1 and 10 DF, p-value: 0.03746
Paraglaciecola + Colwellia + Oceanospirillales + plot_layout(guides = "collect")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
ggsave(“phaeo_bact_correlations.pdf”, width =8.5, height = 3.5)
Figure 3C
col4corr_sphing <- melted_Colony4Corr %>% filter(Order == " o__Sphingomonadales")
sphingASVs <- unique(col4corr_sphing$OTU)
sphing_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% sphingASVs)
#pivot wider
wider_sphing<- sphing_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_sphing) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"
longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)
The colony-associated Sphingomonas ASVs are not detected in seawater samples…
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Name != "A30" & Year !=2023)
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')
mergemelt<- psmelt(ps_colony_RA_glomO)
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt <- mergemelt %>% filter(Order == "Sphingobacteriales")
sphingo_sums <- mergemelt %>% group_by(Sample, Name, Station) %>% summarize(total = sum(Abundance)) %>% filter(total > 0 )
## `summarise()` has grouped output by 'Sample', 'Name'. You can override using
## the `.groups` argument.
sphingo_sums
## # A tibble: 6 × 4
## # Groups: Sample, Name [6]
## Sample Name Station total
## <chr> <chr> <chr> <dbl>
## 1 Ant1 FT-10 600 0.00374
## 2 Ant12 FT-20 400-line 0.0383
## 3 Ant17 CTD-26 600-line 0.00819
## 4 Ant2 FT-21 400-line 0.0160
## 5 Ant6 FT-9 600-line 0.00795
## 6 Ant8 FT-13 600-500tran 0.0195
max(sphingo_sums$total)
## [1] 0.03833988
Sphingomonas as a group is not detected or extremely low abundance in all environmental samples…
col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Cellvibrionales")
oceanoASVs <- unique(col4corr_oceano$OTU)
oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"
longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)
bc13a8f6afa703399c40d5879cc68221
oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU == "bc13a8f6afa703399c40d5879cc68221")
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))
#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"
cv<- longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source)) + geom_point(size = 3, shape = 21) + theme_test() + ggtitle("SAR92") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("#00A4CC", "#F95700" )) + scale_color_manual(values = c("#00A4CC", "#F95700" ))
cv
## `geom_smooth()` using formula = 'y ~ x'
min(longer$rel_abund)
## [1] 0.6245234
max(longer$rel_abund)
## [1] 8.160423
mean(longer$rel_abund)
## [1] 3.495746
CTD
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "CTD")
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## 1 2 3 4 5
## -1.5689 0.2756 1.2457 2.9162 -2.8686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.20253 2.20070 1.001 0.391
## Prymnesiophyceae 0.06870 0.06104 1.126 0.342
##
## Residual standard error: 2.635 on 3 degrees of freedom
## Multiple R-squared: 0.2969, Adjusted R-squared: 0.06256
## F-statistic: 1.267 on 1 and 3 DF, p-value: 0.3423
FT
data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(source == "FT")
summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
##
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4769 -0.4509 -0.1104 0.3728 1.8167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.61598 0.79262 0.777 0.45506
## Prymnesiophyceae 0.07258 0.02109 3.442 0.00631 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9849 on 10 degrees of freedom
## Multiple R-squared: 0.5423, Adjusted R-squared: 0.4966
## F-statistic: 11.85 on 1 and 10 DF, p-value: 0.006307
Visualize Overall abundance of Cellvibrionales in environmental samples:
PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Name != "A30" & Year !=2023)
ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))
ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')
ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)
mergemelt<- psmelt(ps_colony_RA_glomO_F)
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."
spatial_plot <- ggplot(data=mergemelt, aes(x=Sample, y=Abundance, fill=Order))
spatial_plot + geom_bar(aes(), stat="identity", position="stack") +
theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())
Visualize overall abundance of SAR92 in environmental samples:
PSenviro<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Name != "A30" & Year !=2023)
ps_enviro_RA<- transform_sample_counts(PSenviro, function(OTU) 100* OTU/sum(OTU))
ps_enviro_oceano <- subset_taxa(ps_enviro_RA, Order == " o__Cellvibrionales")
ps_enviro_RA_glomG<- tax_glom(ps_enviro_oceano , 'Genus')
ps_enviro_RA_glomG = filter_taxa(ps_enviro_RA_glomG , function(x) sum(x) > 0.1, TRUE)
cv_mergemelt<- psmelt(ps_enviro_RA_glomG)
cv_mergemelt$Order<-str_sub(cv_mergemelt$Order, 5, str_length(cv_mergemelt$Order))
#mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."
spatial_plot <- ggplot(data=cv_mergemelt, aes(x=Name, y=Abundance, fill=Genus))
spatial_plot + geom_bar(aes(), stat="identity", position="stack") +
theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())
Almost all Cellvibrionales seqs in environmental samples are SAR92
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] scatterpie_0.2.1 sf_1.0-15 rnaturalearthdata_1.0.0
## [4] rnaturalearth_1.0.1 ggspatial_1.1.9 splitstackshape_1.4.8
## [7] biomformat_1.30.0 ape_5.7-1 reshape2_1.4.4
## [10] scales_1.3.0 patchwork_1.2.0 stringr_1.5.1
## [13] vegan_2.6-4 permute_0.9-7 magrittr_2.0.3
## [16] viridis_0.6.5 viridisLite_0.4.2 gridExtra_2.3
## [19] dplyr_1.1.4 tidyr_1.3.1 ggplot2_3.5.0
## [22] phyloseq_1.46.0 CoDaSeq_0.99.7 car_3.1-2
## [25] carData_3.0-5 ALDEx2_1.34.0 latticeExtra_0.6-30
## [28] lattice_0.22-5 zCompositions_1.5.0-1 truncnorm_1.0-9
## [31] NADA_1.6-1.1 survival_3.5-7 MASS_7.3-60.0.1
## [34] qiime2R_0.99.6
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.15.0
## [3] jsonlite_1.8.8 farver_2.1.1
## [5] rmarkdown_2.25 zlibbioc_1.48.0
## [7] vctrs_0.6.5 multtest_2.58.0
## [9] RCurl_1.98-1.14 terra_1.7-71
## [11] base64enc_0.1-3 htmltools_0.5.7
## [13] S4Arrays_1.2.0 Rhdf5lib_1.24.2
## [15] SparseArray_1.2.4 Formula_1.2-5
## [17] rhdf5_2.46.1 sass_0.4.8
## [19] KernSmooth_2.23-22 bslib_0.6.1
## [21] htmlwidgets_1.6.4 plyr_1.8.9
## [23] cachem_1.0.8 igraph_2.0.1.1
## [25] lifecycle_1.0.4 iterators_1.0.14
## [27] pkgconfig_2.0.3 Matrix_1.6-5
## [29] R6_2.5.1 fastmap_1.1.1
## [31] GenomeInfoDbData_1.2.11 MatrixGenerics_1.14.0
## [33] digest_0.6.34 colorspace_2.1-0
## [35] S4Vectors_0.40.2 Hmisc_5.1-1
## [37] GenomicRanges_1.54.1 labeling_0.4.3
## [39] RcppZiggurat_0.1.6 fansi_1.0.6
## [41] polyclip_1.10-6 httr_1.4.7
## [43] abind_1.4-5 mgcv_1.9-1
## [45] compiler_4.3.2 proxy_0.4-27
## [47] withr_3.0.0 htmlTable_2.4.2
## [49] backports_1.4.1 BiocParallel_1.36.0
## [51] DBI_1.2.0 highr_0.10
## [53] ggforce_0.4.1 DelayedArray_0.28.0
## [55] classInt_0.4-10 units_0.8-5
## [57] tools_4.3.2 foreign_0.8-86
## [59] nnet_7.3-19 glue_1.7.0
## [61] quadprog_1.5-8 nlme_3.1-164
## [63] rhdf5filters_1.14.1 grid_4.3.2
## [65] checkmate_2.3.1 gridBase_0.4-7
## [67] cluster_2.1.6 ade4_1.7-22
## [69] generics_0.1.3 gtable_0.3.4
## [71] class_7.3-22 data.table_1.15.0
## [73] utf8_1.2.4 XVector_0.42.0
## [75] BiocGenerics_0.48.1 foreach_1.5.2
## [77] pillar_1.9.0 splines_4.3.2
## [79] tweenr_2.0.2 deldir_2.0-2
## [81] directlabels_2024.1.21 tidyselect_1.2.0
## [83] Biostrings_2.70.2 knitr_1.45
## [85] IRanges_2.36.0 SummarizedExperiment_1.32.0
## [87] stats4_4.3.2 xfun_0.42
## [89] Biobase_2.62.0 matrixStats_1.2.0
## [91] DT_0.31 stringi_1.8.3
## [93] ggfun_0.1.4 yaml_2.3.8
## [95] evaluate_0.23 codetools_0.2-19
## [97] interp_1.1-6 tibble_3.2.1
## [99] cli_3.6.2 RcppParallel_5.1.7
## [101] rpart_4.1.23 munsell_0.5.0
## [103] jquerylib_0.1.4 Rcpp_1.0.12
## [105] GenomeInfoDb_1.38.6 png_0.1-8
## [107] Rfast_2.1.0 jpeg_0.1-10
## [109] bitops_1.0-7 e1071_1.7-14
## [111] purrr_1.0.2 crayon_1.5.2
## [113] rlang_1.1.3