Setup

import packages and functions, define colors

library(qiime2R)
library(CoDaSeq)
library(phyloseq)
library(ggplot2)
library(tidyr)
library("dplyr")
library("gridExtra")
#library(RCircos)
#library(circlize)
library(viridis)
#library(UpSetR)
#library(knitr)
library(tidyr)
library(magrittr)
library(vegan)
library(stringr)
library(patchwork)

`%ni%` = Negate(`%in%`)

source("~/Desktop/PhaeoPhycosphere/functions/legendplot.R")

colors=c('#e9e9e9','#C14450','#f0b08f','#c2aba6','#60555f','#3c6481','#9fd6e6','#256F64','#63a375')

red<- c("#EB8B8E","#FBD5E2","#E7B6AF","#AC6873", "#D82354")
orange <- c("#FAAA6D","#FECF92")
yellow <- c("#FFC317","#F7F4B7", "#CC9C3C")
green <- c("#16866F","#1E3F1C","#99A339","#516A65","#8BC89F")
blue <- c("#005694","#B7E1DD","#66879E","#1BAAE2","#5FC8D8")
purple <- c("#E7D7CE","#A699A9","#434582","#81347D", "#B5218E")

colors30 <- c(blue, purple, red, yellow, orange, green, "black")

Import Additional functions:

scripts <- c("graphical_methods.R",
             "tree_methods.R",
             "plot_merged_trees.R",
             "specificity_methods.R",
             "ternary_plot.R",
             "richness.R",
             "edgePCA.R",
             "copy_number_correction.R",
             "import_frogs.R",
             "prevalence.R",
             "compute_niche.R")
urls <- paste0("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/", scripts)

for (url in urls) {
  source(url)
}

import data

MERGED run analysis all 16S data from two runs (includes Antarctica single colonies (original and after 1 yr) and Antarctica environmental samples)

Import and format feature table (ASV counts in each sample) and meta date (sample names, )

ps<-qza_to_phyloseq(features="MERGEDtable99.qza")

meta<- read.csv("MERGEDsampleMetaData.txt", sep = "\t", header = TRUE)
row.names(meta)<- meta$SampleID

meta$Year <- as.character(meta$Year)
meta$Station_Year <- paste(meta$Station, meta$Year)

meta<- meta %>% separate(Name, sep="-", into = c("type", "number
                                                "), remove = FALSE)

META <- sample_data(meta)

Import and format SILVA taxonomy:

SILVA 138-99 taxonomy trimmed to match the primers used on samples

#taxonomy <- read.csv("MERGED_taxonomy.csv", stringsAsFactors = FALSE)
taxonomy <- read.csv("silva_primered_MERGED_taxonomy.csv", stringsAsFactors = FALSE)

names(taxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(taxonomy) <-taxonomy[[1]] #move the feature ID column to become the row names
taxonomy <- taxonomy[,(-1)] #delete the feature ID  column 
taxonomy <-  separate(taxonomy, tax, c("Domain","Phylum", "Class", "Order", "Family", "Genus", "Species", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
taxonomy <- taxonomy[,c(1:7)]

taxonomy$D0 <- with(taxonomy, ifelse(Order == " o__Chloroplast", "Chloroplast", "Bacteria"))

col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
taxonomy<- taxonomy[, col_order]

taxmat <- as.matrix(taxonomy)
TAX = tax_table(taxmat)

Import and format PhytoREF taxonomy:

PhytoREF trained with same primers

phytoR <- read.table("PhytoRef_tax_str.txt", header = FALSE)

library(splitstackshape)
phytoR_split<- cSplit(phytoR, "V1", "|")

phytoR_split$taxa <- paste(phytoR_split$V1_03, phytoR_split$V1_04, phytoR_split$V1_05, phytoR_split$V1_06 , phytoR_split$V1_07, phytoR_split$V1_08, phytoR_split$V1_09, phytoR_split$V1_10, phytoR_split$V1_11, phytoR_split$V1_12, sep = "; ")

phytoR_split$taxonomy <- paste(phytoR_split$taxa, phytoR_split$V2, sep = " ")

phytoR <- phytoR_split  %>%  dplyr::select(V1_01, taxonomy)

write.table(phytoR, "phytoRef_tax_final.txt", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)
PRtaxonomy <- read.csv("phytoRef_MERGED_taxonomy.csv", stringsAsFactors = FALSE)
names(PRtaxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(PRtaxonomy) <-PRtaxonomy[[1]] #move the feature ID column to become the row names
PRtaxonomy <- PRtaxonomy[,(-1)] #delete the feature ID  column 
PRtaxonomy <-  separate(PRtaxonomy, tax, c("Domain","Phylum", "Class", "Order", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13"), sep = ";", fill = "right")

#taxonomy <- taxonomy[,c(1:7)]
#col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
#taxonomy<- taxonomy[, col_order]

PRtaxmat <- as.matrix(PRtaxonomy)
PRTAX = tax_table(PRtaxmat)

Make phyloseq object:

ps = merge_phyloseq(ps, TAX, META) 

Chloroplast sequences

Plot relative abundance as stacked bar plot (all samples including chloroplast sequences) to visualize the relative abundance of chloroplast and bacterial sequences in each sample:

ps<- subset_taxa(ps, D0 %in% c("Bacteria", "Chloroplast") )
ps <- subset_samples(ps, Dataset %in% c("PantCol", "Antarctica"))
psra<- transform_sample_counts(ps, function(OTU) 100* OTU/sum(OTU))
glomD1<- tax_glom(psra, 'D0')


taxabarplot<-plot_bar(glomD1, x= "Name", fill = "D0") +  scale_y_continuous(expand = c(0, 0)) + ggtitle("")  + theme(legend.title=element_blank()) + geom_bar(aes(fill=D0), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values=c("lightgrey", "#7BB03B"), name = "") + xlab("") + facet_grid(~Dataset + Year, scales = "free")

taxabarplot

Supplemental Figure 2 ggsave(“chloroplast_barplot.pdf”, width = 10, height = 5)

Get summary statistics about the amount of chloroplast sequences in single-colony samples (these are all Phaeocystis/host and not data that are particularly useful)”

pantcol_chloro_stats <- subset_samples(glomD1, Dataset == "PantCol")  
chloro_stat_melt <- psmelt(pantcol_chloro_stats) %>% filter(D0 == "Chloroplast")

min(chloro_stat_melt$Abundance)
## [1] 52.67964
mean(chloro_stat_melt$Abundance)
## [1] 72.86961
max(chloro_stat_melt$Abundance)
## [1] 96.43276

Create a phyloseq object with only chloroplast sequences and switch out taxonomy from SILVA to PhytoRef:

ps_chloro <- subset_taxa(ps, Order == " o__Chloroplast" )

ps_chlorotax <- merge_phyloseq(ps_chloro, PRTAX)

Bacterial Sequences

Create a phyloseq object with only bacterial sequences:

ps_nochloro <- subset_taxa(ps, Order != " o__Chloroplast" & Domain != "Unassigned")

Rarefaction Curves

ggrare(ps_nochloro, step = 1000, color = "Dataset", label = "Name", se = FALSE)
## rarefying sample A1-A2-4971
## rarefying sample A10-A14-4971
## rarefying sample A11-A15-4971
## rarefying sample A12-A20-4971
## rarefying sample A2-A5-4971
## rarefying sample A3-A6-4971
## rarefying sample A4-A8-4971
## rarefying sample A5-A9-4971
## rarefying sample A6-A10-4971
## rarefying sample A7-A11-4971
## rarefying sample A8-A12-4971
## rarefying sample A9-A13-4971
## rarefying sample B1-A21-4971
## rarefying sample B10-A30-4971
## rarefying sample B11-A31-4971
## rarefying sample B12-A32-4971
## rarefying sample B2-A22-4971
## rarefying sample B3-A23-4971
## rarefying sample B4-A24-4971
## rarefying sample B5-A25-4971
## rarefying sample B6-A26-4971
## rarefying sample B7-A27-4971
## rarefying sample B8-A28-4971
## rarefying sample B9-A29-4971
## rarefying sample C1-A33-4971
## rarefying sample C2-A34-4971
## rarefying sample C3-A35-4971
## rarefying sample Ant1
## rarefying sample Ant10
## rarefying sample Ant11
## rarefying sample Ant12
## rarefying sample Ant13
## rarefying sample Ant14
## rarefying sample Ant15
## rarefying sample Ant16
## rarefying sample Ant17
## rarefying sample Ant2
## rarefying sample Ant3
## rarefying sample Ant4
## rarefying sample Ant5
## rarefying sample Ant6
## rarefying sample Ant7
## rarefying sample Ant8
## rarefying sample Ant9
## rarefying sample col1
## rarefying sample col10
## rarefying sample col11
## rarefying sample col12
## rarefying sample col13
## rarefying sample col14
## rarefying sample col2
## rarefying sample col3
## rarefying sample col4
## rarefying sample col5
## rarefying sample col6
## rarefying sample col7
## rarefying sample col8
## rarefying sample col9

Rarefaction for Antarctica colonies:

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30")

colony_bacteria_rare<- ggrare(PSColony, step = 1000, color = "Station_Year", se = FALSE) + theme_test() + scale_color_manual(values= c("#005694","#FFC317", "#5FC8D8" ))
## rarefying sample A1-A2-4971
## rarefying sample A10-A14-4971
## rarefying sample A11-A15-4971
## rarefying sample A12-A20-4971
## rarefying sample A2-A5-4971
## rarefying sample A3-A6-4971
## rarefying sample A4-A8-4971
## rarefying sample A5-A9-4971
## rarefying sample A6-A10-4971
## rarefying sample A7-A11-4971
## rarefying sample A8-A12-4971
## rarefying sample A9-A13-4971
## rarefying sample B1-A21-4971
## rarefying sample B11-A31-4971
## rarefying sample B12-A32-4971
## rarefying sample B2-A22-4971
## rarefying sample B3-A23-4971
## rarefying sample B4-A24-4971
## rarefying sample B5-A25-4971
## rarefying sample B6-A26-4971
## rarefying sample B7-A27-4971
## rarefying sample B8-A28-4971
## rarefying sample B9-A29-4971
## rarefying sample C1-A33-4971
## rarefying sample C2-A34-4971
## rarefying sample C3-A35-4971
## rarefying sample col1
## rarefying sample col10
## rarefying sample col11
## rarefying sample col12
## rarefying sample col13
## rarefying sample col14
## rarefying sample col2
## rarefying sample col3
## rarefying sample col4
## rarefying sample col5
## rarefying sample col6
## rarefying sample col7
## rarefying sample col8
## rarefying sample col9

colony_bacteria_rare

 ant_enviro<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Year == "2021")

field_bacteria_rare<-ggrare( ant_enviro, step = 1000, color = "type", se = FALSE) + theme_test() + scale_color_manual(values= c("#1E3F1C","#8BC89F"))
## rarefying sample Ant1
## rarefying sample Ant10
## rarefying sample Ant11
## rarefying sample Ant12
## rarefying sample Ant13
## rarefying sample Ant14
## rarefying sample Ant15
## rarefying sample Ant16
## rarefying sample Ant17
## rarefying sample Ant2
## rarefying sample Ant3
## rarefying sample Ant4
## rarefying sample Ant5
## rarefying sample Ant6
## rarefying sample Ant7
## rarefying sample Ant8
## rarefying sample Ant9

field_bacteria_rare

colony_bacteria_rare + field_bacteria_rare 

Supplemental Figure 3

Alpha diversity

ASV Richness

totalOTU <- data.frame(otu_table(ps_nochloro))
totalOTU$rowsu <- rowSums(totalOTU)
totalOTUnotzero <- totalOTU %>% filter(rowsu >1)
dim(totalOTUnotzero)
## [1] 1429   59
plugin <- ps_nochloro %>%
            estimate_richness(measures = "Observed") %$% Observed
Dataset <- ps_nochloro %>% sample_data %$% Dataset
Station <- ps_nochloro %>% sample_data %$% Station
Replicate <- ps_nochloro %>% sample_data %$% Rep
Month <- ps_nochloro %>% sample_data %$% Month
Year <- ps_nochloro %>% sample_data %$% Year

richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")


richness %>%group_by(Dataset, Year) %>% summarize(mean = mean(richness), min = min(richness), max = max(richness))
## # A tibble: 3 × 5
## # Groups:   Dataset [2]
##   Dataset    Year   mean   min   max
##   <chr>      <chr> <dbl> <dbl> <dbl>
## 1 Antarctica 2021  199.    140   317
## 2 PantCol    2021   44.1    28    67
## 3 PantCol    2023   24.9    18    31
RichPlot<- richness %>%  filter(Dataset == "PantCol" ) %>% ggplot( aes(x=Station, y=richness, fill = Year))+ facet_grid(~ Year + Station, scales = "free") +geom_boxplot() + theme_bw() + scale_fill_manual(values = c( "#75D1B7", "#256F64", "black", "grey")) + theme(text = element_text(size=14)) +ylab("Observed Richness") +ggtitle("")+ theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +xlab("")#+ limy(0, 90)

RichPlot

ggsave(“richness_antarctica_colonies.pdf”, width = 5, height =3)

Figure 2A

Richness summary stats for manuescript: - need total unique ASVs for each sample type plus the mean and range per samples

PSColony_field<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

PSColony_field = filter_taxa(PSColony_field, function(x) sum(x) > 1, TRUE)

PSColony_field
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 241 taxa and 26 samples ]
## sample_data() Sample Data:       [ 26 samples by 13 sample variables ]
## tax_table()   Taxonomy Table:    [ 241 taxa by 8 taxonomic ranks ]

-> 241 unique ASVs across all field-collected colony microbiomes

plugin <- PSColony_field %>%
            estimate_richness(measures = "Observed") %$% Observed
Dataset <- PSColony_field %>% sample_data %$% Dataset
Station <- PSColony_field %>% sample_data %$% Station
Replicate <- PSColony_field %>% sample_data %$% Rep
Month <- PSColony_field %>% sample_data %$% Month
Year <- PSColony_field %>% sample_data %$% Year

richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")

min(richness$richness)
## [1] 28
mean(richness$richness)
## [1] 43.80769
max(richness$richness)
## [1] 67

^ min, mean, max per field-collected colony

Cultured-colonies:

PSColony_culture<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" & Year == 2023)

PSColony_culture = filter_taxa(PSColony_culture, function(x) sum(x) > 1, TRUE)

PSColony_culture
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 74 taxa and 14 samples ]
## sample_data() Sample Data:       [ 14 samples by 13 sample variables ]
## tax_table()   Taxonomy Table:    [ 74 taxa by 8 taxonomic ranks ]

-> 74 ASVs total across all cultured colonies

plugin <- PSColony_culture %>%
            estimate_richness(measures = "Observed") %$% Observed
Dataset <- PSColony_culture %>% sample_data %$% Dataset
Station <- PSColony_culture %>% sample_data %$% Station
Replicate <- PSColony_culture %>% sample_data %$% Rep
Month <- PSColony_culture %>% sample_data %$% Month
Year <- PSColony_culture %>% sample_data %$% Year

richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")

min(richness$richness)
## [1] 18
mean(richness$richness)
## [1] 24.92857
max(richness$richness)
## [1] 31

^ min, mean, and max ASVs counts in cultured-colony microbiomes

Beta Diversity

Bar Plots

First look Barplot (at Order level):

ps_nochloro_RA<- transform_sample_counts(ps_nochloro, function(OTU) 100* OTU/sum(OTU))

ps_nochloro_RA_glomO<- tax_glom(ps_nochloro_RA, 'Order')

taxabarplot<-plot_bar(ps_nochloro_RA_glomO, x= "Name", fill = "Order") +  scale_y_continuous(expand = c(0, 0)) + ggtitle("")  + theme(legend.title=element_blank()) + geom_bar(aes(fill=Order), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values= rep( colors, 30))+ xlab("") + facet_grid(~Dataset, scales= "free_x")

taxabarplot+ theme(legend.position="none")

Colonies barplot

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30")

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')

ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomO_F) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Order)) + facet_grid(~ Year + Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())

ggsave(“antarctica_colonies.pdf”, width = 5, height =5)

Figure 2B

Ordinations

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol")  &  Name != "A30")

OTU4clr<- data.frame(t(data.frame(otu_table(PSColony))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

p<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Year, shape = Station), size =3)  + scale_shape_manual(values= c(21,24)) +scale_fill_manual(values=colors[-1]) +guides(fill = guide_legend(override.aes=list(shape=21)))
p

ggsave(“ant_col_PCoA.pdf”, width = 5, height =3)

Supplemental Figure 4

PERMANOVA by year (field-collected v. cultured colony microbiomes)

OTUs<- data.frame(otu_table(psCLR))
meta4year <- meta[row.names(meta) %in% row.names(OTUs),]


meta4year$Sample.Type <- factor(meta4year$Year , levels = c("2021", "2023"))

set.seed(1)
adonis2(vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4year)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4year)
##             Df SumOfSqs      R2      F Pr(>F)    
## Sample.Type  1   3714.3 0.53776 44.209  0.001 ***
## Residual    38   3192.6 0.46224                  
## Total       39   6906.9 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Field-collected and cultured colony microbiomes are significantly different

PCoA of just field-collected colonies to check for differences between stations:

AntColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol")  &  Name != "A30" & Year == "2021")
OTU4clr<- data.frame(t(data.frame(otu_table(AntColony))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

p<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Station, shape = Dataset), size =3)  + scale_shape_manual(values= c(21,21)) +scale_fill_manual(values=colors[-1]) +guides(fill = guide_legend(override.aes=list(shape=21)))
p

PERMANOVA by station :

OTUs<- data.frame(otu_table(psCLR))
meta4stat <- meta[row.names(meta) %in% row.names(OTUs),]


meta4stat$Sample.Type <- factor(meta4stat$Station , levels = c("400", "600"))

set.seed(1)
adonis2(vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4stat)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4stat)
##             Df SumOfSqs      R2      F Pr(>F)  
## Sample.Type  1   158.22 0.05458 1.3855  0.058 .
## Residual    24  2740.78 0.94542                
## Total       25  2899.00 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Stations are not significantly different

Heat plots of ASVs by order

Prepare data frame:

AntColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Region == "Antarctica" &  Name != "A30")
nchF = filter_taxa(AntColony, function(x) sum(x) > 1, TRUE)
nchfffra<- transform_sample_counts(nchF, function(OTU) 100* OTU/sum(OTU))
a_ra <- psmelt(nchfffra)

Alteromonadales

altero<- subset_taxa(nchfffra, Order == " o__Alteromonadales")

sw_ra <- psmelt(altero)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

Supplemental Figure 5

Oceanospirillales

oceano<- subset_taxa(nchfffra, Order == " o__Oceanospirillales")

sw_ra <- psmelt(oceano)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

ggsave(“oceano_heatplot.pdf”, width = 7, height = 5)

Supplemental FIgure 8

Rhodobacterales

rhodo<- subset_taxa(nchfffra, Order == " o__Rhodobacterales")

sw_ra <- psmelt(rhodo)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

ggsave(“Rhodo_heatplot.pdf”, width = 7, height = 5) Supplemental Figure 7

sulfito <- sw_long %>% filter(Year == "2023") %>% filter(OTU == "8ee2658ac7dd8e0f20d396c3d3e1a539")

min(sulfito$Abundance)
## [1] 12.24997
mean(sulfito$Abundance)
## [1] 22.04735
max(sulfito$Abundance)
## [1] 30.79332

^ summary stats for sulfitobacter rel abunandance in cultured-colony microbiomes

Cellvibrionales

cellv<- subset_taxa(nchfffra, Order == " o__Cellvibrionales")

sw_ra <- psmelt(cellv)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

bc13a8f6afa703399c40d5879cc68221 = Sar92 clade ASV <- get summary stats

sw_long %>%  filter(OTU == "bc13a8f6afa703399c40d5879cc68221") %>%  summarize(mean = mean(Abundance), min = min(Abundance), max = max(Abundance))
##      mean min      max
## 1 2.78122   0 18.11352

Caulobacterialaes

caulo<- subset_taxa(nchfffra, Order == " o__Caulobacterales")

sw_ra <- psmelt(caulo)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")


heat_plot

algimonas <- sw_long %>% filter(Year =="2023") %>% filter(OTU %in% c("e7f391400f0a3e23b1c732d60e33016c", "715318dadbd602bb7863947527c67e3e")) %>% group_by(Name) %>% summarize(total = sum(Abundance))

min(algimonas$total)
## [1] 6.431744
mean(algimonas$total)
## [1] 18.20647
max(algimonas$total)
## [1] 29.7241
sd(algimonas$total)
## [1] 6.349521

^ summary stats for algimonas in cultured-colony microbiomes

Sphingomonadales

sphingo<- subset_taxa(nchfffra, Order == " o__Sphingomonadales")

sw_ra <- psmelt(sphingo)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")


heat_plot

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")


heat_plot

Blast results for the the Sphingomonas ASV abundant in field-collected colony microbiomes:

f1ea98d49baf3dd5dd4b2456017e5d48 GCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCAGGGATGATAATGACAGTACCTGGAGAATAAGCTCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTTCAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCCTTGAAACTGGATGGCTAGAATACTGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGACTCACTGGACAGTTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATAC

Erythrobacter - previously found in marine sediment

Antarctica environmental mapping

Chloroplasts geoplot

OTU4chloro<- (data.frame(otu_table(ps_chloro)))
names(OTU4chloro) <- gsub("\\.", "-", names(OTU4chloro))

PRtaxdf <- PRtaxonomy %>% filter(row.names(PRtaxonomy) %in% row.names(OTU4chloro))

PRtaxmat <- as.matrix(PRtaxdf)
PRTAX = tax_table(PRtaxmat)

OTU2 <- otu_table(as.matrix(OTU4chloro), taxa_are_rows = TRUE)

ps_chlorotax  <- phyloseq(OTU2,PRTAX,META)

ps_chloro_RA<- transform_sample_counts(ps_chlorotax, function(OTU) 100* OTU/sum(OTU))

ps_chloro_RA_glomO<- tax_glom(ps_chloro_RA, 'Order')
pouch_col_chloro <- subset_samples(ps_chloro_RA_glomO, Dataset %in% c("Antarctica") & Year == "2021")
nchF = filter_taxa(pouch_col_chloro, function(x) sum(x) > 1, TRUE)
a_ra <- psmelt(nchF)

a4long<- a_ra %>%  dplyr::select(OTU, Sample, Name, Order, Class, Abundance, Station, Year)

a4long<- a4long[rev(order(a4long$Order)),]

factord<- unique(a4long$OTU)

a4long$OTU <- factor(a4long$OTU, levels = factord)
extraction_log<- read.csv("antRNA_extractions .csv")

FTextractions <- extraction_log %>% filter(SampleType =="FT")
CTDextractions <- extraction_log %>% filter(SampleType =="CTD")

ftlog <- read.csv("ftlog.csv")
ftlog$Name<- paste0("FT-", ftlog$Sample)
ctdlog <- read.csv("ctdlog.csv") %>% filter(Depth == 0)
ctdlog$Name <- paste0("CTD-", ctdlog$Sample_ID)

ftlog_notprocessed<- ftlog %>% select(Name, start_lat_degdec, start_long_degdec) %>% filter(Name %ni% a4long$Name)

names(ftlog_notprocessed)<- c("Name", "Lat", "Long")

ftlog_vols <- ftlog %>% select(Name, start_lat_degdec, start_long_degdec, vol_start, vol_end) %>% filter(Name %in% a4long$Name)

ftlog_vols$vol <- ftlog_vols$vol_end - ftlog_vols$vol_start

min(ftlog_vols$vol)
## [1] 5.3
mean(ftlog_vols$vol)
## [1] 11.13333
max(ftlog_vols$vol)
## [1] 18
ftlog<- ftlog %>% select(Name, start_lat_degdec, start_long_degdec) %>% filter(Name %in% a4long$Name)


names(ftlog)<- c("Name", "Lat", "Long")


ctdlog<- ctdlog %>% select(Name, Lat, Long) %>% filter(Name %in% a4long$Name)

sample_locs <- rbind(ftlog, ctdlog) 

a4long_locs<- left_join(a4long, sample_locs, by = "Name")

wider_a4long_locs<- a4long_locs %>% select(Name, Order, Abundance, Lat, Long) %>% pivot_wider( names_from = c("Order"), values_from = c("Abundance"))

^ volumes filtered per flow-through/underway sample

library(ggspatial)
library(rnaturalearth)
library(rnaturalearthdata)
## 
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(scatterpie)

scatter_colors <- c("#ef3c23",  "#cfe5cc", "#fac92c" ,"darkgrey", "#215F38", "#0181BB")

world <- ne_countries(scale = "medium", returnclass = "sf")

strainlist <- unique(a4long_locs$Order)
rna <- read.csv("ctdlog.csv")

FT_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=wider_a4long_locs %>% filter(str_detect(Name, "FT")),aes(x=Long, y=Lat, group = Name, r=0.2), cols = strainlist, color=NA, alpha=.8 ) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("Underway")  + 
  scale_fill_manual(values = scatter_colors) + 
  coord_sf(xlim = c(-70, -65), ylim = c(-66, -62), expand = TRUE)+ theme(legend.position = "bottom") + 
  geom_point(data = ftlog_notprocessed, aes (x= Long, y= Lat), color = "red", size = 0.1) +
  geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.2) 
CTD_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=wider_a4long_locs %>% filter(str_detect(Name, "CTD")),aes(x=Long, y=Lat, group = Name, r=0.5), cols = strainlist, color=NA, alpha=.8 ) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("CTD/Niskin")  + scale_fill_manual(values = scatter_colors) + coord_sf(xlim = c(-73, -56), ylim = c(-68, -60), expand = TRUE) + theme(legend.position = "none") + geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.1)
  CTD_scatterpie_plot + FT_scatterpie_plot +plot_layout(guides = "collect") &
  theme(legend.position='bottom')

Figure 3 A-B

Bacteria geoplot scatterpie

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')

ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomO_F) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."
bact_a4long_locs<- left_join(mergemelt, sample_locs, by = "Name")

bact_wider_a4long_locs<- bact_a4long_locs %>% select(Name, Order, Abundance, Lat, Long) %>% group_by(Name, Order, Lat, Long) %>% summarize(Abundance = sum(Abundance)) %>% ungroup() %>%  pivot_wider( names_from = c("Order"), values_from = c("Abundance"), values_fill = 0)
scatter_colors <- c(colors30[1:3], "#F95700", colors30[7:8],  colors30[12], colors30[15:16],  colors30[21])

strainlist <- c("Flavobacteriales", "Oceanospirillales" ,"Alteromonadales"  , "SAR11_clade", "Thiomicrospirales", "Cellvibrionales"  , "Rhodobacterales"  , "SAR86_clade","Burkholderiales",  "Chitinophagales" )    

bact_wider_a4long_locs<- data.frame(bact_wider_a4long_locs)

FT_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=bact_wider_a4long_locs %>% filter(str_detect(Name, "FT")),aes(x=Long, y=Lat, group = Name, r=0.2), cols = strainlist, color=NA, alpha=.8 ) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("Flow-through")  + 
  scale_fill_manual(values = scatter_colors) + 
  coord_sf(xlim = c(-70, -65), ylim = c(-66, -62), expand = TRUE) + theme(legend.position = "bottom") + 
  geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.2) 
CTD_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=bact_wider_a4long_locs %>% filter(str_detect(Name, "CTD")),aes(x=Long, y=Lat, group = Name, r=0.5), cols = strainlist, color=NA, alpha=.8 ) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("CTD/Niskin")  + scale_fill_manual(values = scatter_colors) + coord_sf(xlim = c(-73, -56), ylim = c(-68, -60), expand = TRUE) + theme(legend.position = "none") + geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.1)
  CTD_scatterpie_plot + FT_scatterpie_plot + plot_layout(guides = "collect") &
  theme(legend.position='bottom')

Figure 3 C-D

Correlations

Alteromonadales

PSColony4corr<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" & Year !=2023)

ps_colony_RA4corr<- transform_sample_counts(PSColony4corr, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA4corr <- filter_taxa(ps_colony_RA4corr, function(x) sum(x) > 1, TRUE)


melted_Colony4Corr <- psmelt(ps_colony_RA4corr) 

colAlteromonas4orr <- melted_Colony4Corr %>% filter(Order == " o__Alteromonadales")

AlteromonasASVs <- unique(colAlteromonas4orr$OTU)



Ant4corr<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

Ant4corr<- transform_sample_counts(Ant4corr, function(OTU) 100* OTU/sum(OTU))

melted_Ant4corrr <- psmelt(Ant4corr) 

altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)
#pivot wider
wider_altero<- altero_Ant4corrr %>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_altero) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer_altero)[2] <- "Prymnesiophyceae"

Correlation between all ALteromonadales ASVs in both colony and environmental samples to phaeocystis ASVs in environmental samples:

longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

The most abundant colony-associated ASV:

longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(bact_ASV =="6c1096cb4d53196f3b70850af15c7b35") %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

####Paraglaciecola Sum up all paraglaciecola ASVs and plot:

altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)


summed_genera <- altero_Ant4corrr %>%  group_by(Name, Genus) %>% summarize(total_abund = sum(Abundance)) %>% filter( Genus == " g__Paraglaciecola")
## `summarise()` has grouped output by 'Name'. You can override using the
## `.groups` argument.
#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(summed_genera)
## Joining with `by = join_by(Name)`
names(longer_altero)[2] <- "Prymnesiophyceae"

Paraglaciecola <-longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=total_abund, fill = source, shape = source)) + geom_point(size = 3)  + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Paraglaciecola sp.") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))


Paraglaciecola 
## `geom_smooth()` using formula = 'y ~ x'

CTD lm and significance:

data = longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD")

summary(lm(formula = total_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = total_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##        1        2        3        4        5 
##  0.09972 -0.27477  1.87663 -2.75799  1.05640 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -1.18753    1.69346  -0.701    0.534  
## Prymnesiophyceae  0.14951    0.04697   3.183    0.050 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.027 on 3 degrees of freedom
## Multiple R-squared:  0.7716, Adjusted R-squared:  0.6954 
## F-statistic: 10.13 on 1 and 3 DF,  p-value: 0.04997

Flowthrough lm and significance:

data = longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT")

summary(lm(formula = total_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = total_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.149 -3.813 -0.034  1.679  8.334 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -6.3266     4.1028  -1.542   0.1541   
## Prymnesiophyceae   0.3645     0.1091   3.339   0.0075 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.098 on 10 degrees of freedom
## Multiple R-squared:  0.5272, Adjusted R-squared:  0.4799 
## F-statistic: 11.15 on 1 and 10 DF,  p-value: 0.007501

Colwellia

Sum up Colwellia ASVs and plot:

altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)


summed_genera <- altero_Ant4corrr %>%  group_by(Name, Genus) %>% summarize(total_abund = sum(Abundance)) %>% filter( Genus == " g__Colwellia")
## `summarise()` has grouped output by 'Name'. You can override using the
## `.groups` argument.
#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(summed_genera)
## Joining with `by = join_by(Name)`
names(longer_altero)[2] <- "Prymnesiophyceae"

Colwellia <- longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=total_abund, fill = source, shape = source)) + geom_point(size = 3)  + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Colwellia sp.") + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))

Colwellia

FT-10 53.369710 g__Colwellia 7.29 % (end of 600 line) FT-18 16.081232 g__Colwellia 13.48 % (landward end of flowthroughs on 400 line) FT-7 16.412716 g__Colwellia 19.24 % (landward end of flowthroughs on 600 line)

REMEMBER - field is RNA = activity - Colwellia is more ACTIVE at lower phaeo sites?

Visualize %Paraglaciecola/Colwellia in each colony - Plot rel abund of all alteronmonadales genera for field colonies:

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Alteromonadales")

ps_colony_RA_glomG<- tax_glom(ps_colony_RA, 'Genus')

ps_colony_RA_glomG_F = filter_taxa(ps_colony_RA_glomG , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomG_F) 
mergemelt$Genus<-str_sub(mergemelt$Genus, 5, str_length(mergemelt$Genus))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Genus)) + facet_grid(~ Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "#262626", "#d0cef3","#e99eb2", "#ffc100", "#fafff1" )) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank()) 

ggsave(“Alteromonas_barplot.pdf”, width = 7, height = 4) Supplemental Figure 6

Oceanospirillales

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Oceanospirillales")

ps_colony_RA_glomF<- tax_glom(ps_colony_RA, 'Family')

ps_colony_RA_glomG_F = filter_taxa(ps_colony_RA_glomF , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomG_F) 
mergemelt$Family<-str_sub(mergemelt$Family, 5, str_length(mergemelt$Family))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Family)) + facet_grid(~ Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "#262626", "#d0cef3","#e99eb2", "#ffc100", "#fafff1" )) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank()) 

ggsave(“Oceano_barplot.pdf”, width = 7, height = 4) Supplemental Figure 9A

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Oceanospirillales")

#ps_colony_RA_glomF<- tax_glom(ps_colony_RA, 'Family')

ps_colony_RA_F = filter_taxa(ps_colony_RA , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_F) 

mergemelt$Family<-str_sub(mergemelt$Family, 5, str_length(mergemelt$Family))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."

mergemelt <- mergemelt %>% filter(OTU %in% c("90fd832464c5fbc417533484304f566f", "3f64882f4221742c1cfa89d89cfe7975"))

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=OTU)) + facet_grid(~ Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "hotpink" ,"pink")) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank()) 

ggsave(“Oceano_barplot_ASVs.pdf”, width = 7, height = 4) Supplemental Figure 9B

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales")

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

mostly driven by 90fd … and d536 90fd832464c5fbc417533484304f566f d536522dff81586b788ee4acdbadb881

The colony-associated Oceanospirillales was dominated by two ASVs 90fd832464c5fbc417533484304f566f (uncultured) & 3f64882f4221742c1cfa89d89cfe7975 (Profundimonas sp., family: Nitrincolaceae)

Environmentally abundant = 90fd832464c5fbc417533484304f566f (uncultured, family: Nitrincolaceae) - most abundant by a long shot (~5-25% of enviro samples) &

d536522dff81586b788ee4acdbadb881 (0-5.5ish of enviro samples) (uncultured, family: Nitrincolaceae)

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales") %>%  filter(OTU  %in% c("90fd832464c5fbc417533484304f566f","3f64882f4221742c1cfa89d89cfe7975","d536522dff81586b788ee4acdbadb881" ) )

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source)) +facet_wrap(~bact_ASV) + geom_point(size = 3, shape = 21)  + theme_test() + ggtitle("Oceanospirillales") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("#00A4CC", "#F95700" )) + scale_color_manual(values = c("#00A4CC", "#F95700" ))
## `geom_smooth()` using formula = 'y ~ x'

ggsave(“oceano_phaeo_correlations.pdf”, width =8.5, height = 3.5) Supplemental Figure 10

filter(OTU %in% c(“90fd832464c5fbc417533484304f566f”,“3f64882f4221742c1cfa89d89cfe7975”,“d536522dff81586b788ee4acdbadb881” ) )

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("3f64882f4221742c1cfa89d89cfe7975"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##        1        2        3        4        5 
## -0.03678 -0.07714  0.27464 -0.06177 -0.09894 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.080436   0.149688  -0.537   0.6283  
## Prymnesiophyceae  0.016111   0.004152   3.881   0.0303 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1792 on 3 degrees of freedom
## Multiple R-squared:  0.8339, Adjusted R-squared:  0.7785 
## F-statistic: 15.06 on 1 and 3 DF,  p-value: 0.03031

FT

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT") %>% filter(bact_ASV %in% c("3f64882f4221742c1cfa89d89cfe7975"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39245 -0.19911 -0.02003  0.12273  0.64741 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -0.496254   0.225084  -2.205  0.05202 . 
## Prymnesiophyceae  0.022874   0.005988   3.820  0.00337 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2797 on 10 degrees of freedom
## Multiple R-squared:  0.5934, Adjusted R-squared:  0.5527 
## F-statistic: 14.59 on 1 and 10 DF,  p-value: 0.003374

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("90fd832464c5fbc417533484304f566f"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
##  1.1352 -2.4046  4.2002 -3.0602  0.1293 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        7.3459     2.8160   2.609   0.0798 .
## Prymnesiophyceae   0.2891     0.0781   3.701   0.0343 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.371 on 3 degrees of freedom
## Multiple R-squared:  0.8203, Adjusted R-squared:  0.7605 
## F-statistic:  13.7 on 1 and 3 DF,  p-value: 0.03425

FT

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT") %>% filter(bact_ASV %in% c("90fd832464c5fbc417533484304f566f"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1933 -1.6592  0.6824  2.1373  3.4966 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       5.17810    2.48608   2.083   0.0639 .
## Prymnesiophyceae  0.19804    0.06614   2.994   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.089 on 10 degrees of freedom
## Multiple R-squared:  0.4727, Adjusted R-squared:   0.42 
## F-statistic: 8.966 on 1 and 10 DF,  p-value: 0.01347

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("d536522dff81586b788ee4acdbadb881"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
## -0.5064 -0.4827  0.6102  2.7111 -2.3323 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      0.005792   1.781907   0.003    0.998
## Prymnesiophyceae 0.075820   0.049420   1.534    0.223
## 
## Residual standard error: 2.133 on 3 degrees of freedom
## Multiple R-squared:  0.4396, Adjusted R-squared:  0.2529 
## F-statistic: 2.354 on 1 and 3 DF,  p-value: 0.2225

FT

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT") %>% filter(bact_ASV %in% c("d536522dff81586b788ee4acdbadb881"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6077 -0.3321 -0.1682  0.3708  2.4802 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.72098    0.85415  -0.844   0.4184  
## Prymnesiophyceae  0.06464    0.02272   2.845   0.0174 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.061 on 10 degrees of freedom
## Multiple R-squared:  0.4473, Adjusted R-squared:  0.392 
## F-statistic: 8.093 on 1 and 10 DF,  p-value: 0.0174

sum_up

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales")

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs) %>% group_by(Name) %>% summarize(total_abund = sum(Abundance))
#pivot wider
#wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(oceano_Ant4corrr) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"


Oceanospirillales<- longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source, shape = source)) + geom_point(size = 3)  + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Oceanospirillales") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))

Oceanospirillales
## `geom_smooth()` using formula = 'y ~ x'

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
##  0.6289 -2.9977  5.0767 -0.4163 -2.2916 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       7.30982    3.07224   2.379   0.0977 .
## Prymnesiophyceae  0.38046    0.08521   4.465   0.0209 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.678 on 3 degrees of freedom
## Multiple R-squared:  0.8692, Adjusted R-squared:  0.8256 
## F-statistic: 19.94 on 1 and 3 DF,  p-value: 0.02092

FT

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5743 -1.1521  0.0937  0.6093  5.1538 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       9.84218    2.59413   3.794  0.00352 **
## Prymnesiophyceae  0.16548    0.06901   2.398  0.03746 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.223 on 10 degrees of freedom
## Multiple R-squared:  0.3651, Adjusted R-squared:  0.3016 
## F-statistic: 5.749 on 1 and 10 DF,  p-value: 0.03746
Paraglaciecola + Colwellia + Oceanospirillales + plot_layout(guides = "collect")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(“phaeo_bact_correlations.pdf”, width =8.5, height = 3.5)

Figure 3C

Sphingomonadales

col4corr_sphing <- melted_Colony4Corr %>% filter(Order == " o__Sphingomonadales")

sphingASVs <- unique(col4corr_sphing$OTU)


sphing_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% sphingASVs)
#pivot wider
wider_sphing<- sphing_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_sphing) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

The colony-associated Sphingomonas ASVs are not detected in seawater samples…

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')



mergemelt<- psmelt(ps_colony_RA_glomO) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))

mergemelt <- mergemelt %>% filter(Order == "Sphingobacteriales")

sphingo_sums <- mergemelt %>% group_by(Sample, Name, Station) %>% summarize(total = sum(Abundance))  %>% filter(total > 0 )
## `summarise()` has grouped output by 'Sample', 'Name'. You can override using
## the `.groups` argument.
sphingo_sums
## # A tibble: 6 × 4
## # Groups:   Sample, Name [6]
##   Sample Name   Station       total
##   <chr>  <chr>  <chr>         <dbl>
## 1 Ant1   FT-10  600         0.00374
## 2 Ant12  FT-20  400-line    0.0383 
## 3 Ant17  CTD-26 600-line    0.00819
## 4 Ant2   FT-21  400-line    0.0160 
## 5 Ant6   FT-9   600-line    0.00795
## 6 Ant8   FT-13  600-500tran 0.0195
max(sphingo_sums$total)
## [1] 0.03833988

Sphingomonas as a group is not detected or extremely low abundance in all environmental samples…

Cellvibrionales (SAR92)

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Cellvibrionales")

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

bc13a8f6afa703399c40d5879cc68221

oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU == "bc13a8f6afa703399c40d5879cc68221")
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")
## Joining with `by = join_by(Name)`
names(longer)[2] <- "Prymnesiophyceae"


cv<- longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source)) + geom_point(size = 3, shape = 21)  + theme_test() + ggtitle("SAR92") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("#00A4CC", "#F95700" )) + scale_color_manual(values = c("#00A4CC", "#F95700" ))

cv
## `geom_smooth()` using formula = 'y ~ x'

min(longer$rel_abund)
## [1] 0.6245234
max(longer$rel_abund)
## [1] 8.160423
mean(longer$rel_abund)
## [1] 3.495746

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
## -1.5689  0.2756  1.2457  2.9162 -2.8686 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       2.20253    2.20070   1.001    0.391
## Prymnesiophyceae  0.06870    0.06104   1.126    0.342
## 
## Residual standard error: 2.635 on 3 degrees of freedom
## Multiple R-squared:  0.2969, Adjusted R-squared:  0.06256 
## F-statistic: 1.267 on 1 and 3 DF,  p-value: 0.3423

FT

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))
## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4769 -0.4509 -0.1104  0.3728  1.8167 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       0.61598    0.79262   0.777  0.45506   
## Prymnesiophyceae  0.07258    0.02109   3.442  0.00631 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9849 on 10 degrees of freedom
## Multiple R-squared:  0.5423, Adjusted R-squared:  0.4966 
## F-statistic: 11.85 on 1 and 10 DF,  p-value: 0.006307

Visualize Overall abundance of Cellvibrionales in environmental samples:

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')

ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomO_F) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Sample, y=Abundance, fill=Order)) 

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())

Visualize overall abundance of SAR92 in environmental samples:

PSenviro<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_enviro_RA<- transform_sample_counts(PSenviro, function(OTU) 100* OTU/sum(OTU))

ps_enviro_oceano <- subset_taxa(ps_enviro_RA, Order == " o__Cellvibrionales")

ps_enviro_RA_glomG<- tax_glom(ps_enviro_oceano , 'Genus')

ps_enviro_RA_glomG = filter_taxa(ps_enviro_RA_glomG , function(x) sum(x) > 0.1, TRUE)

cv_mergemelt<- psmelt(ps_enviro_RA_glomG) 
cv_mergemelt$Order<-str_sub(cv_mergemelt$Order, 5, str_length(cv_mergemelt$Order))
#mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=cv_mergemelt, aes(x=Name, y=Abundance, fill=Genus)) 

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())

Almost all Cellvibrionales seqs in environmental samples are SAR92

Session Info:

sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] scatterpie_0.2.1        sf_1.0-15               rnaturalearthdata_1.0.0
##  [4] rnaturalearth_1.0.1     ggspatial_1.1.9         splitstackshape_1.4.8  
##  [7] biomformat_1.30.0       ape_5.7-1               reshape2_1.4.4         
## [10] scales_1.3.0            patchwork_1.2.0         stringr_1.5.1          
## [13] vegan_2.6-4             permute_0.9-7           magrittr_2.0.3         
## [16] viridis_0.6.5           viridisLite_0.4.2       gridExtra_2.3          
## [19] dplyr_1.1.4             tidyr_1.3.1             ggplot2_3.5.0          
## [22] phyloseq_1.46.0         CoDaSeq_0.99.7          car_3.1-2              
## [25] carData_3.0-5           ALDEx2_1.34.0           latticeExtra_0.6-30    
## [28] lattice_0.22-5          zCompositions_1.5.0-1   truncnorm_1.0-9        
## [31] NADA_1.6-1.1            survival_3.5-7          MASS_7.3-60.0.1        
## [34] qiime2R_0.99.6         
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          rstudioapi_0.15.0          
##   [3] jsonlite_1.8.8              farver_2.1.1               
##   [5] rmarkdown_2.25              zlibbioc_1.48.0            
##   [7] vctrs_0.6.5                 multtest_2.58.0            
##   [9] RCurl_1.98-1.14             terra_1.7-71               
##  [11] base64enc_0.1-3             htmltools_0.5.7            
##  [13] S4Arrays_1.2.0              Rhdf5lib_1.24.2            
##  [15] SparseArray_1.2.4           Formula_1.2-5              
##  [17] rhdf5_2.46.1                sass_0.4.8                 
##  [19] KernSmooth_2.23-22          bslib_0.6.1                
##  [21] htmlwidgets_1.6.4           plyr_1.8.9                 
##  [23] cachem_1.0.8                igraph_2.0.1.1             
##  [25] lifecycle_1.0.4             iterators_1.0.14           
##  [27] pkgconfig_2.0.3             Matrix_1.6-5               
##  [29] R6_2.5.1                    fastmap_1.1.1              
##  [31] GenomeInfoDbData_1.2.11     MatrixGenerics_1.14.0      
##  [33] digest_0.6.34               colorspace_2.1-0           
##  [35] S4Vectors_0.40.2            Hmisc_5.1-1                
##  [37] GenomicRanges_1.54.1        labeling_0.4.3             
##  [39] RcppZiggurat_0.1.6          fansi_1.0.6                
##  [41] polyclip_1.10-6             httr_1.4.7                 
##  [43] abind_1.4-5                 mgcv_1.9-1                 
##  [45] compiler_4.3.2              proxy_0.4-27               
##  [47] withr_3.0.0                 htmlTable_2.4.2            
##  [49] backports_1.4.1             BiocParallel_1.36.0        
##  [51] DBI_1.2.0                   highr_0.10                 
##  [53] ggforce_0.4.1               DelayedArray_0.28.0        
##  [55] classInt_0.4-10             units_0.8-5                
##  [57] tools_4.3.2                 foreign_0.8-86             
##  [59] nnet_7.3-19                 glue_1.7.0                 
##  [61] quadprog_1.5-8              nlme_3.1-164               
##  [63] rhdf5filters_1.14.1         grid_4.3.2                 
##  [65] checkmate_2.3.1             gridBase_0.4-7             
##  [67] cluster_2.1.6               ade4_1.7-22                
##  [69] generics_0.1.3              gtable_0.3.4               
##  [71] class_7.3-22                data.table_1.15.0          
##  [73] utf8_1.2.4                  XVector_0.42.0             
##  [75] BiocGenerics_0.48.1         foreach_1.5.2              
##  [77] pillar_1.9.0                splines_4.3.2              
##  [79] tweenr_2.0.2                deldir_2.0-2               
##  [81] directlabels_2024.1.21      tidyselect_1.2.0           
##  [83] Biostrings_2.70.2           knitr_1.45                 
##  [85] IRanges_2.36.0              SummarizedExperiment_1.32.0
##  [87] stats4_4.3.2                xfun_0.42                  
##  [89] Biobase_2.62.0              matrixStats_1.2.0          
##  [91] DT_0.31                     stringi_1.8.3              
##  [93] ggfun_0.1.4                 yaml_2.3.8                 
##  [95] evaluate_0.23               codetools_0.2-19           
##  [97] interp_1.1-6                tibble_3.2.1               
##  [99] cli_3.6.2                   RcppParallel_5.1.7         
## [101] rpart_4.1.23                munsell_0.5.0              
## [103] jquerylib_0.1.4             Rcpp_1.0.12                
## [105] GenomeInfoDb_1.38.6         png_0.1-8                  
## [107] Rfast_2.1.0                 jpeg_0.1-10                
## [109] bitops_1.0-7                e1071_1.7-14               
## [111] purrr_1.0.2                 crayon_1.5.2               
## [113] rlang_1.1.3