Setup

import packages and functions, define colors

library(qiime2R)
library(CoDaSeq)
library(phyloseq)
library(ggplot2)
library(tidyr)
library("dplyr")
library("gridExtra")
#library(RCircos)
#library(circlize)
library(viridis)
#library(UpSetR)
#library(knitr)
library(tidyr)
library(magrittr)
library(vegan)
library(stringr)
library(patchwork)

`%ni%` = Negate(`%in%`)

source("~/Desktop/PhaeoPhycosphere/functions/legendplot.R")

colors=c('#e9e9e9','#C14450','#f0b08f','#c2aba6','#60555f','#3c6481','#9fd6e6','#256F64','#63a375')

red<- c("#EB8B8E","#FBD5E2","#E7B6AF","#AC6873", "#D82354")
orange <- c("#FAAA6D","#FECF92")
yellow <- c("#FFC317","#F7F4B7", "#CC9C3C")
green <- c("#16866F","#1E3F1C","#99A339","#516A65","#8BC89F")
blue <- c("#005694","#B7E1DD","#66879E","#1BAAE2","#5FC8D8")
purple <- c("#E7D7CE","#A699A9","#434582","#81347D", "#B5218E")

colors30 <- c(blue, purple, red, yellow, orange, green, "black")

Import Additional functions:

scripts <- c("graphical_methods.R",
             "tree_methods.R",
             "plot_merged_trees.R",
             "specificity_methods.R",
             "ternary_plot.R",
             "richness.R",
             "edgePCA.R",
             "copy_number_correction.R",
             "import_frogs.R",
             "prevalence.R",
             "compute_niche.R")
urls <- paste0("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/", scripts)

for (url in urls) {
  source(url)
}

import data

MERGED run analysis all 16S data from two runs (includes Antarctica single colonies (original and after 1 yr) and Antarctica environmental samples)

Import and format feature table (ASV counts in each sample) and meta date (sample names, )

ps<-qza_to_phyloseq(features="MERGEDtable99.qza")

meta<- read.csv("MERGEDsampleMetaData.txt", sep = "\t", header = TRUE)
row.names(meta)<- meta$SampleID

meta$Year <- as.character(meta$Year)
meta$Station_Year <- paste(meta$Station, meta$Year)

meta<- meta %>% separate(Name, sep="-", into = c("type", "number
                                                "), remove = FALSE)

META <- sample_data(meta)

Import and format SILVA taxonomy:

SILVA 138-99 taxonomy trimmed to match the primers used on samples

#taxonomy <- read.csv("MERGED_taxonomy.csv", stringsAsFactors = FALSE)
taxonomy <- read.csv("silva_primered_MERGED_taxonomy.csv", stringsAsFactors = FALSE)

names(taxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(taxonomy) <-taxonomy[[1]] #move the feature ID column to become the row names
taxonomy <- taxonomy[,(-1)] #delete the feature ID  column 
taxonomy <-  separate(taxonomy, tax, c("Domain","Phylum", "Class", "Order", "Family", "Genus", "Species", "D7", "D8", "D9", "D10", "D11", "D12", "D13", "D14"), sep = ";", fill = "right")
taxonomy <- taxonomy[,c(1:7)]

taxonomy$D0 <- with(taxonomy, ifelse(Order == " o__Chloroplast", "Chloroplast", "Bacteria"))

col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
taxonomy<- taxonomy[, col_order]

taxmat <- as.matrix(taxonomy)
TAX = tax_table(taxmat)

Import and format PhytoREF taxonomy:

PhytoREF trained with same primers

phytoR <- read.table("PhytoRef_tax_str.txt", header = FALSE)

library(splitstackshape)
phytoR_split<- cSplit(phytoR, "V1", "|")

phytoR_split$taxa <- paste(phytoR_split$V1_03, phytoR_split$V1_04, phytoR_split$V1_05, phytoR_split$V1_06 , phytoR_split$V1_07, phytoR_split$V1_08, phytoR_split$V1_09, phytoR_split$V1_10, phytoR_split$V1_11, phytoR_split$V1_12, sep = "; ")

phytoR_split$taxonomy <- paste(phytoR_split$taxa, phytoR_split$V2, sep = " ")

phytoR <- phytoR_split  %>%  dplyr::select(V1_01, taxonomy)

write.table(phytoR, "phytoRef_tax_final.txt", quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)

PRtaxonomy <- read.csv("phytoRef_MERGED_taxonomy.csv", stringsAsFactors = FALSE)
names(PRtaxonomy) <- c("row", "tax", "Confidence") #change the headers (column names)
row.names(PRtaxonomy) <-PRtaxonomy[[1]] #move the feature ID column to become the row names
PRtaxonomy <- PRtaxonomy[,(-1)] #delete the feature ID  column 
PRtaxonomy <-  separate(PRtaxonomy, tax, c("Domain","Phylum", "Class", "Order", "D5", "D6", "D7", "D8", "D9", "D10", "D11", "D12", "D13"), sep = ";", fill = "right")

#taxonomy <- taxonomy[,c(1:7)]
#col_order<- c("D0", "Domain","Phylum", "Class", "Order", "Family", "Genus", "Species" )
#taxonomy<- taxonomy[, col_order]

PRtaxmat <- as.matrix(PRtaxonomy)
PRTAX = tax_table(PRtaxmat)

Make phyloseq object:

ps = merge_phyloseq(ps, TAX, META)

Chloroplast sequences

Plot relative abundance as stacked bar plot (all samples including chloroplast sequences) to visualize the relative abundance of chloroplast and bacterial sequences in each sample:

ps<- subset_taxa(ps, D0 %in% c("Bacteria", "Chloroplast") )
ps <- subset_samples(ps, Dataset %in% c("PantCol", "Antarctica"))
psra<- transform_sample_counts(ps, function(OTU) 100* OTU/sum(OTU))
glomD1<- tax_glom(psra, 'D0')


taxabarplot<-plot_bar(glomD1, x= "Name", fill = "D0") +  scale_y_continuous(expand = c(0, 0)) + ggtitle("")  + theme(legend.title=element_blank()) + geom_bar(aes(fill=D0), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values=c("lightgrey", "#7BB03B"), name = "") + xlab("") + facet_grid(~Dataset + Year, scales = "free")

taxabarplot

Supplemental Figure 2 ggsave(“chloroplast_barplot.pdf”, width = 10, height = 5)

Get summary statistics about the amount of chloroplast sequences in single-colony samples (these are all Phaeocystis/host and not data that are particularly useful)”

pantcol_chloro_stats <- subset_samples(glomD1, Dataset == "PantCol")  
chloro_stat_melt <- psmelt(pantcol_chloro_stats) %>% filter(D0 == "Chloroplast")

min(chloro_stat_melt$Abundance)

## [1] 52.67964

mean(chloro_stat_melt$Abundance)

## [1] 72.86961

max(chloro_stat_melt$Abundance)

## [1] 96.43276

Create a phyloseq object with only chloroplast sequences and switch out taxonomy from SILVA to PhytoRef:

ps_chloro <- subset_taxa(ps, Order == " o__Chloroplast" )

ps_chlorotax <- merge_phyloseq(ps_chloro, PRTAX)

Bacterial Sequences

Create a phyloseq object with only bacterial sequences:

ps_nochloro <- subset_taxa(ps, Order != " o__Chloroplast" & Domain != "Unassigned")

Rarefaction Curves

ggrare(ps_nochloro, step = 1000, color = "Dataset", label = "Name", se = FALSE)

## rarefying sample A1-A2-4971
## rarefying sample A10-A14-4971
## rarefying sample A11-A15-4971
## rarefying sample A12-A20-4971
## rarefying sample A2-A5-4971
## rarefying sample A3-A6-4971
## rarefying sample A4-A8-4971
## rarefying sample A5-A9-4971
## rarefying sample A6-A10-4971
## rarefying sample A7-A11-4971
## rarefying sample A8-A12-4971
## rarefying sample A9-A13-4971
## rarefying sample B1-A21-4971
## rarefying sample B10-A30-4971
## rarefying sample B11-A31-4971
## rarefying sample B12-A32-4971
## rarefying sample B2-A22-4971
## rarefying sample B3-A23-4971
## rarefying sample B4-A24-4971
## rarefying sample B5-A25-4971
## rarefying sample B6-A26-4971
## rarefying sample B7-A27-4971
## rarefying sample B8-A28-4971
## rarefying sample B9-A29-4971
## rarefying sample C1-A33-4971
## rarefying sample C2-A34-4971
## rarefying sample C3-A35-4971
## rarefying sample Ant1
## rarefying sample Ant10
## rarefying sample Ant11
## rarefying sample Ant12
## rarefying sample Ant13
## rarefying sample Ant14
## rarefying sample Ant15
## rarefying sample Ant16
## rarefying sample Ant17
## rarefying sample Ant2
## rarefying sample Ant3
## rarefying sample Ant4
## rarefying sample Ant5
## rarefying sample Ant6
## rarefying sample Ant7
## rarefying sample Ant8
## rarefying sample Ant9
## rarefying sample col1
## rarefying sample col10
## rarefying sample col11
## rarefying sample col12
## rarefying sample col13
## rarefying sample col14
## rarefying sample col2
## rarefying sample col3
## rarefying sample col4
## rarefying sample col5
## rarefying sample col6
## rarefying sample col7
## rarefying sample col8
## rarefying sample col9

Rarefaction for Antarctica colonies:

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30")

colony_bacteria_rare<- ggrare(PSColony, step = 1000, color = "Station_Year", se = FALSE) + theme_test() + scale_color_manual(values= c("#005694","#FFC317", "#5FC8D8" ))

## rarefying sample A1-A2-4971
## rarefying sample A10-A14-4971
## rarefying sample A11-A15-4971
## rarefying sample A12-A20-4971
## rarefying sample A2-A5-4971
## rarefying sample A3-A6-4971
## rarefying sample A4-A8-4971
## rarefying sample A5-A9-4971
## rarefying sample A6-A10-4971
## rarefying sample A7-A11-4971
## rarefying sample A8-A12-4971
## rarefying sample A9-A13-4971
## rarefying sample B1-A21-4971
## rarefying sample B11-A31-4971
## rarefying sample B12-A32-4971
## rarefying sample B2-A22-4971
## rarefying sample B3-A23-4971
## rarefying sample B4-A24-4971
## rarefying sample B5-A25-4971
## rarefying sample B6-A26-4971
## rarefying sample B7-A27-4971
## rarefying sample B8-A28-4971
## rarefying sample B9-A29-4971
## rarefying sample C1-A33-4971
## rarefying sample C2-A34-4971
## rarefying sample C3-A35-4971
## rarefying sample col1
## rarefying sample col10
## rarefying sample col11
## rarefying sample col12
## rarefying sample col13
## rarefying sample col14
## rarefying sample col2
## rarefying sample col3
## rarefying sample col4
## rarefying sample col5
## rarefying sample col6
## rarefying sample col7
## rarefying sample col8
## rarefying sample col9

colony_bacteria_rare

 ant_enviro<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") & Year == "2021")

field_bacteria_rare<-ggrare( ant_enviro, step = 1000, color = "type", se = FALSE) + theme_test() + scale_color_manual(values= c("#1E3F1C","#8BC89F"))

## rarefying sample Ant1
## rarefying sample Ant10
## rarefying sample Ant11
## rarefying sample Ant12
## rarefying sample Ant13
## rarefying sample Ant14
## rarefying sample Ant15
## rarefying sample Ant16
## rarefying sample Ant17
## rarefying sample Ant2
## rarefying sample Ant3
## rarefying sample Ant4
## rarefying sample Ant5
## rarefying sample Ant6
## rarefying sample Ant7
## rarefying sample Ant8
## rarefying sample Ant9

field_bacteria_rare

colony_bacteria_rare + field_bacteria_rare

Supplemental Figure 3

Alpha diversity

ASV Richness

totalOTU <- data.frame(otu_table(ps_nochloro))
totalOTU$rowsu <- rowSums(totalOTU)
totalOTUnotzero <- totalOTU %>% filter(rowsu >1)
dim(totalOTUnotzero)

## [1] 1429   59

plugin <- ps_nochloro %>%
            estimate_richness(measures = "Observed") %$% Observed
Dataset <- ps_nochloro %>% sample_data %$% Dataset
Station <- ps_nochloro %>% sample_data %$% Station
Replicate <- ps_nochloro %>% sample_data %$% Rep
Month <- ps_nochloro %>% sample_data %$% Month
Year <- ps_nochloro %>% sample_data %$% Year

richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")


richness %>%group_by(Dataset, Year) %>% summarize(mean = mean(richness), min = min(richness), max = max(richness))

## # A tibble: 3 × 5
## # Groups:   Dataset [2]
##   Dataset    Year   mean   min   max
##   <chr>      <chr> <dbl> <dbl> <dbl>
## 1 Antarctica 2021  199.    140   317
## 2 PantCol    2021   44.1    28    67
## 3 PantCol    2023   24.9    18    31

RichPlot<- richness %>%  filter(Dataset == "PantCol" ) %>% ggplot( aes(x=Station, y=richness, fill = Year))+ facet_grid(~ Year + Station, scales = "free") +geom_boxplot() + theme_bw() + scale_fill_manual(values = c( "#75D1B7", "#256F64", "black", "grey")) + theme(text = element_text(size=14)) +ylab("Observed Richness") +ggtitle("")+ theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +xlab("")#+ limy(0, 90)

RichPlot

ggsave(“richness_antarctica_colonies.pdf”, width = 5, height =3)

Figure 2A

Richness summary stats for manuescript: - need total unique ASVs for each sample type plus the mean and range per samples

PSColony_field<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

PSColony_field = filter_taxa(PSColony_field, function(x) sum(x) > 1, TRUE)

PSColony_field

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 241 taxa and 26 samples ]
## sample_data() Sample Data:       [ 26 samples by 13 sample variables ]
## tax_table()   Taxonomy Table:    [ 241 taxa by 8 taxonomic ranks ]

-> 241 unique ASVs across all field-collected colony microbiomes

plugin <- PSColony_field %>%
            estimate_richness(measures = "Observed") %$% Observed
Dataset <- PSColony_field %>% sample_data %$% Dataset
Station <- PSColony_field %>% sample_data %$% Station
Replicate <- PSColony_field %>% sample_data %$% Rep
Month <- PSColony_field %>% sample_data %$% Month
Year <- PSColony_field %>% sample_data %$% Year

richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")

min(richness$richness)

## [1] 28

mean(richness$richness)

## [1] 43.80769

max(richness$richness)

## [1] 67

^ min, mean, max per field-collected colony

Cultured-colonies:

PSColony_culture<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" & Year == 2023)

PSColony_culture = filter_taxa(PSColony_culture, function(x) sum(x) > 1, TRUE)

PSColony_culture

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 74 taxa and 14 samples ]
## sample_data() Sample Data:       [ 14 samples by 13 sample variables ]
## tax_table()   Taxonomy Table:    [ 74 taxa by 8 taxonomic ranks ]

-> 74 ASVs total across all cultured colonies

plugin <- PSColony_culture %>%
            estimate_richness(measures = "Observed") %$% Observed
Dataset <- PSColony_culture %>% sample_data %$% Dataset
Station <- PSColony_culture %>% sample_data %$% Station
Replicate <- PSColony_culture %>% sample_data %$% Rep
Month <- PSColony_culture %>% sample_data %$% Month
Year <- PSColony_culture %>% sample_data %$% Year

richness<- data.frame(plugin, Dataset, Station, Replicate, Month, Year )
names(richness) <- c("richness", "Dataset", "Station", "Replicate", "Month", "Year")

min(richness$richness)

## [1] 18

mean(richness$richness)

## [1] 24.92857

max(richness$richness)

## [1] 31

^ min, mean, and max ASVs counts in cultured-colony microbiomes

Beta Diversity

Bar Plots

First look Barplot (at Order level):

ps_nochloro_RA<- transform_sample_counts(ps_nochloro, function(OTU) 100* OTU/sum(OTU))

ps_nochloro_RA_glomO<- tax_glom(ps_nochloro_RA, 'Order')

taxabarplot<-plot_bar(ps_nochloro_RA_glomO, x= "Name", fill = "Order") +  scale_y_continuous(expand = c(0, 0)) + ggtitle("")  + theme(legend.title=element_blank()) + geom_bar(aes(fill=Order), stat="identity", position="stack", width =0.9) +theme_classic() + theme(text = element_text(size=14))+theme(axis.text.x = element_text(angle = 90)) + xlab("Sample") + ylab("Relative Abundance (%)") + theme(text = element_text(size=14)) + scale_fill_manual(values= rep( colors, 30))+ xlab("") + facet_grid(~Dataset, scales= "free_x")

taxabarplot+ theme(legend.position="none")

Colonies barplot

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30")

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')

ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomO_F) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Order)) + facet_grid(~ Year + Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())

ggsave(“antarctica_colonies.pdf”, width = 5, height =5)

Figure 2B

Ordinations

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol")  &  Name != "A30")

OTU4clr<- data.frame(t(data.frame(otu_table(PSColony))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

p<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Year, shape = Station), size =3)  + scale_shape_manual(values= c(21,24)) +scale_fill_manual(values=colors[-1]) +guides(fill = guide_legend(override.aes=list(shape=21)))
p

ggsave(“ant_col_PCoA.pdf”, width = 5, height =3)

Supplemental Figure 4

PERMANOVA by year (field-collected v. cultured colony microbiomes)

OTUs<- data.frame(otu_table(psCLR))
meta4year <- meta[row.names(meta) %in% row.names(OTUs),]


meta4year$Sample.Type <- factor(meta4year$Year , levels = c("2021", "2023"))

set.seed(1)
adonis2(vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4year)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4year)
##             Df SumOfSqs      R2      F Pr(>F)    
## Sample.Type  1   3714.3 0.53776 44.209  0.001 ***
## Residual    38   3192.6 0.46224                  
## Total       39   6906.9 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Field-collected and cultured colony microbiomes are significantly different

PCoA of just field-collected colonies to check for differences between stations:

AntColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol")  &  Name != "A30" & Year == "2021")
OTU4clr<- data.frame(t(data.frame(otu_table(AntColony))))
row.names(OTU4clr) <- str_remove(row.names(OTU4clr), "X")
row.names(OTU4clr) <- gsub("\\.", "-", row.names(OTU4clr))
OTUs.clr <- codaSeq.clr(OTU4clr + 0.5, samples.by.row=TRUE)
OTU2 <- otu_table(as.matrix(OTUs.clr), taxa_are_rows = FALSE)

psCLR <- phyloseq(OTU2,TAX,META)

ordu = ordinate(psCLR, "PCoA", "euclidean")

p<-plot_ordination(psCLR, ordu)+theme_bw()  +  theme(text = element_text(size=14)) +  geom_hline(yintercept = 0, linetype = "dashed", color = "lightgrey") +  geom_vline(xintercept = 0, linetype = "dashed", color = "lightgrey") + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank()) +geom_point(aes(fill=Station, shape = Dataset), size =3)  + scale_shape_manual(values= c(21,21)) +scale_fill_manual(values=colors[-1]) +guides(fill = guide_legend(override.aes=list(shape=21)))
p

PERMANOVA by station :

OTUs<- data.frame(otu_table(psCLR))
meta4stat <- meta[row.names(meta) %in% row.names(OTUs),]


meta4stat$Sample.Type <- factor(meta4stat$Station , levels = c("400", "600"))

set.seed(1)
adonis2(vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4stat)

## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = vegdist(OTUs, method = "euclidean") ~ Sample.Type, data = meta4stat)
##             Df SumOfSqs      R2      F Pr(>F)  
## Sample.Type  1   158.22 0.05458 1.3855  0.058 .
## Residual    24  2740.78 0.94542                
## Total       25  2899.00 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Stations are not significantly different

Heat plots of ASVs by order

Prepare data frame:

AntColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") & Region == "Antarctica" &  Name != "A30")
nchF = filter_taxa(AntColony, function(x) sum(x) > 1, TRUE)
nchfffra<- transform_sample_counts(nchF, function(OTU) 100* OTU/sum(OTU))
a_ra <- psmelt(nchfffra)

Alteromonadales

altero<- subset_taxa(nchfffra, Order == " o__Alteromonadales")

sw_ra <- psmelt(altero)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

Supplemental Figure 5

Oceanospirillales

oceano<- subset_taxa(nchfffra, Order == " o__Oceanospirillales")

sw_ra <- psmelt(oceano)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

ggsave(“oceano_heatplot.pdf”, width = 7, height = 5)

Supplemental FIgure 8

Rhodobacterales

rhodo<- subset_taxa(nchfffra, Order == " o__Rhodobacterales")

sw_ra <- psmelt(rhodo)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

ggsave(“Rhodo_heatplot.pdf”, width = 7, height = 5) Supplemental Figure 7

sulfito <- sw_long %>% filter(Year == "2023") %>% filter(OTU == "8ee2658ac7dd8e0f20d396c3d3e1a539")

min(sulfito$Abundance)

## [1] 12.24997

mean(sulfito$Abundance)

## [1] 22.04735

max(sulfito$Abundance)

## [1] 30.79332

^ summary stats for sulfitobacter rel abunandance in cultured-colony microbiomes

Cellvibrionales

cellv<- subset_taxa(nchfffra, Order == " o__Cellvibrionales")

sw_ra <- psmelt(cellv)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Year+Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c()


heat_plot

bc13a8f6afa703399c40d5879cc68221 = Sar92 clade ASV <- get summary stats

sw_long %>%  filter(OTU == "bc13a8f6afa703399c40d5879cc68221") %>%  summarize(mean = mean(Abundance), min = min(Abundance), max = max(Abundance))

##      mean min      max
## 1 2.78122   0 18.11352

Caulobacterialaes

caulo<- subset_taxa(nchfffra, Order == " o__Caulobacterales")

sw_ra <- psmelt(caulo)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")


heat_plot

algimonas <- sw_long %>% filter(Year =="2023") %>% filter(OTU %in% c("e7f391400f0a3e23b1c732d60e33016c", "715318dadbd602bb7863947527c67e3e")) %>% group_by(Name) %>% summarize(total = sum(Abundance))

min(algimonas$total)

## [1] 6.431744

mean(algimonas$total)

## [1] 18.20647

max(algimonas$total)

## [1] 29.7241

sd(algimonas$total)

## [1] 6.349521

^ summary stats for algimonas in cultured-colony microbiomes

Sphingomonadales

sphingo<- subset_taxa(nchfffra, Order == " o__Sphingomonadales")

sw_ra <- psmelt(sphingo)

sw_long<- sw_ra %>% dplyr::select(OTU, Name, Order, Family, Genus, Abundance, Station, Year)

sw_long<- sw_long[rev(order(sw_long$Genus)),]

factord<- unique(sw_long$OTU)

sw_long$OTU <- factor(sw_long$OTU, levels = factord)

sw_long$Genus<-str_sub(sw_long$Genus, 5, str_length(sw_long$Genus))

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$Genus) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")


heat_plot

heat_plot <- ggplot(sw_long[sw_long$Abundance >0,] ,aes(Name, OTU)) + theme_classic()+
    geom_tile(aes( fill=Abundance)) +
    theme( axis.text.x=element_text(angle=90,hjust=1,vjust=0),
          panel.background = element_blank()) +
    ylab("") +
    xlab("")  + 
    scale_size(name = "Relative\nabundance")+ facet_grid(~Station, scales = "free_x", space = "free") + scale_y_discrete(breaks = sw_long$OTU, labels = sw_long$OTU) + theme(panel.spacing.x=unit(0.05, "lines")) +scale_fill_viridis_c() +facet_grid(~Year+Station, scales = "free_x", space = "free")


heat_plot

Blast results for the the Sphingomonas ASV abundant in field-collected colony microbiomes:

f1ea98d49baf3dd5dd4b2456017e5d48 GCAGCAGTGGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCAGGGATGATAATGACAGTACCTGGAGAATAAGCTCCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTTCAAGTCAGGGGTGAAATCCCGGGGCTCAACCCCGGAACTGCCCTTGAAACTGGATGGCTAGAATACTGGAGAGGTGAGTGGAATTCCGAGTGTAGAGGTGAAATTCGTAGATATTCGGAAGAACACCAGTGGCGAAGGCGACTCACTGGACAGTTATTGACGCTGAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATAC

Erythrobacter - previously found in marine sediment

Antarctica environmental mapping

Chloroplasts geoplot

OTU4chloro<- (data.frame(otu_table(ps_chloro)))
names(OTU4chloro) <- gsub("\\.", "-", names(OTU4chloro))

PRtaxdf <- PRtaxonomy %>% filter(row.names(PRtaxonomy) %in% row.names(OTU4chloro))

PRtaxmat <- as.matrix(PRtaxdf)
PRTAX = tax_table(PRtaxmat)

OTU2 <- otu_table(as.matrix(OTU4chloro), taxa_are_rows = TRUE)

ps_chlorotax  <- phyloseq(OTU2,PRTAX,META)

ps_chloro_RA<- transform_sample_counts(ps_chlorotax, function(OTU) 100* OTU/sum(OTU))

ps_chloro_RA_glomO<- tax_glom(ps_chloro_RA, 'Order')
pouch_col_chloro <- subset_samples(ps_chloro_RA_glomO, Dataset %in% c("Antarctica") & Year == "2021")
nchF = filter_taxa(pouch_col_chloro, function(x) sum(x) > 1, TRUE)

a_ra <- psmelt(nchF)

a4long<- a_ra %>%  dplyr::select(OTU, Sample, Name, Order, Class, Abundance, Station, Year)

a4long<- a4long[rev(order(a4long$Order)),]

factord<- unique(a4long$OTU)

a4long$OTU <- factor(a4long$OTU, levels = factord)

extraction_log<- read.csv("antRNA_extractions .csv")

FTextractions <- extraction_log %>% filter(SampleType =="FT")
CTDextractions <- extraction_log %>% filter(SampleType =="CTD")

ftlog <- read.csv("ftlog.csv")
ftlog$Name<- paste0("FT-", ftlog$Sample)
ctdlog <- read.csv("ctdlog.csv") %>% filter(Depth == 0)
ctdlog$Name <- paste0("CTD-", ctdlog$Sample_ID)

ftlog_notprocessed<- ftlog %>% select(Name, start_lat_degdec, start_long_degdec) %>% filter(Name %ni% a4long$Name)

names(ftlog_notprocessed)<- c("Name", "Lat", "Long")

ftlog_vols <- ftlog %>% select(Name, start_lat_degdec, start_long_degdec, vol_start, vol_end) %>% filter(Name %in% a4long$Name)

ftlog_vols$vol <- ftlog_vols$vol_end - ftlog_vols$vol_start

min(ftlog_vols$vol)

## [1] 5.3

mean(ftlog_vols$vol)

## [1] 11.13333

max(ftlog_vols$vol)

## [1] 18

ftlog<- ftlog %>% select(Name, start_lat_degdec, start_long_degdec) %>% filter(Name %in% a4long$Name)


names(ftlog)<- c("Name", "Lat", "Long")


ctdlog<- ctdlog %>% select(Name, Lat, Long) %>% filter(Name %in% a4long$Name)

sample_locs <- rbind(ftlog, ctdlog) 

a4long_locs<- left_join(a4long, sample_locs, by = "Name")

wider_a4long_locs<- a4long_locs %>% select(Name, Order, Abundance, Lat, Long) %>% pivot_wider( names_from = c("Order"), values_from = c("Abundance"))

^ volumes filtered per flow-through/underway sample

library(ggspatial)
library(rnaturalearth)
library(rnaturalearthdata)

## 
## Attaching package: 'rnaturalearthdata'

## The following object is masked from 'package:rnaturalearth':
## 
##     countries110

library(sf)

## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library(scatterpie)

scatter_colors <- c("#ef3c23",  "#cfe5cc", "#fac92c" ,"darkgrey", "#215F38", "#0181BB")

world <- ne_countries(scale = "medium", returnclass = "sf")

strainlist <- unique(a4long_locs$Order)
rna <- read.csv("ctdlog.csv")

FT_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=wider_a4long_locs %>% filter(str_detect(Name, "FT")),aes(x=Long, y=Lat, group = Name, r=0.2), cols = strainlist, color=NA, alpha=.8 ) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("Underway")  + 
  scale_fill_manual(values = scatter_colors) + 
  coord_sf(xlim = c(-70, -65), ylim = c(-66, -62), expand = TRUE)+ theme(legend.position = "bottom") + 
  geom_point(data = ftlog_notprocessed, aes (x= Long, y= Lat), color = "red", size = 0.1) +
  geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.2)

CTD_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=wider_a4long_locs %>% filter(str_detect(Name, "CTD")),aes(x=Long, y=Lat, group = Name, r=0.5), cols = strainlist, color=NA, alpha=.8 ) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("CTD/Niskin")  + scale_fill_manual(values = scatter_colors) + coord_sf(xlim = c(-73, -56), ylim = c(-68, -60), expand = TRUE) + theme(legend.position = "none") + geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.1)

  CTD_scatterpie_plot + FT_scatterpie_plot +plot_layout(guides = "collect") &
  theme(legend.position='bottom')

Figure 3 A-B

Bacteria geoplot scatterpie

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')

ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomO_F) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

bact_a4long_locs<- left_join(mergemelt, sample_locs, by = "Name")

bact_wider_a4long_locs<- bact_a4long_locs %>% select(Name, Order, Abundance, Lat, Long) %>% group_by(Name, Order, Lat, Long) %>% summarize(Abundance = sum(Abundance)) %>% ungroup() %>%  pivot_wider( names_from = c("Order"), values_from = c("Abundance"), values_fill = 0)

scatter_colors <- c(colors30[1:3], "#F95700", colors30[7:8],  colors30[12], colors30[15:16],  colors30[21])

strainlist <- c("Flavobacteriales", "Oceanospirillales" ,"Alteromonadales"  , "SAR11_clade", "Thiomicrospirales", "Cellvibrionales"  , "Rhodobacterales"  , "SAR86_clade","Burkholderiales",  "Chitinophagales" )    

bact_wider_a4long_locs<- data.frame(bact_wider_a4long_locs)

FT_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=bact_wider_a4long_locs %>% filter(str_detect(Name, "FT")),aes(x=Long, y=Lat, group = Name, r=0.2), cols = strainlist, color=NA, alpha=.8 ) + 
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("Flow-through")  + 
  scale_fill_manual(values = scatter_colors) + 
  coord_sf(xlim = c(-70, -65), ylim = c(-66, -62), expand = TRUE) + theme(legend.position = "bottom") + 
  geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.2)

CTD_scatterpie_plot<-ggplot(data = world) + geom_sf(lwd = 0) + theme_void() + geom_scatterpie(data=bact_wider_a4long_locs %>% filter(str_detect(Name, "CTD")),aes(x=Long, y=Lat, group = Name, r=0.5), cols = strainlist, color=NA, alpha=.8 ) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + xlab("")+ ylab("")+ggtitle("CTD/Niskin")  + scale_fill_manual(values = scatter_colors) + coord_sf(xlim = c(-73, -56), ylim = c(-68, -60), expand = TRUE) + theme(legend.position = "none") + geom_point(data = (rna %>% filter(Study == "Grid")), aes (x= Long, y= Lat), color = "black", size = 0.1)

  CTD_scatterpie_plot + FT_scatterpie_plot + plot_layout(guides = "collect") &
  theme(legend.position='bottom')

Figure 3 C-D

Correlations

Alteromonadales

PSColony4corr<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" & Year !=2023)

ps_colony_RA4corr<- transform_sample_counts(PSColony4corr, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA4corr <- filter_taxa(ps_colony_RA4corr, function(x) sum(x) > 1, TRUE)


melted_Colony4Corr <- psmelt(ps_colony_RA4corr) 

colAlteromonas4orr <- melted_Colony4Corr %>% filter(Order == " o__Alteromonadales")

AlteromonasASVs <- unique(colAlteromonas4orr$OTU)



Ant4corr<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

Ant4corr<- transform_sample_counts(Ant4corr, function(OTU) 100* OTU/sum(OTU))

melted_Ant4corrr <- psmelt(Ant4corr) 

altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)
#pivot wider
wider_altero<- altero_Ant4corrr %>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_altero) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer_altero)[2] <- "Prymnesiophyceae"

Correlation between all ALteromonadales ASVs in both colony and environmental samples to phaeocystis ASVs in environmental samples:

longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

The most abundant colony-associated ASV:

longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>% filter(bact_ASV =="6c1096cb4d53196f3b70850af15c7b35") %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

####Paraglaciecola Sum up all paraglaciecola ASVs and plot:

altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)


summed_genera <- altero_Ant4corrr %>%  group_by(Name, Genus) %>% summarize(total_abund = sum(Abundance)) %>% filter( Genus == " g__Paraglaciecola")

## `summarise()` has grouped output by 'Name'. You can override using the
## `.groups` argument.

#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(summed_genera)

## Joining with `by = join_by(Name)`

names(longer_altero)[2] <- "Prymnesiophyceae"

Paraglaciecola <-longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=total_abund, fill = source, shape = source)) + geom_point(size = 3)  + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Paraglaciecola sp.") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))


Paraglaciecola

## `geom_smooth()` using formula = 'y ~ x'

CTD lm and significance:

data = longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD")

summary(lm(formula = total_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = total_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##        1        2        3        4        5 
##  0.09972 -0.27477  1.87663 -2.75799  1.05640 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -1.18753    1.69346  -0.701    0.534  
## Prymnesiophyceae  0.14951    0.04697   3.183    0.050 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.027 on 3 degrees of freedom
## Multiple R-squared:  0.7716, Adjusted R-squared:  0.6954 
## F-statistic: 10.13 on 1 and 3 DF,  p-value: 0.04997

Flowthrough lm and significance:

data = longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT")

summary(lm(formula = total_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = total_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.149 -3.813 -0.034  1.679  8.334 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -6.3266     4.1028  -1.542   0.1541   
## Prymnesiophyceae   0.3645     0.1091   3.339   0.0075 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.098 on 10 degrees of freedom
## Multiple R-squared:  0.5272, Adjusted R-squared:  0.4799 
## F-statistic: 11.15 on 1 and 10 DF,  p-value: 0.007501

Colwellia

Sum up Colwellia ASVs and plot:

altero_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% AlteromonasASVs)


summed_genera <- altero_Ant4corrr %>%  group_by(Name, Genus) %>% summarize(total_abund = sum(Abundance)) %>% filter( Genus == " g__Colwellia")

## `summarise()` has grouped output by 'Name'. You can override using the
## `.groups` argument.

#join with Phaeo abundance
longer_altero<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(summed_genera)

## Joining with `by = join_by(Name)`

names(longer_altero)[2] <- "Prymnesiophyceae"

Colwellia <- longer_altero %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=total_abund, fill = source, shape = source)) + geom_point(size = 3)  + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Colwellia sp.") + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))

Colwellia

FT-10 53.369710 g__Colwellia 7.29 % (end of 600 line) FT-18 16.081232 g__Colwellia 13.48 % (landward end of flowthroughs on 400 line) FT-7 16.412716 g__Colwellia 19.24 % (landward end of flowthroughs on 600 line)

REMEMBER - field is RNA = activity - Colwellia is more ACTIVE at lower phaeo sites?

Visualize %Paraglaciecola/Colwellia in each colony - Plot rel abund of all alteronmonadales genera for field colonies:

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Alteromonadales")

ps_colony_RA_glomG<- tax_glom(ps_colony_RA, 'Genus')

ps_colony_RA_glomG_F = filter_taxa(ps_colony_RA_glomG , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomG_F) 
mergemelt$Genus<-str_sub(mergemelt$Genus, 5, str_length(mergemelt$Genus))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Genus)) + facet_grid(~ Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "#262626", "#d0cef3","#e99eb2", "#ffc100", "#fafff1" )) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank())

ggsave(“Alteromonas_barplot.pdf”, width = 7, height = 4) Supplemental Figure 6

Oceanospirillales

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Oceanospirillales")

ps_colony_RA_glomF<- tax_glom(ps_colony_RA, 'Family')

ps_colony_RA_glomG_F = filter_taxa(ps_colony_RA_glomF , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomG_F) 
mergemelt$Family<-str_sub(mergemelt$Family, 5, str_length(mergemelt$Family))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=Family)) + facet_grid(~ Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "#262626", "#d0cef3","#e99eb2", "#ffc100", "#fafff1" )) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank())

ggsave(“Oceano_barplot.pdf”, width = 7, height = 4) Supplemental Figure 9A

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("PantCol") &  Name != "A30" &Year == 2021)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA <- subset_taxa(ps_colony_RA, Order == " o__Oceanospirillales")

#ps_colony_RA_glomF<- tax_glom(ps_colony_RA, 'Family')

ps_colony_RA_F = filter_taxa(ps_colony_RA , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_F) 

mergemelt$Family<-str_sub(mergemelt$Family, 5, str_length(mergemelt$Family))
#mergemelt$Genus[mergemelt$Abundance < 1] <- "z< 1% abund."

mergemelt <- mergemelt %>% filter(OTU %in% c("90fd832464c5fbc417533484304f566f", "3f64882f4221742c1cfa89d89cfe7975"))

spatial_plot <- ggplot(data=mergemelt, aes(x=Name, y=Abundance, fill=OTU)) + facet_grid(~ Station, scales = "free")

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( "hotpink" ,"pink")) + guides(fill=guide_legend(nrow=1)) + theme(legend.title =element_blank())

ggsave(“Oceano_barplot_ASVs.pdf”, width = 7, height = 4) Supplemental Figure 9B

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales")

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

mostly driven by 90fd … and d536 90fd832464c5fbc417533484304f566f d536522dff81586b788ee4acdbadb881

The colony-associated Oceanospirillales was dominated by two ASVs 90fd832464c5fbc417533484304f566f (uncultured) & 3f64882f4221742c1cfa89d89cfe7975 (Profundimonas sp., family: Nitrincolaceae)

Environmentally abundant = 90fd832464c5fbc417533484304f566f (uncultured, family: Nitrincolaceae) - most abundant by a long shot (~5-25% of enviro samples) &

d536522dff81586b788ee4acdbadb881 (0-5.5ish of enviro samples) (uncultured, family: Nitrincolaceae)

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales") %>%  filter(OTU  %in% c("90fd832464c5fbc417533484304f566f","3f64882f4221742c1cfa89d89cfe7975","d536522dff81586b788ee4acdbadb881" ) )

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source)) +facet_wrap(~bact_ASV) + geom_point(size = 3, shape = 21)  + theme_test() + ggtitle("Oceanospirillales") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("#00A4CC", "#F95700" )) + scale_color_manual(values = c("#00A4CC", "#F95700" ))

## `geom_smooth()` using formula = 'y ~ x'

ggsave(“oceano_phaeo_correlations.pdf”, width =8.5, height = 3.5) Supplemental Figure 10

filter(OTU %in% c(“90fd832464c5fbc417533484304f566f”,“3f64882f4221742c1cfa89d89cfe7975”,“d536522dff81586b788ee4acdbadb881” ) )

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("3f64882f4221742c1cfa89d89cfe7975"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##        1        2        3        4        5 
## -0.03678 -0.07714  0.27464 -0.06177 -0.09894 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.080436   0.149688  -0.537   0.6283  
## Prymnesiophyceae  0.016111   0.004152   3.881   0.0303 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1792 on 3 degrees of freedom
## Multiple R-squared:  0.8339, Adjusted R-squared:  0.7785 
## F-statistic: 15.06 on 1 and 3 DF,  p-value: 0.03031

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT") %>% filter(bact_ASV %in% c("3f64882f4221742c1cfa89d89cfe7975"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39245 -0.19911 -0.02003  0.12273  0.64741 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      -0.496254   0.225084  -2.205  0.05202 . 
## Prymnesiophyceae  0.022874   0.005988   3.820  0.00337 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2797 on 10 degrees of freedom
## Multiple R-squared:  0.5934, Adjusted R-squared:  0.5527 
## F-statistic: 14.59 on 1 and 10 DF,  p-value: 0.003374

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("90fd832464c5fbc417533484304f566f"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
##  1.1352 -2.4046  4.2002 -3.0602  0.1293 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        7.3459     2.8160   2.609   0.0798 .
## Prymnesiophyceae   0.2891     0.0781   3.701   0.0343 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.371 on 3 degrees of freedom
## Multiple R-squared:  0.8203, Adjusted R-squared:  0.7605 
## F-statistic:  13.7 on 1 and 3 DF,  p-value: 0.03425

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT") %>% filter(bact_ASV %in% c("90fd832464c5fbc417533484304f566f"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1933 -1.6592  0.6824  2.1373  3.4966 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       5.17810    2.48608   2.083   0.0639 .
## Prymnesiophyceae  0.19804    0.06614   2.994   0.0135 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.089 on 10 degrees of freedom
## Multiple R-squared:  0.4727, Adjusted R-squared:   0.42 
## F-statistic: 8.966 on 1 and 10 DF,  p-value: 0.01347

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD") %>% filter(bact_ASV %in% c("d536522dff81586b788ee4acdbadb881"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
## -0.5064 -0.4827  0.6102  2.7111 -2.3323 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      0.005792   1.781907   0.003    0.998
## Prymnesiophyceae 0.075820   0.049420   1.534    0.223
## 
## Residual standard error: 2.133 on 3 degrees of freedom
## Multiple R-squared:  0.4396, Adjusted R-squared:  0.2529 
## F-statistic: 2.354 on 1 and 3 DF,  p-value: 0.2225

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT") %>% filter(bact_ASV %in% c("d536522dff81586b788ee4acdbadb881"))

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6077 -0.3321 -0.1682  0.3708  2.4802 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.72098    0.85415  -0.844   0.4184  
## Prymnesiophyceae  0.06464    0.02272   2.845   0.0174 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.061 on 10 degrees of freedom
## Multiple R-squared:  0.4473, Adjusted R-squared:  0.392 
## F-statistic: 8.093 on 1 and 10 DF,  p-value: 0.0174

sum_up

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Oceanospirillales")

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs) %>% group_by(Name) %>% summarize(total_abund = sum(Abundance))
#pivot wider
#wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(oceano_Ant4corrr) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer)[2] <- "Prymnesiophyceae"


Oceanospirillales<- longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source, shape = source)) + geom_point(size = 3)  + scale_shape_manual(values = c(21, 24)) +theme_test() + ggtitle("Oceanospirillales") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("lightgrey", "black" )) + scale_color_manual(values = c("darkgrey", "black" ))

Oceanospirillales

## `geom_smooth()` using formula = 'y ~ x'

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
##  0.6289 -2.9977  5.0767 -0.4163 -2.2916 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       7.30982    3.07224   2.379   0.0977 .
## Prymnesiophyceae  0.38046    0.08521   4.465   0.0209 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.678 on 3 degrees of freedom
## Multiple R-squared:  0.8692, Adjusted R-squared:  0.8256 
## F-statistic: 19.94 on 1 and 3 DF,  p-value: 0.02092

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5743 -1.1521  0.0937  0.6093  5.1538 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       9.84218    2.59413   3.794  0.00352 **
## Prymnesiophyceae  0.16548    0.06901   2.398  0.03746 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.223 on 10 degrees of freedom
## Multiple R-squared:  0.3651, Adjusted R-squared:  0.3016 
## F-statistic: 5.749 on 1 and 10 DF,  p-value: 0.03746

Paraglaciecola + Colwellia + Oceanospirillales + plot_layout(guides = "collect")

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave(“phaeo_bact_correlations.pdf”, width =8.5, height = 3.5)

Figure 3C

Sphingomonadales

col4corr_sphing <- melted_Colony4Corr %>% filter(Order == " o__Sphingomonadales")

sphingASVs <- unique(col4corr_sphing$OTU)


sphing_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% sphingASVs)
#pivot wider
wider_sphing<- sphing_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_sphing) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

The colony-associated Sphingomonas ASVs are not detected in seawater samples…

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')



mergemelt<- psmelt(ps_colony_RA_glomO) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))

mergemelt <- mergemelt %>% filter(Order == "Sphingobacteriales")

sphingo_sums <- mergemelt %>% group_by(Sample, Name, Station) %>% summarize(total = sum(Abundance))  %>% filter(total > 0 )

## `summarise()` has grouped output by 'Sample', 'Name'. You can override using
## the `.groups` argument.

sphingo_sums

## # A tibble: 6 × 4
## # Groups:   Sample, Name [6]
##   Sample Name   Station       total
##   <chr>  <chr>  <chr>         <dbl>
## 1 Ant1   FT-10  600         0.00374
## 2 Ant12  FT-20  400-line    0.0383 
## 3 Ant17  CTD-26 600-line    0.00819
## 4 Ant2   FT-21  400-line    0.0160 
## 5 Ant6   FT-9   600-line    0.00795
## 6 Ant8   FT-13  600-500tran 0.0195

max(sphingo_sums$total)

## [1] 0.03833988

Sphingomonas as a group is not detected or extremely low abundance in all environmental samples…

Cellvibrionales (SAR92)

col4corr_oceano <- melted_Colony4Corr %>% filter(Order == " o__Cellvibrionales")

oceanoASVs <- unique(col4corr_oceano$OTU)


oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU %in% oceanoASVs)
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer)[2] <- "Prymnesiophyceae"


longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE) %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, color = source)) + geom_point() +facet_wrap(~bact_ASV)

bc13a8f6afa703399c40d5879cc68221

oceano_Ant4corrr <- melted_Ant4corrr %>% filter(OTU == "bc13a8f6afa703399c40d5879cc68221")
#pivot wider
wider_oceano<- oceano_Ant4corrr%>% select(Name, OTU, Abundance) %>% pivot_wider( names_from = c("OTU"), values_from = c("Abundance"))

#join with Phaeo abundance
longer<-wider_a4long_locs %>% select(Name, " Prymnesiophyceae") %>% left_join(wider_oceano) %>% pivot_longer(!c(Name, " Prymnesiophyceae"), names_to = "bact_ASV", values_to = "rel_abund")

## Joining with `by = join_by(Name)`

names(longer)[2] <- "Prymnesiophyceae"


cv<- longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>%  ggplot(aes(x=Prymnesiophyceae, y=rel_abund, fill = source)) + geom_point(size = 3, shape = 21)  + theme_test() + ggtitle("SAR92") + geom_smooth(method = "lm", se = FALSE, aes(color = source)) + scale_fill_manual(values = c("#00A4CC", "#F95700" )) + scale_color_manual(values = c("#00A4CC", "#F95700" ))

cv

## `geom_smooth()` using formula = 'y ~ x'

min(longer$rel_abund)

## [1] 0.6245234

max(longer$rel_abund)

## [1] 8.160423

mean(longer$rel_abund)

## [1] 3.495746

CTD

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "CTD")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##       1       2       3       4       5 
## -1.5689  0.2756  1.2457  2.9162 -2.8686 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       2.20253    2.20070   1.001    0.391
## Prymnesiophyceae  0.06870    0.06104   1.126    0.342
## 
## Residual standard error: 2.635 on 3 degrees of freedom
## Multiple R-squared:  0.2969, Adjusted R-squared:  0.06256 
## F-statistic: 1.267 on 1 and 3 DF,  p-value: 0.3423

data = longer %>% separate(Name, sep = "-", into = c("source", "num"), remove = FALSE)  %>% filter(source == "FT")

summary(lm(formula = rel_abund ~ Prymnesiophyceae, data = data ))

## 
## Call:
## lm(formula = rel_abund ~ Prymnesiophyceae, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4769 -0.4509 -0.1104  0.3728  1.8167 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       0.61598    0.79262   0.777  0.45506   
## Prymnesiophyceae  0.07258    0.02109   3.442  0.00631 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9849 on 10 degrees of freedom
## Multiple R-squared:  0.5423, Adjusted R-squared:  0.4966 
## F-statistic: 11.85 on 1 and 10 DF,  p-value: 0.006307

Visualize Overall abundance of Cellvibrionales in environmental samples:

PSColony<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_colony_RA<- transform_sample_counts(PSColony, function(OTU) 100* OTU/sum(OTU))

ps_colony_RA_glomO<- tax_glom(ps_colony_RA, 'Order')

ps_colony_RA_glomO_F = filter_taxa(ps_colony_RA_glomO , function(x) sum(x) > 1, TRUE)

mergemelt<- psmelt(ps_colony_RA_glomO_F) 
mergemelt$Order<-str_sub(mergemelt$Order, 5, str_length(mergemelt$Order))
mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=mergemelt, aes(x=Sample, y=Abundance, fill=Order)) 

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())

Visualize overall abundance of SAR92 in environmental samples:

PSenviro<- subset_samples(ps_nochloro, Dataset %in% c("Antarctica") &  Name != "A30" & Year !=2023)

ps_enviro_RA<- transform_sample_counts(PSenviro, function(OTU) 100* OTU/sum(OTU))

ps_enviro_oceano <- subset_taxa(ps_enviro_RA, Order == " o__Cellvibrionales")

ps_enviro_RA_glomG<- tax_glom(ps_enviro_oceano , 'Genus')

ps_enviro_RA_glomG = filter_taxa(ps_enviro_RA_glomG , function(x) sum(x) > 0.1, TRUE)

cv_mergemelt<- psmelt(ps_enviro_RA_glomG) 
cv_mergemelt$Order<-str_sub(cv_mergemelt$Order, 5, str_length(cv_mergemelt$Order))
#mergemelt$Order[mergemelt$Abundance < 1] <- "z< 1% abund."

spatial_plot <- ggplot(data=cv_mergemelt, aes(x=Name, y=Abundance, fill=Genus)) 

spatial_plot + geom_bar(aes(), stat="identity", position="stack") + 
  theme_classic() + theme(legend.position="bottom") + scale_fill_manual(values =c( rep(colors30, 2))) + guides(fill=guide_legend(nrow=5)) + theme(legend.title =element_blank())

Almost all Cellvibrionales seqs in environmental samples are SAR92

Session Info:

sessionInfo()

## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.6.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] scatterpie_0.2.1        sf_1.0-15               rnaturalearthdata_1.0.0
##  [4] rnaturalearth_1.0.1     ggspatial_1.1.9         splitstackshape_1.4.8  
##  [7] biomformat_1.30.0       ape_5.7-1               reshape2_1.4.4         
## [10] scales_1.3.0            patchwork_1.2.0         stringr_1.5.1          
## [13] vegan_2.6-4             permute_0.9-7           magrittr_2.0.3         
## [16] viridis_0.6.5           viridisLite_0.4.2       gridExtra_2.3          
## [19] dplyr_1.1.4             tidyr_1.3.1             ggplot2_3.5.0          
## [22] phyloseq_1.46.0         CoDaSeq_0.99.7          car_3.1-2              
## [25] carData_3.0-5           ALDEx2_1.34.0           latticeExtra_0.6-30    
## [28] lattice_0.22-5          zCompositions_1.5.0-1   truncnorm_1.0-9        
## [31] NADA_1.6-1.1            survival_3.5-7          MASS_7.3-60.0.1        
## [34] qiime2R_0.99.6         
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          rstudioapi_0.15.0          
##   [3] jsonlite_1.8.8              farver_2.1.1               
##   [5] rmarkdown_2.25              zlibbioc_1.48.0            
##   [7] vctrs_0.6.5                 multtest_2.58.0            
##   [9] RCurl_1.98-1.14             terra_1.7-71               
##  [11] base64enc_0.1-3             htmltools_0.5.7            
##  [13] S4Arrays_1.2.0              Rhdf5lib_1.24.2            
##  [15] SparseArray_1.2.4           Formula_1.2-5              
##  [17] rhdf5_2.46.1                sass_0.4.8                 
##  [19] KernSmooth_2.23-22          bslib_0.6.1                
##  [21] htmlwidgets_1.6.4           plyr_1.8.9                 
##  [23] cachem_1.0.8                igraph_2.0.1.1             
##  [25] lifecycle_1.0.4             iterators_1.0.14           
##  [27] pkgconfig_2.0.3             Matrix_1.6-5               
##  [29] R6_2.5.1                    fastmap_1.1.1              
##  [31] GenomeInfoDbData_1.2.11     MatrixGenerics_1.14.0      
##  [33] digest_0.6.34               colorspace_2.1-0           
##  [35] S4Vectors_0.40.2            Hmisc_5.1-1                
##  [37] GenomicRanges_1.54.1        labeling_0.4.3             
##  [39] RcppZiggurat_0.1.6          fansi_1.0.6                
##  [41] polyclip_1.10-6             httr_1.4.7                 
##  [43] abind_1.4-5                 mgcv_1.9-1                 
##  [45] compiler_4.3.2              proxy_0.4-27               
##  [47] withr_3.0.0                 htmlTable_2.4.2            
##  [49] backports_1.4.1             BiocParallel_1.36.0        
##  [51] DBI_1.2.0                   highr_0.10                 
##  [53] ggforce_0.4.1               DelayedArray_0.28.0        
##  [55] classInt_0.4-10             units_0.8-5                
##  [57] tools_4.3.2                 foreign_0.8-86             
##  [59] nnet_7.3-19                 glue_1.7.0                 
##  [61] quadprog_1.5-8              nlme_3.1-164               
##  [63] rhdf5filters_1.14.1         grid_4.3.2                 
##  [65] checkmate_2.3.1             gridBase_0.4-7             
##  [67] cluster_2.1.6               ade4_1.7-22                
##  [69] generics_0.1.3              gtable_0.3.4               
##  [71] class_7.3-22                data.table_1.15.0          
##  [73] utf8_1.2.4                  XVector_0.42.0             
##  [75] BiocGenerics_0.48.1         foreach_1.5.2              
##  [77] pillar_1.9.0                splines_4.3.2              
##  [79] tweenr_2.0.2                deldir_2.0-2               
##  [81] directlabels_2024.1.21      tidyselect_1.2.0           
##  [83] Biostrings_2.70.2           knitr_1.45                 
##  [85] IRanges_2.36.0              SummarizedExperiment_1.32.0
##  [87] stats4_4.3.2                xfun_0.42                  
##  [89] Biobase_2.62.0              matrixStats_1.2.0          
##  [91] DT_0.31                     stringi_1.8.3              
##  [93] ggfun_0.1.4                 yaml_2.3.8                 
##  [95] evaluate_0.23               codetools_0.2-19           
##  [97] interp_1.1-6                tibble_3.2.1               
##  [99] cli_3.6.2                   RcppParallel_5.1.7         
## [101] rpart_4.1.23                munsell_0.5.0              
## [103] jquerylib_0.1.4             Rcpp_1.0.12                
## [105] GenomeInfoDb_1.38.6         png_0.1-8                  
## [107] Rfast_2.1.0                 jpeg_0.1-10                
## [109] bitops_1.0-7                e1071_1.7-14               
## [111] purrr_1.0.2                 crayon_1.5.2               
## [113] rlang_1.1.3

Phaeosphere Analysis

Maggi Brisbin

5/16/2024

Setup

import packages and functions, define colors

import data

Chloroplast sequences

Bacterial Sequences

Rarefaction Curves

Alpha diversity

Beta Diversity

Bar Plots

Ordinations

Heat plots of ASVs by order

Alteromonadales

Oceanospirillales

Rhodobacterales

Cellvibrionales

Caulobacterialaes

Sphingomonadales

Antarctica environmental mapping

Chloroplasts geoplot

Bacteria geoplot scatterpie

Correlations

Alteromonadales

Colwellia

Oceanospirillales

Sphingomonadales

Cellvibrionales (SAR92)

Session Info: