Bush Medicine Processing

Notes before use

Remember to update the working directory (wd) and the input file names. Clean all plant names for spelling errors (updating from last year's file makes this easier). Remember to append the 2018 Micro data, which is in a different format. Update the microorganism list if needed.

Load Libraries

Set WD

Get Data

# Read every input table from the working directory.
# All files are read with fileEncoding = "UTF-8-BOM" except the raw 2023
# antimicrobial export, which uses read.csv()'s default encoding.
FloraData <- read.csv(
  file = "Plant_Sample_Database_2023_Cleaned.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
ChemData <- read.csv(
  file = "Plant_Extract_Database_2023_Raw.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
MicroData <- read.csv(file = "Antimicrobial_Activity_Database_2023_Raw.csv")
MicroData2018 <- read.csv(
  file = "Antimicrobial_Activity_Database_2018_Cleaned.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
BacteriaList <- read.csv(
  file = "MicroorganismList_2023.csv",
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)
ToxData <- read.csv(
  file = "Toxicology_Database_2022_Cleaned.csv",
  fileEncoding = "UTF-8-BOM"
)

Generate new Antibacterial Activity dataframe

# Split the wide 2023 antimicrobial table into one three-column frame per
# numbered Barcode.k / Zone.k column pair: column 1 plus that pair, renamed to
# the common Barcode / Zone headers. The 2018 table contributes its first
# three columns unchanged.
Micdf2018 <- MicroData2018[, 1:3]
Micdf1 <- rename(MicroData[, 1:3], Barcode = Barcode.1, Zone = Zone.1)
Micdf2 <- rename(MicroData[, c(1, 5, 6)], Barcode = Barcode.2, Zone = Zone.2)
Micdf3 <- rename(MicroData[, c(1, 8, 9)], Barcode = Barcode.3, Zone = Zone.3)
Micdf4 <- rename(MicroData[, c(1, 11, 12)], Barcode = Barcode.4, Zone = Zone.4)
Micdf5 <- rename(MicroData[, c(1, 14, 15)], Barcode = Barcode.5, Zone = Zone.5)


# Remove all NAs and zeros from barcodes
#
# Stack the per-pair frames into one long table, turn barcodes recorded as 0
# into NA, then drop every row that contains an NA in any column.
FigData <- rbind(Micdf2018, Micdf1, Micdf2, Micdf3, Micdf4, Micdf5)
FigData$Barcode[which(FigData$Barcode == 0)] <- NA
FigData <- na.omit(FigData)


# Rank if antibacterial
#
# Flag every assay row as active (1) or inactive (0). A Zone reading of 0 or 6
# counts as no activity; any other reading counts as activity.
# NOTE(review): 6 is presumably the bare disc diameter (no inhibition beyond
# the disc) -- confirm against the lab protocol.
#
# Done as a single vectorised comparison, so it does not depend on ABActivity
# being the fourth column and it also works on a zero-row table.
FigData$ABActivity <- as.numeric(FigData$Zone != 0 & FigData$Zone != 6)

# The intermediate per-pair frames are no longer needed.
rm(Micdf1, Micdf2, Micdf3, Micdf4, Micdf5, Micdf2018)

Clean up of bacteria Names and Assign Gram Stain

Full Microorganism names

# Replace any organism recorded under an alternative name with the full
# microorganism name from BacteriaList. Organisms with no Alt.Name match keep
# the name they already have.
AltIndex <- match(FigData$Organism, BacteriaList$Alt.Name)
HasAlt <- !is.na(AltIndex)
FigData$Organism[HasAlt] <- BacteriaList$Microorganism[AltIndex[HasAlt]]

rm(AltIndex, HasAlt)

Assign Gram

# Look up each row's Gram classification by its (cleaned) organism name;
# organisms missing from BacteriaList get NA.
FigData$Gram <- local({
  organism_row <- match(FigData$Organism, BacteriaList$Microorganism)
  BacteriaList$Gram[organism_row]
})

Matching of plant names

# Grab name of plant and order data frame
#
# GS is "Genus species", found by barcode in FloraData. Barcodes with no match
# produce the string "NA NA" here; they are removed by na.omit() below because
# their Common value is NA.
FigData$GS <- local({
  flora_row <- match(FigData$Barcode, FloraData$Barcode)
  paste(FloraData$Genus[flora_row],
        FloraData$Species.Epithet[flora_row],
        sep = " ")
})
FigData$Common <- FloraData$Common.Name[match(FigData$Barcode, FloraData$Barcode)]

# Drop rows with any NA (this includes rows with an unknown Gram class).
FigData <- na.omit(FigData)

# Active results first.
FigData <- FigData[order(-FigData$ABActivity), ]

# Count times tested, in total, for each combination
#
# TestType is the "Organism x Plant" key; TimesTested is how many rows carry
# that key.
FigData$TestType <- paste(FigData$Organism, FigData$GS, sep = " x ")
TimesTested <- as.data.frame(table(FigData$TestType))
FigData$TimesTested <- TimesTested$Freq[match(FigData$TestType, TimesTested$Var1)]

# Count times tested positive for each combination
#
# Same tally restricted to active rows. Combinations that never tested
# positive are absent from that table, so their NA is replaced with 0.
PosOnlyData <- subset(FigData, ABActivity == 1)
TimesTestedPos <- as.data.frame(table(PosOnlyData$TestType))
FigData$TimesTestedPos <- TimesTestedPos$Freq[match(FigData$TestType, TimesTestedPos$Var1)]
FigData$TimesTestedPos[is.na(FigData$TimesTestedPos)] <- 0

# Tested ratio, as display text "positives/total"
FigData$TestRatio <- paste(FigData$TimesTestedPos, FigData$TimesTested, sep = "/")

# Cleanup of FigData
#
# Renumber the rows 1..n. seq_len() is used instead of 1:length(...) so that
# an empty table yields an empty index rather than c(1, 0).
FigData <- na.omit(FigData)
rownames(FigData) <- seq_len(nrow(FigData))

rm(TimesTested, TimesTestedPos, PosOnlyData)

Create Heatmap Data

# Create heatmap data frame
#
# One row per Organism x Plant combination. distinct() keeps the first row of
# each combination; FigData is sorted active-first earlier in the script, so
# the kept row is an active one whenever the combination has any.
# NewTest is 1 when the kept row's barcode starts with the largest four-digit
# prefix present (the barcode prefix is read as a year), otherwise 0.
# NOTE(review): NewTest reflects only the kept row's barcode, so a combination
# retested in the latest year can still be flagged 0 -- confirm this is
# intended.
HeatmapData <- FigData %>%
  select(Organism, Gram, ABActivity, Barcode, GS, Common, TestType, TestRatio) %>%
  distinct(TestType, .keep_all = TRUE) %>%
  mutate(Year = as.numeric(substr(Barcode, 1, 4))) %>%
  mutate(NewTest = if_else(Year == max(Year), 1, 0)) %>%
  select(-Year, -Barcode) %>%
  arrange(GS)

Generate Heatmaps

Note: Some values are in there so that the final combined figure looks good (i.e. legend position)

Normal Heatmap

# Normal heatmap: three vertically stacked tile panels (Gram positive, Gram
# negative, eukaryote) over one shared species axis, saved as a PNG.

# Species shown on the x axis of every panel, in HeatmapData row order.
SpeciesLimits <- unique(HeatmapData$GS)

# Common-name label for each species, looked up per species.
# unique(HeatmapData$Common) would have a different length from the limits
# (wrong or failing labels) whenever two species share a common name.
CommonLabels <- HeatmapData$Common[match(SpeciesLimits, HeatmapData$GS)]

# Fill scale used by all three panels; returns a fresh scale on each call.
ActivityFillScale <- function() {
  scale_fill_continuous(
    breaks = c(0, 1),
    labels = c("No Activity", "Activity"),
    guide = guide_legend(
      title = "Antimicrobial Activity",
      title.position = "top",
      label.hjust = 0,
      label.vjust = 0.5)
  )
}

# Top panel: scientific names on the top axis, no legend.
Plot1 <- ggplot(subset(HeatmapData, Gram == "Gram Positive"), aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  labs(x = "Flora Scientific Name", y = "Gram Positive") +
  scale_x_discrete(position = "top", limits = SpeciesLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 60, vjust = 0.5, hjust = 0, face = "italic"),
        plot.margin = unit(c(0, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "none") +
  ActivityFillScale()

# Middle panel: x axis text, ticks and title hidden, no legend.
Plot2 <- ggplot(subset(HeatmapData, Gram == "Gram Negative"), aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  scale_x_discrete(limits = SpeciesLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  labs(x = "Flora Scientific Name", y = "Gram Negative") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.margin = unit(c(0.5, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "none") +
  ActivityFillScale()

# Bottom panel: same species axis, labelled with common names. It keeps its
# legend so the legend can be extracted below.
Plot3 <- ggplot(subset(HeatmapData, Gram == "Eukaryote"), aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  labs(x = "Flora Common Name", y = "Eukaryote") +
  scale_x_discrete(limits = SpeciesLimits,
                   labels = CommonLabels) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 300, vjust = 0, hjust = 0),
        plot.margin = unit(c(0.5, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "right") +
  ActivityFillScale()

# Legend taken from the bottom panel; it is drawn manually onto the combined
# figure further down.
LegendData <- as.grob(get_legend(Plot3))

# Relative panel heights: a fixed allowance per panel plus 4 units for every
# organism of that class listed in BacteriaList.
HeightList <- c(40 + 4 * sum(BacteriaList$Gram == "Gram Positive", na.rm = TRUE),
                8 + 4 * sum(BacteriaList$Gram == "Gram Negative", na.rm = TRUE),
                59 + 4 * sum(BacteriaList$Gram == "Eukaryote", na.rm = TRUE))

FinalPlot <- ggarrange(Plot1,
                       Plot2,
                       Plot3,
                       ncol = 1,
                       nrow = 3,
                       align = "v",
                       heights = HeightList,
                       common.legend = TRUE,
                       legend = "none") %>%
  annotate_figure(left = text_grob("Microorganism", rot = 90, size = 12))

# Legend position is hand-tuned for the final combined figure.
FinalPlot <- FinalPlot + annotation_custom(LegendData,
                                           xmin = -0.6, ymin = -0.8)
FinalPlot

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Antimicrobial_Figure_2023.png",
       plot = FinalPlot,
       width = 22, height = 20, units = "cm", dpi = 1000)

Heatmap with Tests

# Heatmap with test ratios: same three-panel layout as the normal heatmap,
# with each tile labelled by its "positives/total" TestRatio text.

# Species shown on the x axis of every panel, in HeatmapData row order.
SpeciesLimits <- unique(HeatmapData$GS)

# Common-name label for each species, looked up per species.
# unique(HeatmapData$Common) would have a different length from the limits
# (wrong or failing labels) whenever two species share a common name.
CommonLabels <- HeatmapData$Common[match(SpeciesLimits, HeatmapData$GS)]

# Fill scale used by all three panels; returns a fresh scale on each call.
ActivityFillScale <- function() {
  scale_fill_continuous(
    breaks = c(0, 1),
    labels = c("No Activity", "Activity"),
    guide = guide_legend(
      title = "Antimicrobial Activity",
      title.position = "top",
      label.hjust = 0,
      label.vjust = 0.5)
  )
}

# Top panel: scientific names on the top axis, no legend.
Plot1 <- ggplot(subset(HeatmapData, Gram == "Gram Positive"), aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  geom_text(aes(label = TestRatio), size = 2, color = "grey") +
  labs(x = "Flora Scientific Name", y = "Gram Positive") +
  scale_x_discrete(position = "top", limits = SpeciesLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 60, vjust = 0.5, hjust = 0, face = "italic"),
        plot.margin = unit(c(0, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "none") +
  ActivityFillScale()

# Middle panel: x axis text, ticks and title hidden, no legend.
Plot2 <- ggplot(subset(HeatmapData, Gram == "Gram Negative"), aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  geom_text(aes(label = TestRatio), size = 2, color = "grey") +
  scale_x_discrete(limits = SpeciesLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  labs(x = "Flora Scientific Name", y = "Gram Negative") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.margin = unit(c(0.5, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "none") +
  ActivityFillScale()

# Bottom panel: same species axis, labelled with common names. It keeps its
# legend so the legend can be extracted below.
Plot3 <- ggplot(subset(HeatmapData, Gram == "Eukaryote"), aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  geom_text(aes(label = TestRatio), size = 2, color = "grey") +
  labs(x = "Flora Common Name", y = "Eukaryote") +
  scale_x_discrete(limits = SpeciesLimits,
                   labels = CommonLabels) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 300, vjust = 0, hjust = 0),
        plot.margin = unit(c(0.5, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "right") +
  ActivityFillScale()

# Legend taken from the bottom panel; it is drawn manually onto the combined
# figure further down.
LegendData <- as.grob(get_legend(Plot3))

# Relative panel heights: a fixed allowance per panel plus 4 units for every
# organism of that class listed in BacteriaList.
HeightList <- c(40 + 4 * sum(BacteriaList$Gram == "Gram Positive", na.rm = TRUE),
                8 + 4 * sum(BacteriaList$Gram == "Gram Negative", na.rm = TRUE),
                59 + 4 * sum(BacteriaList$Gram == "Eukaryote", na.rm = TRUE))

FinalPlot <- ggarrange(Plot1,
                       Plot2,
                       Plot3,
                       ncol = 1,
                       nrow = 3,
                       align = "v",
                       heights = HeightList,
                       common.legend = TRUE,
                       legend = "none") %>%
  annotate_figure(left = text_grob("Microorganism", rot = 90, size = 12))

# Legend position is hand-tuned for the final combined figure.
FinalPlot <- FinalPlot + annotation_custom(LegendData,
                                           xmin = -0.6, ymin = -0.8)
FinalPlot

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Antimicrobial_Figure_2023_Frequency.png",
       plot = FinalPlot,
       width = 22, height = 20, units = "cm", dpi = 1000)

Heatmap that highlights new tests

# Heatmap that highlights new tests: every tile is drawn faded (alpha 0.2),
# and tiles with NewTest == 1 are redrawn at full strength with a white
# border. The eukaryote panel currently has its highlight layer commented out.

# Full-strength plot of all data, used only as the source of the legend.
PlotLegendOnly <- ggplot(HeatmapData, aes(GS, Organism, fill = ABActivity)) +
  geom_tile() +
  scale_fill_gradient(na.value = NA,
                      breaks = c(0, 1),
                      labels = c("No Activity", "Activity"),
                      guide = guide_legend(
                        title = "Antimicrobial Activity",
                        title.position = "top",
                        label.hjust = 0,
                        label.vjust = 0.5)
  )

# Species shown on the x axis of every panel, in HeatmapData row order.
SpeciesLimits <- unique(HeatmapData$GS)

# Common-name label for each species, looked up per species.
# unique(HeatmapData$Common) would have a different length from the limits
# (wrong or failing labels) whenever two species share a common name.
CommonLabels <- HeatmapData$Common[match(SpeciesLimits, HeatmapData$GS)]

# Fill and border scales for one panel. `fill_na` is the fill used for tiles
# whose highlight value is NA (tiles that are not new tests). The border
# scale is white at both ends, so every highlighted tile gets a white outline.
HighlightScales <- function(fill_na) {
  list(
    scale_fill_gradient(na.value = fill_na,
                        breaks = c(0, 1),
                        labels = c("No Activity", "Activity"),
                        guide = guide_legend(
                          title = "Antimicrobial Activity",
                          title.position = "top",
                          label.hjust = 0,
                          label.vjust = 0.5)
    ),
    scale_fill_gradient(aesthetics = "color",
                        low = "white",
                        high = "white",
                        na.value = NA,
                        breaks = c(0, 1)
    )
  )
}

# Top panel. `linewidth` sets the border width (`size` for lines was
# deprecated in ggplot2 3.4.0).
Plot1 <- ggplot(subset(HeatmapData, Gram == "Gram Positive"), aes(GS, Organism)) +
  geom_tile(aes(fill = ABActivity), alpha = 0.2) +
  geom_tile(aes(fill = ifelse(NewTest == 1, ABActivity, NA), color = ifelse(NewTest == 1, NewTest, NA)), linewidth = 0.8) +
  labs(x = "Flora Scientific Name", y = "Gram Positive") +
  scale_x_discrete(position = "top", limits = SpeciesLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 60, vjust = 0.5, hjust = 0, face = "italic"),
        plot.margin = unit(c(0, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "none") +
  HighlightScales(NA)

# Middle panel. Every layer sets its own fill, so no plot-level fill mapping
# is needed.
Plot2 <- ggplot(subset(HeatmapData, Gram == "Gram Negative"), aes(GS, Organism)) +
  geom_tile(aes(fill = ABActivity), alpha = 0.2) +
  geom_tile(aes(fill = ifelse(NewTest == 1, ABActivity, NA), color = ifelse(NewTest == 1, NewTest, NA)), linewidth = 0.8) +
  scale_x_discrete(limits = SpeciesLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  labs(x = "Flora Scientific Name", y = "Gram Negative") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.margin = unit(c(0.5, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "none") +
  HighlightScales("#FFFFFF00")

# Bottom panel, labelled with common names.
# NOTE(review): the highlight layer is disabled here, so new eukaryote tests
# are not emphasised -- confirm this is deliberate.
Plot3 <- ggplot(subset(HeatmapData, Gram == "Eukaryote"), aes(GS, Organism)) +
  geom_tile(aes(fill = ABActivity), alpha = 0.2) +
#  geom_tile(aes(fill = ifelse(NewTest == 1, ABActivity, NA), color = ifelse(NewTest == 1, NewTest, NA)), linewidth = 0.8) +
  labs(x = "Flora Common Name", y = "Eukaryote") +
  scale_x_discrete(limits = SpeciesLimits,
                   labels = CommonLabels) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 300, vjust = 0, hjust = 0),
        plot.margin = unit(c(0.5, 2, 0.5, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"),
        legend.position = "right") +
  HighlightScales("#FFFFFF00")

# The legend comes from the unfaded plot, so its keys are not drawn at alpha
# 0.2.
LegendData <- as.grob(get_legend(PlotLegendOnly))

# Relative panel heights: a fixed allowance per panel plus 4 units for every
# organism of that class listed in BacteriaList.
HeightList <- c(40 + 4 * sum(BacteriaList$Gram == "Gram Positive", na.rm = TRUE),
                8 + 4 * sum(BacteriaList$Gram == "Gram Negative", na.rm = TRUE),
                59 + 4 * sum(BacteriaList$Gram == "Eukaryote", na.rm = TRUE))

FinalPlot <- ggarrange(Plot1,
                       Plot2,
                       Plot3,
                       ncol = 1,
                       nrow = 3,
                       align = "v",
                       heights = HeightList,
                       common.legend = TRUE,
                       legend = "none") %>%
  annotate_figure(left = text_grob("Microorganism", rot = 90, size = 12))

# Legend position is hand-tuned for the final combined figure.
FinalPlot <- FinalPlot + annotation_custom(LegendData,
                                           xmin = -0.6, ymin = -0.8)

FinalPlot

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Antimicrobial_Figure_2023_NewTestHighlight.png",
       plot = FinalPlot,
       width = 22, height = 20, units = "cm", dpi = 1000)

Heatmap for each Barcode

# Heatmap for each barcode: one column per individual sample barcode instead
# of one per species, in the same three stacked panels.

# Prefix barcodes with "BMP" for display and sort rows by the year read from
# the first four characters of the original barcode.
FigData_Barcode <- FigData %>%
  mutate(Year = as.numeric(substr(Barcode, 1, 4)), Barcode = paste0("BMP", Barcode)) %>%
  arrange(Year)

# Barcodes in year order; used as the x axis of every panel. These explicit
# limits decide the column order.
BarcodeLimits <- unique(FigData_Barcode$Barcode)

# Top panel: barcodes on the top axis, no legend.
Plot1 <- FigData_Barcode %>%
  filter(Gram == "Gram Positive") %>%
  ggplot(aes(reorder(Barcode, Year), Organism, fill = ABActivity)) +
  geom_tile() +
  labs(x = "Barcodes", y = "Gram Positive") +
  scale_x_discrete(position = "top", limits = BarcodeLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  theme(axis.text.x = element_text(angle = 60, vjust = 0.5, hjust = 0, face = "italic"),
        legend.position = "none",
        plot.margin = unit(c(0, 1, 1, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"))

# Middle panel: x axis text, ticks and title hidden, no legend.
Plot2 <- FigData_Barcode %>%
  filter(Gram == "Gram Negative") %>%
  ggplot(aes(reorder(Barcode, Year), Organism, fill = ABActivity)) +
  geom_tile() +
  scale_x_discrete(limits = BarcodeLimits, drop = FALSE) +
  scale_y_discrete(limits = rev) +
  labs(y = "Gram Negative") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "none",
        plot.margin = unit(c(0, 1, 1, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic"))

# Bottom panel: carries the only legend, placed underneath.
Plot3 <- FigData_Barcode %>%
  filter(Gram == "Eukaryote") %>%
  ggplot(aes(reorder(Barcode, Year), Organism, fill = ABActivity)) +
  geom_tile() +
  labs(y = "Eukaryote") +
  scale_x_discrete(limits = BarcodeLimits) +
  scale_y_discrete(limits = rev) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "bottom",
        plot.margin = unit(c(0, 1, 1, 1), "cm"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(face = "italic")) +
  scale_fill_continuous(
    breaks = c(0, 1),
    labels = c("No Activity", "Activity"),
    guide = guide_legend(
      title = "Antimicrobial Activity",
      title.position = "top",
      label.hjust = 0,
      label.vjust = 0.5)
  )

# Relative panel heights: a fixed allowance per panel plus 4 units for every
# organism of that class listed in BacteriaList.
HeightList <- c(40 + 4 * sum(BacteriaList$Gram == "Gram Positive", na.rm = TRUE),
                11 + 4 * sum(BacteriaList$Gram == "Gram Negative", na.rm = TRUE),
                33 + 4 * sum(BacteriaList$Gram == "Eukaryote", na.rm = TRUE))

FinalPlot <- ggarrange(Plot1, Plot2, Plot3,
                       ncol = 1,
                       nrow = 3,
                       align = "v",
                       heights = HeightList) %>%
  annotate_figure(left = text_grob("Microorganism", rot = 90, size = 12))

FinalPlot

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Antimicrobial_Figure_2023_Barcode.png",
       plot = FinalPlot,
       width = 30, height = 20, units = "cm", dpi = 1000)
# Yearly throughput summary: number of rows per year for each dataset, with a
# running total, drawn as one facet per dataset and saved as a PNG.

# Count the rows of `data` per year and tag them with `sample_label`.
# The year is read from the first four characters of the Barcode column.
# Returns one row per year with columns Year, Number and Sample.
CountByYear <- function(data, sample_label) {
  counts <- data %>%
    mutate(Year = as.numeric(substr(Barcode, 1, 4))) %>%
    group_by(Year) %>%
    summarise(Number = n())
  counts$Sample <- rep(sample_label, nrow(counts))
  counts
}

# Stack the four per-dataset tallies, then drop rows whose year is 0 or
# missing (barcodes that did not start with a usable year).
Generic_Stats <- rbind(CountByYear(FloraData, "Flora Sample"),
                       CountByYear(ChemData, "Chemical Extract"),
                       CountByYear(FigData, "Antimicrobial Assay"),
                       CountByYear(ToxData, "Toxicity Assay")) %>%
  mutate(Year = na_if(Year, 0)) %>%
  na.omit()

# Running total per dataset in year order; the factor levels fix the facet
# order.
Generic_Stat_Data <- Generic_Stats %>%
  mutate(Sample = factor(Sample,
                         levels = c("Flora Sample",
                                    "Chemical Extract",
                                    "Antimicrobial Assay",
                                    "Toxicity Assay"))) %>%
  group_by(Sample) %>%
  arrange(Year) %>%
  mutate(Cum = cumsum(Number)) %>%
  ungroup()

# Latest year of each dataset: where the end-of-line marker and total go.
# Giving these rows to the point/text layers directly avoids padding the main
# table with NA and the "Removed n rows containing missing values" warnings
# that came with it.
Generic_Stat_Latest <- Generic_Stat_Data %>%
  group_by(Sample) %>%
  filter(Year == max(Year)) %>%
  ungroup()

# NOTE(review): the title reads "Analysed to Data" -- possibly meant
# "to Date"; left unchanged pending confirmation.
Generic_Stat_Plot <- ggplot(Generic_Stat_Data) +
  geom_bar(aes(x = Year, y = Number, fill = Sample), stat = "identity", colour = "black", width = 0.8) +
  geom_line(aes(x = Year, y = Cum, colour = Sample)) +
  geom_point(data = Generic_Stat_Latest, aes(x = Year, y = Cum, colour = Sample)) +
  geom_text(data = Generic_Stat_Latest,
            aes(x = Year, y = Cum, label = Cum),
            hjust = "inward",
            nudge_x = -0.1,
            nudge_y = 30) +
  labs(y = "Number of samples Processed") +
  theme(legend.position = "bottom",
        legend.title = element_blank()) +
  ggtitle("Number of Samples Processed and Analysed to Data", subtitle = "Lines cumulative ") +
  facet_wrap(~ Sample, nrow = 1)

Generic_Stat_Plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave("Total_Sample_Number_2023.png",
       plot = Generic_Stat_Plot,
       width = 20, height = 10, units = "cm", dpi = 1000)

rm(CountByYear, Generic_Stat_Data, Generic_Stat_Latest)

Export Processed Dataframe

# Export data frame
#
# Write the one-row-per-combination heatmap table to CSV, without a row-name
# column.
write.csv(
  x = HeatmapData,
  file = "HeatMap_Processed_Data_Export.csv",
  row.names = FALSE
)