-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathHawaiiFaunaList.R
120 lines (106 loc) · 5.24 KB
/
HawaiiFaunaList.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# Hawaii Marine Fauna Regional Species List
# Sources: Bishop Museum, Lifewatch.be, Ocean Biodiversity Information System
# InverteBase, ARMS, Various professionals
# Compiled August 2023
# Updated July 2024
library(tidyverse)
source("dataWranglingFuns.R")
# Invertebrate records: read from the file generated by regionalInvertList.R.
invertData <- read.csv("HawaiiInvertList.csv")

# Fish records compiled by Jonathan Whitney from:
#   - B.C. Mundy 2005: Checklist of the Fishes of the Hawaiian Archipelago
#   - J.E. Randall 2007: Reef and shore fishes of the Hawaiian Islands
#   - FishBase
# The "Status" column is renamed to "TaxStatus" as the file is loaded.
fishData <- rename(read.csv("HawaiiFishList.csv"), "TaxStatus" = "Status")
# Columns requested from wormsProcess(). dataWranglingFuns.R holds the full
# list of columns that can be requested.
ColOfInterest <- c(
  # Name, identifier, and status fields
  "scientificname",
  "AphiaID",
  "valid_AphiaID",
  "valid_name",
  "status",
  # Taxonomic hierarchy fields
  "kingdom",
  "phylum",
  "class",
  "order",
  "family",
  "genus",
  "rank"
)
# Build the fish metadata table from the original fish data file.
# Result columns, in order: common, species, distribution, endemic, origin.
fishMeta <- fishData %>%
  mutate(
    # Binary endemic flag: 1 when status is exactly "Endemic", 0 for any other
    # value, including a missing status.
    endemic = if_else(status == "Endemic", 1, 0, missing = 0),
    # Data origin, decoded from the number(s) recording where the species was
    # originally listed. Codes not listed here fall through to "Whitney".
    # NOTE(review): "2,4" maps to "Mundy2005" although "2" alone is "Fishbase"
    # and "3" alone is "Mundy2005" -- confirm this mapping is intended.
    origin = case_when(
      source == "1" ~ "Randall2007",
      source == "2" ~ "Fishbase",
      source == "3" ~ "Mundy2005",
      source == "1,3" ~ "Mundy2005",
      source == "2,3" ~ "Mundy2005",
      source == "1,4" ~ "Randall2007",
      source == "2,4" ~ "Mundy2005",
      .default = "Whitney"
    )
  ) %>%
  # Keep only the metadata columns (status and source are dropped), renaming
  # FBname and DemersPelag so the table can be joined to the invert data.
  select(
    "common" = FBname,
    species,
    "distribution" = DemersPelag,
    endemic,
    origin
  )
# Run the fish records through wormsProcess() (WoRMS API) to collect the
# taxonomy columns named in ColOfInterest.
fishWormsData <- wormsProcess(fishData, ColOfInterest)

# Attach the fish metadata to the WoRMS output, matching the WoRMS
# "scientificname" column against the metadata "species" column.
fishJoinData <- left_join(
  fishWormsData,
  fishMeta,
  by = join_by("scientificname" == "species")
)
# Combine fish data with invert data to create a comprehensive marine fauna
# list. No `by` is supplied, so the join keys are every column name the two
# tables have in common; rows found in only one table are kept.
allFauna <- invertData %>%
  full_join(fishJoinData) %>%
  # Drop the "X" column (presumably the row-name column picked up by
  # read.csv() -- confirm). any_of() makes this a no-op instead of an error
  # when the invert file has no such column.
  select(!any_of("X"))
# Local names for species.
# Blank cells and "NA" are both read as missing values.
localNames <- read.csv("local_name_list.csv", na.strings = c("", "NA"))

# Split the local-name list into three lookup tables, one per taxonomic level.
# Each keeps the rows where that level is filled in and drops the columns for
# the other two levels.
localFamilies <- localNames %>%
  filter(!is.na(family)) %>%
  select(-c(genus, species))

localGenus <- localNames %>%
  filter(!is.na(genus)) %>%
  select(-c(family, species))

localSpecies <- localNames %>%
  filter(!is.na(species)) %>%
  select(-c(family, genus))
# Marine mammals (added July 2024).
# Only the scientific and common name columns are kept from the spreadsheet.
mammals <- readxl::read_xlsx(
  path = "Species Lists/MarineMammals_PacificIslandsRegion.xlsx"
) %>%
  rename(
    "species" = "Scientific Name",
    "common" = "Common Name"
  ) %>%
  select("species", "common") %>%
  # Tag every row with its origin. CRP = Cetacean Research Program.
  mutate(origin = "CRP")

# Pull taxonomy from WoRMS via wormsProcess(), then join back to the original
# mammal table (scientificname matched to species) to carry over the common
# name and origin.
mammalsWorms <- left_join(
  wormsProcess(mammals, ColOfInterest),
  mammals,
  by = join_by("scientificname" == "species")
)
# Attach local names to the combined fauna list at family, genus, and species
# level in turn, then append the marine mammals.
# NOTE(review): coalesce() keeps the first non-missing value, so a local name
# or fishery value picked up at family level takes precedence over genus- and
# species-level matches -- confirm that precedence is intended.
localNamesAdded <- allFauna %>%
  # Family level. A many-to-many relationship is expected here.
  left_join(
    localFamilies,
    by = join_by(family),
    relationship = "many-to-many"
  ) %>%
  # Prefer the common name already on the record; use the joined one only
  # when the original is missing.
  mutate(common = coalesce(common.x, common.y)) %>%
  # Drop the now-redundant suffixed columns and the existing species column.
  select(-c(common.x, common.y, species)) %>%
  # Genus level: same approach, now also merging local and fishery.
  left_join(
    localGenus,
    by = join_by(genus),
    relationship = "many-to-many"
  ) %>%
  mutate(
    common = coalesce(common.x, common.y),
    local = coalesce(local.x, local.y),
    fishery = coalesce(fishery.x, fishery.y)
  ) %>%
  select(
    -c(common.x, common.y, fishery.x, fishery.y, local.x, local.y)
  ) %>%
  # valid_name becomes the species column used as the next join key.
  rename("species" = valid_name) %>%
  # Species level. localSpecies holds only rows with a species value, i.e. it
  # excludes the family- and genus-only entries of the local-name list.
  left_join(
    localSpecies,
    by = join_by(species),
    relationship = "many-to-many"
  ) %>%
  mutate(
    local = coalesce(local.x, local.y),
    common = coalesce(common.x, common.y),
    fishery = coalesce(fishery.x, fishery.y)
  ) %>%
  select(
    -c(local.x, local.y, common.x, common.y, fishery.x, fishery.y)
  ) %>%
  # Append the marine mammals. No `by` is supplied, so the join keys are every
  # column name the two tables share.
  # NOTE(review): valid_name was renamed to species above; if mammalsWorms
  # still carries a valid_name column, the result will contain both columns --
  # confirm against wormsProcess() output.
  full_join(mammalsWorms)
# Final formatting for export: remove redundant columns, reorder, and sort.
exportFinal <- localNamesAdded %>%
  # Drop the run of columns from locationID through listID, plus
  # TROPHIC_GUILD_CODE.
  select(-c(locationID:listID, TROPHIC_GUILD_CODE)) %>%
  # Column layout: worms_name ahead of valid_AphiaID, rank ahead of kingdom.
  relocate(worms_name, .before = valid_AphiaID) %>%
  relocate(rank, .before = kingdom) %>%
  rename("localName" = local) %>%
  # Place genus immediately before species.
  relocate(genus, .before = species) %>%
  # Sort rows by worms_name.
  arrange(worms_name)

# Write the marine fauna list to a CSV file, without a row-name column.
write.csv(
  x = exportFinal,
  file = "HawaiiMarineFaunaList.csv",
  row.names = FALSE
)