Install packages

install.packages("ggplot2")
library(ggplot2) 
install.packages('tidyverse')
library(tidyverse)

Data handling

# Load data
rawlie.data <- read.table('Pseudemoia_rawlinsoni_records_FINAL.csv', header=T,sep=',')  
str(rawlie.data)
View(rawlie.data)
summary(rawlie.data)
attach(rawlie.data)

#### make a new df which excludes records from our 2021/2022 tile surveys and active surveys (i.e. from a separate study)
rawlie.data2 <- subset(rawlie.data, Group=="Personal communication" | Group=="Online databases")
View(rawlie.data2)

Creat Bar plot showing the number of records of Pseudemoia rawlinoni in each year grouped by source.

###### stacked barplot
ggplot(rawlie.data2, aes(Year, fill=Group)) + 
  geom_bar(position=position_stack(reverse = TRUE), alpha=0.7, color="black")  +
  scale_fill_manual(values=c("cornflowerblue","maroon")) +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(name="Number of records", n.breaks = 5) +
  guides(fill=guide_legend(title="Record source", reverse = TRUE)) +
  theme_bw()
HABITAT SUITABILITY MODELLING
install.packages("dismo")
install.packages("maptools")
install.packages("maps")
install.packages("mapdata")
install.packages("dplyr")
install.packages("CoordinateCleaner")
install.packages("raster")
install.packages("ggplot2")
install.packages("scales")
install.packages("spThin")
install.packages("ecospat")
install.packages("sf")
install.packages("rnaturalearthdata")
install.packages("rnaturalearth")
install.packages("tidyverse")
install.packages("usdm")
install.packages("ENMeval")
install.packages("rJava")
install.packages("ncdf4")
install.packages("biomod2")

library(biomod2)
library(dismo)
library(maptools)
library(maps)    
library(mapdata) 
library(dplyr)
library(CoordinateCleaner)
library(raster)
library(spThin)
library(ecospat)
library(sf)
library(scales)
library(rnaturalearthdata)
library(rnaturalearth)
library(tidyverse)
library(usdm)
library(ENMeval)
library(rJava)
library(ncdf4)

LOAD OCCURRENCE DATA

getwd()
rawlie.data <- read.table('rawlie_data_pre-post.csv', header=T,sep=',')  %>% 
  mutate(Species="Pseudemoia rawlinsoni")
str(rawlie.data)
View(rawlie.data)

get aus map

aus_map <- ne_countries(scale = "medium",
              country = "Australia",
              returnclass = "sf")

plot records on Aus map

ggplot() +
  geom_sf(data = aus_map) +
  geom_point(data = rawlie.data, 
             aes(x = Longitude, 
                 y = Latitude)) +
  theme_bw()

thining to 1km

rawlie_geoThin <- thin(loc.data = rawlie.data,
                         lat.col = "Latitude",
                         long.col = "Longitude",
                         spec.col = "Species",
                         thin.par = 1,
                         reps = 100,
                         locs.thinned.list.return = TRUE,
                         write.files = FALSE,
                         write.log.file = FALSE)
rawlie_geoThin <- as.data.frame(rawlie_geoThin[7]) 

#write.csv(rawlie_geoThin, "rawlie_geoThin.csv", row.names = FALSE)

load background bioregions

rawlie_bioregions <- st_read("/Volumes/PHOTOS 4T1/QGIS/Rawlinsoni/rawlie IBRA regions/rawlie_background_V3.shp")

ggplot() +
  geom_sf(data = rawlie_bioregions) +
  geom_point(data = rawlie.data, 
             aes(x = Longitude, 
                 y = Latitude)) +
  theme_bw()

Uploading all environmental variables

# create template raster
raster_temp <- raster(rawlie_bioregions, res = 0.008333)

# resampling all variable to have same dimensions as template bioregion raster
# WORLDCLIM
worldclim <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/wc2.1_30s_bio", 
                             full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

#ENVIREM
envirem <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/ENVIREM_elev_Australia_current_30arcsec_generic copy", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

### NDVI

NDVI <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/MODIS13 NDVI", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

# DIST TO WATER LINES and BODIES
water_euclidean <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/Water", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

# soil moisture

soil_moisture <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/Soil moisture", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

# human index

human_index <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/human index", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

plot some varibles to check that they look good


plot(worldclim)
plot(human_index)

save variables to a raster stack called env

env <- stack(worldclim,
             envirem,
             NDVI,
             water_euclidean,
             soil_moisture,
             human_index)
names(env) <- c("bio1", 
                    "bio10",
                    "bio11",
                    "bio12",
                    "bio13",
                    "bio14",
                    "bio15",
                    "bio16",
                    "bio17",
                    "bio18",
                    "bio19",
                    "bio2",
                    "bio3",
                    "bio4",
                    "bio5",
                    "bio6",
                    "bio7",
                    "bio8",
                    "bio9",
                "aridity", "topowet", "tri","NDVI", "dist_to_waterbodies",  "dist_to_watercourses","soil_moisture","human_influence"
                    )

#dir.create("envStack")
#writeRaster(env, "./envStack/env.nc")

extract environmental data based on geothin

rawlie_env <- raster::extract(env, rawlie_geoThin) %>% 
  cbind(rawlie_geoThin) %>% 
  na.omit()

rawlie_env

check for correlations among variables

rawlie_env %>% 
  cor(method="pearson") %>% 
  as.data.frame()

PCA to check for correlations among variables

s.corcircle(dudi.pca(rawlie_env[-c(28,29)],
                     scannf = F,
                     nf = 2)$co, 
            xax = 1,
            yax = 2,
            grid = FALSE)

Variance inflation factor test (-28,29) is removing the last two columns from the data fram which are lat and long

vif(as.data.frame(rawlie_env[-c(28,29)]))

vifstep(as.data.frame(rawlie_env[-c(28,29)]))

vifcor(as.data.frame(rawlie_env[-c(28,29)]),
       th = 0.7)

loading in stack of variables AGAIN

#run1
env <- brick("./envStack/env.nc")
names(env) <- c("bio1", 
                    "bio10",
                    "bio11",
                    "bio12",
                    "bio13",
                    "bio14",
                    "bio15",
                    "bio16",
                    "bio17",
                    "bio18",
                    "bio19",
                    "bio2",
                    "bio3",
                    "bio4",
                    "bio5",
                    "bio6",
                    "bio7",
                    "bio8",
                    "bio9",
                "aridity", "topowet", "tri","NDVI", "dist_to_waterbodies",  "dist_to_watercourses","soil_moisture","human_influence"
                    )

subsetting based on desired variables

#run2
env_ps <- env[[c("bio5",
                 "bio6",
                 "bio15",
                 "aridity", 
                 "NDVI", 
                 "topowet",
                 "soil_moisture")]]
plot(env_ps)
#summary(env_ps[[7]])
plot(env_ps[["soil_moisture"]])
plot(env_ps[["NDVI"]])
plot(env_ps[["aridity"]])
plot(env_ps[["bio5"]])
plot(env_ps[["bio6"]])
plot(env_ps[["bio15"]])
plot(env_ps[["topowet"]])

reading csv of occ and bg

#run 3
rawlie_geoThin <- read.csv("rawlie_geoThin.csv")
rawlie_bg <- read.csv("rawlie_bg.csv")

extract enviro data from rawlie_geothin points based on smaller list of new preselected variables

rawlie_env <- raster::extract(env_ps, rawlie_geoThin) %>% 
  cbind(rawlie_geoThin) %>% 
  na.omit()

rawlie_env
rawlie_env %>% 
  cor(method="pearson") %>% 
  as.data.frame()
s.corcircle(dudi.pca(rawlie_env[-c(8,9)],
                     scannf = F,
                     nf = 2)$co, 
            xax = 1,
            yax = 2,
            grid = FALSE)
vif(as.data.frame(rawlie_env[-c(8,9)]))

vifstep(as.data.frame(rawlie_env[-c(8,9)]))

vifcor(as.data.frame(rawlie_env[-c(8,9)]),
       th = 0.72)

Visualizing spatial autocorrelation using mantel r correlogram

Spatial autocorrelation is not significantly different from zero in many distances

rawlie_correlogram <- rawlie_geoThin %>% 
  st_as_sf(coords = c("Longitude", "Latitude"),
           crs = 4326) %>% #converting projection from lat/long to metres for better interpretation
  st_transform(crs = 3112) %>% 
  st_coordinates() %>% 
  cbind(raster::extract(env_ps, rawlie_geoThin)) %>% 
  cbind(rawlie_geoThin) %>% 
  na.omit()

ecospat.mantel.correlogram(dfvar = rawlie_correlogram, 
                             colxy = 1:2, 
                             n = 100, 
                             colvar = 3:10, 
                             max = 10000, 
                             nclass = 50, 
                             nperm = 100)

GENERATE BG POINTS

library(spatstat)

#reference any one of your rasters so that the environmental raster has same properties as bias raster
raster_ref <- env_ps[["aridity"]]

samplingSites <- rawlie_geoThin %>% 
  st_as_sf(coords = c("Longitude", "Latitude"),
           crs = 4326) %>% #converting projection from lat/long to metres for better interpretation
  st_transform(crs = 3112) %>% 
                 st_coordinates() %>% 
                 data.frame()

shp <- rawlie_bioregions %>% 
        st_transform(3112) %>% 
  st_union()

Visualize reference data

Reference raster

plot(raster_ref)

Sampling sites

ggplot() +
  geom_sf(data = shp %>% st_as_sf()) +
  geom_sf(data = samplingSites %>% 
                  st_as_sf(coords = c("X", "Y"), 
                           crs = 3112)) +
  theme_minimal()

Posterio Analysis

#Point-pattern process transformation

Create a Point-pattern process (PPP) using the raw data. Here, i used the Lord Howe Island shapefile as “window”, and the coordinates of the samplign sites as “x = X” (originally “Longitude” in raw file) and “Y = y” (originally “Latitude” in raw file).

win <- as.owin(shp)
ppp <- ppp(x = pull(dplyr::select(samplingSites, X)),
           y = pull(dplyr::select(samplingSites, Y)),
           window = win)

Visualize PPP

plot(ppp)

Compute the bandwidth

I used cross-validation to select a smoothing bandwidth for the kernel estimation of point process intensity. The bandwidth σ is chosen to minimise the mean-square error criterion defined by Diggle (1985).

bw.diggle(X = ppp)

#sigma value is kilometres radius when smoothing the densities of occurences
#dont report if specifying your own bandwidth (which you are)

Compute the density of PPP

Here, the bandwidth was computed using cross-validation; edge correction was enabled and used diggle method. I then converted the bias file into raster with a GDA92 projection.

biasFile <- raster(density.ppp(ppp,
                               kernel = "gaussian",
                               sigma = 50000,
                               edge = TRUE,
                               diggle = TRUE),
                   crs = "EPSG:3112")

raster::crs(biasFile) <- "EPSG:3112"

Visualize density map / bias file RUN AS FULL CHUNK

plot(biasFile)
plot(shp, 
     color = NA, 
     add = TRUE)
plot(samplingSites %>% 
       st_as_sf(coords = c("X", "Y")),
     add = TRUE)

resample the bias file, to align with the reference raster.

Maxent requires all raster files (environemntal rasters and bias file) to have the same projection, resolution, and exent. Basically, if you cant stack() the raster files, maxent won’t run.

here, I first reprojected the bias file to a WGS 84 (reference raster projection), and then resampled the bias file based on the resolution and extent of the reference raster using a bilinear resampling method.

biasFile_export <- biasFile %>%
                     projectRaster(crs = "EPSG:4326") %>% 
                     resample(y = raster_ref,
                              method = "bilinear")

Plot bias file

plot(biasFile_export)

Double checking if the bias file has the same properties as that of the reference raster

#crs
proj4string(raster_ref) == proj4string(biasFile_export)
#resolution
res(raster_ref) == res(biasFile_export)
#extent
extent(raster_ref) == extent(biasFile_export)

write bias file

Finally, the bias file is written in the “./output” folder of the R project.

writeRaster(biasFile_export,
            "/Volumes/PHOTOS 4T1/QGIS/Rawlinsoni_project/biasfile.tif",
            overwrite = TRUE)

summary(biasFile_export)

Make all NAs 0, and then make all negative 0

values(biasFile_export)[is.na(values(biasFile_export))] <- 0

values(biasFile_export)[which(values(biasFile_export) < 0)] <- 0

making the bias background

rawlie_bg_bias <- xyFromCell(biasFile_export, sample(ncell(biasFile_export), 10000, prob = values(biasFile_export))) 

rawlie_bg_bias <- rawlie_bg_bias %>% 
  as_data_frame() %>% 
  rename(Longitude = x,
         Latitude = y)
  

plot(rawlie_bg_bias)
#write.csv(rawlie_bg_bias, "rawlie_bg_bias.csv", row.names = FALSE)

run model

save.image()

rawlie_enm <- ENMevaluate(occs = rawlie_geoThin, 
                          envs = env_ps, 
                          bg = rawlie_bg_bias, 
                          tune.args = list(rm=c(1:3), 
                                           fc=c('L','LQ', 'LQH', 'H')), 
                          algorithm='maxent.jar',
                          partitions = "checkerboard2", numCores = 6, parallel=TRUE)

save.image()

checking best model

x <- rawlie_enm@results %>% 
  arrange(delta.AICc)
x
#write.csv(x, "Model_eval.csv")

variable importance and response

library(ENMeval)
plot(eval.models(rawlie_enm)[["rm.1_fc.LQ"]],type = "cloglog") 
response(eval.models(rawlie_enm)[["rm.1_fc.LQ"]]) 

plot prediction

plot(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]]) 
points(rawlie_geoThin)

save prediction as a tif file

raster::writeRaster(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], "rawlie.SDM.bias.corrected.tif", 
            format = "GTiff", overwrite = TRUE)

save.image()

tranforming into binary map

#### extract fitted scores of background an occurrence points

# combine bg and geo thin points
temp <- bind_rows(rawlie_geoThin %>% 
            mutate(rawlie = 1),
          rawlie_bg_bias %>% 
            mutate(rawlie = 0))
#extract predictions based on both occ and bg points
fitted_df <- raster::extract(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], temp[,1:2]) %>% 
  as.data.frame() %>% 
  rename("fitted" = ".") %>% 
  bind_cols(temp) %>% 
  na.omit()

tranforming into binary map (continued)

# look for threatshold value (PUT "cutoff" value into the next few lines of code)
library(biomod2)
Find.Optim.Stat(Stat="ROC",
                Fit = fitted_df[, "fitted"],
                Obs = fitted_df[, "rawlie"])

#transform prediction in to binary maps
plot(BinaryTransformation(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], 0.5867845))

#save
raster::writeRaster(BinaryTransformation(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], 0.5867845), "rawlie.binary.bias.corrected.tif", 
            format = "GTiff", overwrite = TRUE)

save image


save.image()
---
title: "rawlinsoni project"
Paper: "Capturing uncatalogued distribution records to improve conservation assessments of Data Deficient species: a case study using the glossy grass skink"
Authors of this code: Jules Farquhar and Arman Pili
Date: 12.09.2022
output:
  html_notebook: default
  pdf_document: default
---
### Install packages
```{r}
install.packages("ggplot2")
library(ggplot2) 
install.packages('tidyverse')
library(tidyverse)
```
#### Data handling
```{r}
# Load data
rawlie.data <- read.table('Pseudemoia_rawlinsoni_records_FINAL.csv', header=T,sep=',')  
str(rawlie.data)
View(rawlie.data)
summary(rawlie.data)
attach(rawlie.data)

#### make a new df which excludes records from our 2021/2022 tile surveys and active surveys (i.e. from a separate study)
rawlie.data2 <- subset(rawlie.data, Group=="Personal communication" | Group=="Online databases")
View(rawlie.data2)
```
#### Creat Bar plot showing the number of records of Pseudemoia rawlinoni in each year grouped by source.
```{r}
###### stacked barplot
ggplot(rawlie.data2, aes(Year, fill=Group)) + 
  geom_bar(position=position_stack(reverse = TRUE), alpha=0.7, color="black")  +
  scale_fill_manual(values=c("cornflowerblue","maroon")) +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(name="Number of records", n.breaks = 5) +
  guides(fill=guide_legend(title="Record source", reverse = TRUE)) +
  theme_bw()
```
##### HABITAT SUITABILITY MODELLING
```{r}
install.packages("dismo")
install.packages("maptools")
install.packages("maps")
install.packages("mapdata")
install.packages("dplyr")
install.packages("CoordinateCleaner")
install.packages("raster")
install.packages("ggplot2")
install.packages("scales")
install.packages("spThin")
install.packages("ecospat")
install.packages("sf")
install.packages("rnaturalearthdata")
install.packages("rnaturalearth")
install.packages("tidyverse")
install.packages("usdm")
install.packages("ENMeval")
install.packages("rJava")
install.packages("ncdf4")
install.packages("biomod2")

library(biomod2)
library(dismo)
library(maptools)
library(maps)    
library(mapdata) 
library(dplyr)
library(CoordinateCleaner)
library(raster)
library(spThin)
library(ecospat)
library(sf)
library(scales)
library(rnaturalearthdata)
library(rnaturalearth)
library(tidyverse)
library(usdm)
library(ENMeval)
library(rJava)
library(ncdf4)
```
#### LOAD OCCURRENCE DATA
```{r}
getwd()
rawlie.data <- read.table('rawlie_data_pre-post.csv', header=T,sep=',')  %>% 
  mutate(Species="Pseudemoia rawlinsoni")
str(rawlie.data)
View(rawlie.data)
```
#### get aus map
```{r}
aus_map <- ne_countries(scale = "medium",
              country = "Australia",
              returnclass = "sf")
```
#### plot records on Aus map
```{r}
ggplot() +
  geom_sf(data = aus_map) +
  geom_point(data = rawlie.data, 
             aes(x = Longitude, 
                 y = Latitude)) +
  theme_bw()
```
#### thining to 1km
```{r}
rawlie_geoThin <- thin(loc.data = rawlie.data,
                         lat.col = "Latitude",
                         long.col = "Longitude",
                         spec.col = "Species",
                         thin.par = 1,
                         reps = 100,
                         locs.thinned.list.return = TRUE,
                         write.files = FALSE,
                         write.log.file = FALSE)
rawlie_geoThin <- as.data.frame(rawlie_geoThin[7]) 

#write.csv(rawlie_geoThin, "rawlie_geoThin.csv", row.names = FALSE)
```
#### load background bioregions
```{r}
rawlie_bioregions <- st_read("/Volumes/PHOTOS 4T1/QGIS/Rawlinsoni/rawlie IBRA regions/rawlie_background_V3.shp")

ggplot() +
  geom_sf(data = rawlie_bioregions) +
  geom_point(data = rawlie.data, 
             aes(x = Longitude, 
                 y = Latitude)) +
  theme_bw()
```
#### Uploading all environmental variables
```{r}
# create template raster
raster_temp <- raster(rawlie_bioregions, res = 0.008333)

# resampling all variable to have same dimensions as template bioregion raster
# WORLDCLIM
worldclim <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/wc2.1_30s_bio", 
                             full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

#ENVIREM
envirem <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/ENVIREM_elev_Australia_current_30arcsec_generic copy", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

### NDVI

NDVI <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/MODIS13 NDVI", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

# DIST TO WATER LINES and BODIES
water_euclidean <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/Water", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

# soil moisture

soil_moisture <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/Soil moisture", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

# human index

human_index <- stack(list.files(path = "/Volumes/PHOTOS 4T1/QGIS/human index", 
                             pattern = ".tif", full.names=TRUE)) %>% 
  resample(raster_temp, 
           method = "bilinear") %>% 
  mask(rawlie_bioregions)

```
#### plot some varibles to check that they look good
```{r}

plot(worldclim)
plot(human_index)

```
#### save variables to a raster stack called env
```{r}
env <- stack(worldclim,
             envirem,
             NDVI,
             water_euclidean,
             soil_moisture,
             human_index)
names(env) <- c("bio1", 
                    "bio10",
                    "bio11",
                    "bio12",
                    "bio13",
                    "bio14",
                    "bio15",
                    "bio16",
                    "bio17",
                    "bio18",
                    "bio19",
                    "bio2",
                    "bio3",
                    "bio4",
                    "bio5",
                    "bio6",
                    "bio7",
                    "bio8",
                    "bio9",
                "aridity", "topowet", "tri","NDVI", "dist_to_waterbodies",  "dist_to_watercourses","soil_moisture","human_influence"
                    )

#dir.create("envStack")
#writeRaster(env, "./envStack/env.nc")
```
#### extract environmental data based on geothin
```{r}
rawlie_env <- raster::extract(env, rawlie_geoThin) %>% 
  cbind(rawlie_geoThin) %>% 
  na.omit()

rawlie_env

```
#### check for correlations among variables
```{r}
rawlie_env %>% 
  cor(method="pearson") %>% 
  as.data.frame()

```
#### PCA to check for correlations among variables
```{r}
s.corcircle(dudi.pca(rawlie_env[-c(28,29)],
                     scannf = F,
                     nf = 2)$co, 
            xax = 1,
            yax = 2,
            grid = FALSE)
```
#### Variance inflation factor test (-28,29) is removing the last two columns from the data fram which are lat and long
```{r}
vif(as.data.frame(rawlie_env[-c(28,29)]))

vifstep(as.data.frame(rawlie_env[-c(28,29)]))

vifcor(as.data.frame(rawlie_env[-c(28,29)]),
       th = 0.7)
```
#### loading in stack of variables AGAIN
```{r}
#run1
env <- brick("./envStack/env.nc")
names(env) <- c("bio1", 
                    "bio10",
                    "bio11",
                    "bio12",
                    "bio13",
                    "bio14",
                    "bio15",
                    "bio16",
                    "bio17",
                    "bio18",
                    "bio19",
                    "bio2",
                    "bio3",
                    "bio4",
                    "bio5",
                    "bio6",
                    "bio7",
                    "bio8",
                    "bio9",
                "aridity", "topowet", "tri","NDVI", "dist_to_waterbodies",  "dist_to_watercourses","soil_moisture","human_influence"
                    )
```
#### subsetting based on desired variables
```{r}
#run2
env_ps <- env[[c("bio5",
                 "bio6",
                 "bio15",
                 "aridity", 
                 "NDVI", 
                 "topowet",
                 "soil_moisture")]]
plot(env_ps)
#summary(env_ps[[7]])
plot(env_ps[["soil_moisture"]])
plot(env_ps[["NDVI"]])
plot(env_ps[["aridity"]])
plot(env_ps[["bio5"]])
plot(env_ps[["bio6"]])
plot(env_ps[["bio15"]])
plot(env_ps[["topowet"]])
```
#### reading csv of occ and bg
```{r}
#run 3
rawlie_geoThin <- read.csv("rawlie_geoThin.csv")
rawlie_bg <- read.csv("rawlie_bg.csv")
```
#### extract enviro data from rawlie_geothin points based on smaller list of new preselected variables
```{r}
rawlie_env <- raster::extract(env_ps, rawlie_geoThin) %>% 
  cbind(rawlie_geoThin) %>% 
  na.omit()

rawlie_env
```

```{r}
rawlie_env %>% 
  cor(method="pearson") %>% 
  as.data.frame()

```

```{r}
s.corcircle(dudi.pca(rawlie_env[-c(8,9)],
                     scannf = F,
                     nf = 2)$co, 
            xax = 1,
            yax = 2,
            grid = FALSE)
```

```{r}
vif(as.data.frame(rawlie_env[-c(8,9)]))

vifstep(as.data.frame(rawlie_env[-c(8,9)]))

vifcor(as.data.frame(rawlie_env[-c(8,9)]),
       th = 0.72)
```
#### Visualizing spatial autocorrelation using mantel r correlogram

Spatial autocorrelation is not significantly different from zero in many distances
```{r}
rawlie_correlogram <- rawlie_geoThin %>% 
  st_as_sf(coords = c("Longitude", "Latitude"),
           crs = 4326) %>% #converting projection from lat/long to metres for better interpretation
  st_transform(crs = 3112) %>% 
  st_coordinates() %>% 
  cbind(raster::extract(env_ps, rawlie_geoThin)) %>% 
  cbind(rawlie_geoThin) %>% 
  na.omit()

ecospat.mantel.correlogram(dfvar = rawlie_correlogram, 
                             colxy = 1:2, 
                             n = 100, 
                             colvar = 3:10, 
                             max = 10000, 
                             nclass = 50, 
                             nperm = 100)
```
#### GENERATE BG POINTS 
```{r}
library(spatstat)

#reference any one of your rasters so that the environmental raster has same properties as bias raster
raster_ref <- env_ps[["aridity"]]

samplingSites <- rawlie_geoThin %>% 
  st_as_sf(coords = c("Longitude", "Latitude"),
           crs = 4326) %>% #converting projection from lat/long to metres for better interpretation
  st_transform(crs = 3112) %>% 
                 st_coordinates() %>% 
                 data.frame()

shp <- rawlie_bioregions %>% 
        st_transform(3112) %>% 
  st_union()
```
#### Visualize reference data

Reference raster
```{r}
plot(raster_ref)
```
#### Sampling sites
```{r}
ggplot() +
  geom_sf(data = shp %>% st_as_sf()) +
  geom_sf(data = samplingSites %>% 
                  st_as_sf(coords = c("X", "Y"), 
                           crs = 3112)) +
  theme_minimal()

```
#### Posterio Analysis

#Point-pattern process transformation

Create a Point-pattern process (PPP) using the raw data. Here, i used the Lord Howe Island shapefile as "window", and the coordinates of the samplign sites as "x = X" (originally "Longitude" in raw file) and "Y = y" (originally "Latitude" in raw file).

```{r}
win <- as.owin(shp)
ppp <- ppp(x = pull(dplyr::select(samplingSites, X)),
           y = pull(dplyr::select(samplingSites, Y)),
           window = win)
```

#### Visualize PPP
```{r}
plot(ppp)

```
#### Compute the bandwidth
I used cross-validation to select a smoothing bandwidth for the kernel estimation of point process intensity. The bandwidth σ is chosen to minimise the mean-square error criterion defined by Diggle (1985).
```{r}
bw.diggle(X = ppp)

#sigma value is kilometres radius when smoothing the densities of occurences
#dont report if specifying your own bandwidth (which you are)
```
#### Compute the density of PPP
Here, the bandwidth was computed using cross-validation; edge correction was enabled and used diggle method. I then converted the bias file into raster with a GDA92 projection.
```{r, message = FALSE}
biasFile <- raster(density.ppp(ppp,
                               kernel = "gaussian",
                               sigma = 50000,
                               edge = TRUE,
                               diggle = TRUE),
                   crs = "EPSG:3112")

raster::crs(biasFile) <- "EPSG:3112"
```
#### Visualize density map / bias file RUN AS FULL CHUNK
```{r}
plot(biasFile)
plot(shp, 
     color = NA, 
     add = TRUE)
plot(samplingSites %>% 
       st_as_sf(coords = c("X", "Y")),
     add = TRUE)
```
#### resample the bias file, to align with the reference raster.

Maxent requires all raster files (environemntal rasters and bias file) to have the same projection, resolution, and exent. Basically, if you cant stack() the raster files, maxent won't run.

here, I first reprojected the bias file to a WGS 84 (reference raster projection), and then resampled the bias file based on the resolution and extent of the reference raster using a bilinear resampling method.

```{r}
biasFile_export <- biasFile %>%
                     projectRaster(crs = "EPSG:4326") %>% 
                     resample(y = raster_ref,
                              method = "bilinear")
```
#### Plot bias file
```{r}
plot(biasFile_export)
```
#### Double checking if the bias file has the same properties as that of the reference raster

```{r}
#crs
proj4string(raster_ref) == proj4string(biasFile_export)
#resolution
res(raster_ref) == res(biasFile_export)
#extent
extent(raster_ref) == extent(biasFile_export)
```
#### write bias file
Finally, the bias file is written in the "./output" folder of the R project.
```{r}
writeRaster(biasFile_export,
            "/Volumes/PHOTOS 4T1/QGIS/Rawlinsoni_project/biasfile.tif",
            overwrite = TRUE)

summary(biasFile_export)
```
#### Make all NAs 0, and then make all negative 0
```{r}
values(biasFile_export)[is.na(values(biasFile_export))] <- 0

values(biasFile_export)[which(values(biasFile_export) < 0)] <- 0

```
#### making the bias background
```{r}
rawlie_bg_bias <- xyFromCell(biasFile_export, sample(ncell(biasFile_export), 10000, prob = values(biasFile_export))) 

rawlie_bg_bias <- rawlie_bg_bias %>% 
  as_data_frame() %>% 
  rename(Longitude = x,
         Latitude = y)
  

plot(rawlie_bg_bias)
#write.csv(rawlie_bg_bias, "rawlie_bg_bias.csv", row.names = FALSE)
```
#### run model
```{r}
save.image()

rawlie_enm <- ENMevaluate(occs = rawlie_geoThin, 
                          envs = env_ps, 
                          bg = rawlie_bg_bias, 
                          tune.args = list(rm=c(1:3), 
                                           fc=c('L','LQ', 'LQH', 'H')), 
                          algorithm='maxent.jar',
                          partitions = "checkerboard2", numCores = 6, parallel=TRUE)

save.image()
```
#### checking best model
```{r}
x <- rawlie_enm@results %>% 
  arrange(delta.AICc)
x
#write.csv(x, "Model_eval.csv")
```
#### variable importance and response
```{r}
library(ENMeval)
plot(eval.models(rawlie_enm)[["rm.1_fc.LQ"]],type = "cloglog") 
response(eval.models(rawlie_enm)[["rm.1_fc.LQ"]]) 
```
#### plot prediction
```{r}
plot(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]]) 
points(rawlie_geoThin)
```
#### save prediction as a tif file
```{r}
raster::writeRaster(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], "rawlie.SDM.bias.corrected.tif", 
            format = "GTiff", overwrite = TRUE)

save.image()
```
#### tranforming into binary map
```{r}
#### extract fitted scores of background an occurrence points

# combine bg and geo thin points
temp <- bind_rows(rawlie_geoThin %>% 
            mutate(rawlie = 1),
          rawlie_bg_bias %>% 
            mutate(rawlie = 0))
#extract predictions based on both occ and bg points
fitted_df <- raster::extract(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], temp[,1:2]) %>% 
  as.data.frame() %>% 
  rename("fitted" = ".") %>% 
  bind_cols(temp) %>% 
  na.omit()
```

#### tranforming into binary map (continued)
```{r}
# look for threatshold value (PUT "cutoff" value into the next few lines of code)
library(biomod2)
Find.Optim.Stat(Stat="ROC",
                Fit = fitted_df[, "fitted"],
                Obs = fitted_df[, "rawlie"])

#transform prediction in to binary maps
plot(BinaryTransformation(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], 0.5867845))

#save
raster::writeRaster(BinaryTransformation(eval.predictions(rawlie_enm)[["rm.1_fc.LQ"]], 0.5867845), "rawlie.binary.bias.corrected.tif", 
            format = "GTiff", overwrite = TRUE)

```
#### save image
```{r}

save.image()
```


