### ### R Script for SNP filtering and SNP-based phylogeographic analysis of the weevil Palirhoeus eatoni ### in Baird et al 2021: ### "50 Million Years of Beetle Evolution Along the Antarctic Polar Front" ### NB: filtering parameters used here are the final parameters published, ### other filters were applied by altering the thresholds/parameters of the filters. ################################################################################ ################################################################################ library(dartR) library(devtools) library(ggplot2) library(radiator) library(ncf) library(vegan) library(adegenet) gl <- gl.read.dart(filename=“data.csv”) #loading the csv file from DArT into R and into a genlight object populations<-read.table(“pop.csv”) #loading the list of populations for each individual gl@pop<-populations #adding population labels to the genlight object levels(pop(gl)) #checking the populations are correct #Filtering SNPs gl80 <- gl.filter.callrate(gl, method = "loc", threshold = 0.8) #at least 80% of loci called gl80<-gl.filter.secondaries(gl80) #secondary reads from the same fragment were removed (this was done following the 80% filter as the process is random, and therefore it is possible some SNPs with higher callrates than another SNP on the same fragment would have been removed gl80maf<-filter_maf(gl80, maf.thresholds=c(“SNP”, 0.2, “OR”, 0.02, 1) #minor allele frequency filter (this was done in radiator) which means minor allele frequency of the SNP had to be either 20% locally OR 2% globally to be considered gl8050maf <- gl.filter.callrate(gl80maf, method="ind", threshold = 0.5) #50% of individuals are called for each loci (note this reduces the # individuals, not SNPs) gl8050maf<-gl.filter.monomorphs(gl8050maf) #excludes all monomorphic loci #Object conversion gi8050maf<-gl2genind(gl8050maf) fasta8050maf<-gl2fasta(gl8050maf) #PCoA - NB: all figures were saved as PDFs and imported into Adobe Illustrator for manipulation pc8050maf <- gl.pcoa(gl8050maf, nfactors=5) gl.pcoa.plot(pc8050maf, gl8050maf, labels="pop", xaxis=1, yaxis=2) #Interactive PCoA (used to confirm individual placement when needed) gl.pcoa.plot(pc8050maf, gl8050maf, labels="interactive", xaxis=1, yaxis=2) ggplotly() #3D plot gl.pcoa.plot.3d(pc8050maf, gl8050maf) #Hardy-Wienberg-Equilibrium calcuations - exported as a csv file hwe<-gl.report.hwe(gl8050maf, bonf = TRUE) write.csv(hwe, “filepath/hwe.csv”) ##PGDSpider2 used to generate input file for Bayescan (GESTE/BayeScan format from structure file) ##BayeScan2 GUI was used with a false discovery rate of 1% (alpha=0.01) ##The loci putatively identified as under selection (Bayescan output) and those found to violate hwe (gl.report.hwe output) were loaded into Excel, sorted by ID number, and compared #File conversion and export for other programs - using radiator structure<-genomic_converter(gl8050maf, strata = NULL, output = c(“structure”), filename=“gl8050maf.structure”, parallel.core = parallel::detectCores() - 1, verbose = TRUE) #Partial Mantel tests of Isolation-By-Distance x <- matrix_of_geographic_distances #Note this is provided in the Supplementary Material y <- matrix_of_genetic_distances #Note these were Fst values as calculated in GenoDive, also provided in the Supplementary Material z <- matrix_of_cluster_membership partial.mantel.test(M1=x,M2=y, M3=z, resamp=1000, method="pearson") #This tests for a correlation between geographic distance and genetic distance, correcting for genetic cluster partial.mantel.test(M1=z,M2=y, M3=x, resamp=1000, method="pearson") #This tests for a correlation between cluster and genetic distance, correcting for geographic distance #Fixed allelic differences analysis gl.fixed.diff(gl8050maf, test=TRUE, pb=TRUE) # The ouptut of this analysis is a csv file containing a matrix of fixed allelic differences between each pair of populations