### ### R Script for biogeographic analysis in Baird et al 2021: ### "50 Million Years of Beetle Evolution Along the Antarctic Polar Front", ### using BioGeoBEARS package (Nick Matzke, https://github.com/nmatzke/BioGeoBEARS) ### ################################################################################ ################################################################################ library(GenSA) library(FD) library(snow) library(parallel) library(rexpokit) library(cladoRcpp) library(BioGeoBEARS) trfn = "BGB_Beast_final_input_tree.newick" tr = read.tree(trfn) geogfn = "BGB_geog_input.data" tipranges = getranges_from_LagrangePHYLIP(lgdata_fn=geogfn) tipranges max(rowSums(dfnums_to_numeric(tipranges@df))) max_range_size = 4 ################## DEC Model, stratified ###################### BioGeoBEARS_run_object = define_BioGeoBEARS_run() BioGeoBEARS_run_object$geogfn = geogfn BioGeoBEARS_run_object$trfn = trfn BioGeoBEARS_run_object$max_range_size = max_range_size BioGeoBEARS_run_object$min_branchlength = 0.000001 BioGeoBEARS_run_object$include_null_range = TRUE BioGeoBEARS_run_object$timesfn = "BGB_times_input.txt" BioGeoBEARS_run_object$areas_allowed_fn = "BGB_Areas_Allowed_input.txt" BioGeoBEARS_run_object$on_NaN_error = -1e50 BioGeoBEARS_run_object$speedup = TRUE BioGeoBEARS_run_object$use_optimx = "GenSA" BioGeoBEARS_run_object$num_cores_to_use = 1 BioGeoBEARS_run_object$force_sparse = FALSE BioGeoBEARS_run_object = readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) BioGeoBEARS_run_object = section_the_tree(inputs=BioGeoBEARS_run_object, make_master_table=TRUE, plot_pieces=FALSE) BioGeoBEARS_run_object$return_condlikes_table = TRUE BioGeoBEARS_run_object$calc_TTL_loglike_from_condlikes_table = TRUE BioGeoBEARS_run_object$calc_ancprobs = TRUE BioGeoBEARS_run_object check_BioGeoBEARS_run(BioGeoBEARS_run_object) runslow = TRUE resfn = "Beast_final_DEC_stratified.Rdata" if (runslow) { res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDEC = res } else { # Loads to "res" load(resfn) resDEC = res } ################ Adding a J parameter (DEC+J Model, stratified) #################### dstart = resDEC$outputs@params_table["d","est"] estart = resDEC$outputs@params_table["e","est"] jstart = 0.0001 BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","init"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["d","est"] = dstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","init"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["e","est"] = estart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","type"] = "free" BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","init"] = jstart BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["j","est"] = jstart check_BioGeoBEARS_run(BioGeoBEARS_run_object) resfn = "Beast_final_DEC+J_stratified.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDECj = res } else { # Loads to "res" load(resfn) resDECj = res } ################ Adding an X parameter (DEC+J+X Model, stratified) #################### BioGeoBEARS_run_object$BioGeoBEARS_model_object@params_table["x","type"] = "free" BioGeoBEARS_run_object$distsfn = "BGB_Distances_input.txt" BioGeoBEARS_run_object = readfiles_BioGeoBEARS_run(BioGeoBEARS_run_object) BioGeoBEARS_run_object resfn = "Beast_final_DEC+J+X_stratified.Rdata" runslow = TRUE if (runslow) { #sourceall("/Dropbox/_njm/__packages/BioGeoBEARS_setup/") res = bears_optim_run(BioGeoBEARS_run_object) res save(res, file=resfn) resDECjx = res } else { # Loads to "res" load(resfn) resDECjx = res } ################ Plotting ancestral reconstructions based on each of the 3 models ################### pdffn = "Beast_final_DEC.pdf" pdf(pdffn, width=12, height=10) results_object = resDEC scriptdir = np(system.file("extdata/a_scripts", package="BioGeoBEARS")) plot_BioGeoBEARS_results(results_object, addl_params=list("j"), plotwhat="pie", label.offset=0.6, tipcex=0.8, statecex=0.6, splitcex=0.4, titlecex=0.8, plotsplits=FALSE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) dev.off() pdffn = "Beast_final_DEC+J.pdf" pdf(pdffn, width=12, height=10) results_object = resDECj plot_BioGeoBEARS_results(results_object, addl_params=list("j"), plotwhat="pie", label.offset=0.6, tipcex=0.8, statecex=0.6, splitcex=0.4, titlecex=0.8, plotsplits=FALSE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) dev.off() pdffn = "Beast_final_DEC+J+X.pdf" pdf(pdffn, width=12, height=10) results_object = resDECjx plot_BioGeoBEARS_results(results_object, addl_params=list("j"), plotwhat="pie", label.offset=0.6, tipcex=0.8, statecex=0.6, splitcex=0.4, titlecex=0.8, plotsplits=FALSE, cornercoords_loc=scriptdir, include_null_range=TRUE, tr=tr, tipranges=tipranges) dev.off() ################ Biogeographic Stochastic Mapping (using DEC+J+X model) ################### model_name = "DEC+J+X" res = resDECjx clado_events_tables = NULL ana_events_tables = NULL lnum = 0 BSM_inputs_fn = "BSM_inputs_file.Rdata" runInputsSlow = TRUE if (runInputsSlow) { stochastic_mapping_inputs_list = get_inputs_for_stochastic_mapping(res=res) save(stochastic_mapping_inputs_list, file=BSM_inputs_fn) } else { # Loads to "stochastic_mapping_inputs_list" load(BSM_inputs_fn) } # END if (runInputsSlow) runBSMslow = TRUE if (runBSMslow == TRUE) { # Saves to: RES_clado_events_tables.Rdata # Saves to: RES_ana_events_tables.Rdata BSM_output = runBSM(res, stochastic_mapping_inputs_list=stochastic_mapping_inputs_list, maxnum_maps_to_try=100, nummaps_goal=100, maxtries_per_branch=40000, save_after_every_try=TRUE, savedir=getwd(), seedval=12345, wait_before_save=0.01) RES_clado_events_tables = BSM_output$RES_clado_events_tables RES_ana_events_tables = BSM_output$RES_ana_events_tables } else { # Load previously saved... # Loads to: RES_clado_events_tables load(file="RES_clado_events_tables.Rdata") # Loads to: RES_ana_events_tables load(file="RES_ana_events_tables.Rdata") BSM_output = NULL BSM_output$RES_clado_events_tables = RES_clado_events_tables BSM_output$RES_ana_events_tables = RES_ana_events_tables } # END if (runBSMslow == TRUE) clado_events_tables = BSM_output$RES_clado_events_tables ana_events_tables = BSM_output$RES_ana_events_tables head(clado_events_tables[[1]]) head(ana_events_tables[[1]]) length(clado_events_tables) length(ana_events_tables) include_null_range = TRUE areanames = names(tipranges@df) areas = areanames max_range_size = 4 length(clado_events_tables) length(ana_events_tables) head(clado_events_tables[[1]][,-20]) tail(clado_events_tables[[1]][,-20]) head(ana_events_tables[[1]]) tail(ana_events_tables[[1]]) areanames = names(tipranges@df) actual_names = areanames actual_names dmat_times = get_dmat_times_from_res(res=res, numstates=NULL) dmat_times clado_events_tables = BSM_output$RES_clado_events_tables ana_events_tables = BSM_output$RES_ana_events_tables BSMs_w_sourceAreas = simulate_source_areas_ana_clado(res, clado_events_tables, ana_events_tables, areanames) clado_events_tables = BSMs_w_sourceAreas$clado_events_tables ana_events_tables = BSMs_w_sourceAreas$ana_events_tables counts_list = count_ana_clado_events(clado_events_tables, ana_events_tables, areanames, actual_names) summary_counts_BSMs = counts_list$summary_counts_BSMs print(conditional_format_table(summary_counts_BSMs)) hist_event_counts(counts_list, pdffn=paste0(model_name, "_histograms_of_event_counts.pdf")) tmpnames = names(counts_list) cat("\n\nWriting tables* of counts to tab-delimited text files:\n(* = Tables have dimension=2 (rows and columns). Cubes (dimension 3) and lists (dimension 1) will not be printed to text files.) \n\n") for (i in 1:length(tmpnames)) { cmdtxt = paste0("item = counts_list$", tmpnames[i]) eval(parse(text=cmdtxt)) # Skip cubes if (length(dim(item)) != 2) { next() } outfn = paste0(tmpnames[i], ".txt") if (length(item) == 0) { cat(outfn, " -- NOT written, *NO* events recorded of this type", sep="") cat("\n") } else { cat(outfn) cat("\n") write.table(conditional_format_table(item), file=outfn, quote=FALSE, sep="\t", col.names=TRUE, row.names=TRUE) } # END if (length(item) == 0) } # END for (i in 1:length(tmpnames)) cat("...done.\n")