diff --git a/DESCRIPTION b/DESCRIPTION index 3263b24..225793d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,16 +17,15 @@ Depends: Imports: assertthat, methods, - readr, jsonlite, - stringr, tidytree, utils, gh, curl, cli, - magrittr, - bnpsd + bnpsd, + rlang, + phangorn Suggests: devtools, ggplot2, @@ -40,8 +39,7 @@ Suggests: treeio (>= 1.32.0), rmarkdown, pandoc, - dplyr, - rlang + dplyr VignetteBuilder: knitr Config/Needs/website: rmarkdown Remotes: diff --git a/NAMESPACE b/NAMESPACE index 1ee4684..1f00e18 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,26 +1,25 @@ # Generated by roxygen2: do not edit by hand -export("%>%") export(tp_compile) export(tp_data) -export(tp_fp_fetch) +export(tp_expected_input) export(tp_json_to_phylo) -export(tp_list) export(tp_map_tree) export(tp_model) -export(tp_model_names) -export(tp_parse) +export(tp_model_library) export(tp_parse_host_rep) +export(tp_parse_mcmc) +export(tp_parse_smc) export(tp_phylo_to_tpjson) export(tp_run) +export(tp_run_options) export(tp_smc_convergence) -export(tp_stored_compiled) -export(tp_stored_data) -export(tp_stored_model) export(tp_tempdir) -export(tp_treeppl) export(tp_treeppl_json) -export(tp_write) +export(tp_write_data) +export(tp_write_model) importFrom(bnpsd,tree_reorder) -importFrom(magrittr,"%>%") importFrom(methods,is) +importFrom(phangorn,add_edge_length) +importFrom(phangorn,allCompat) +importFrom(rlang,.data) diff --git a/R/checker.R b/R/checker.R deleted file mode 100644 index 91eb2d9..0000000 --- a/R/checker.R +++ /dev/null @@ -1,14 +0,0 @@ -#' Check input for inference with TreePPL -#' -#' This function checks the input for `tp_go()`. -#' -#' @param model A TreePPL model (S3) -#' @param data A list with each item name accordingly to the data structure in the -#' model. -#' -#' @return ... -#' -tp_check_input <- function(model, data ) { - - -} diff --git a/R/compile.R b/R/compile.R new file mode 100644 index 0000000..fe41425 --- /dev/null +++ b/R/compile.R @@ -0,0 +1,176 @@ +#' Options that can be passed to TreePPL compiler +#' +#' @returns A string with the output from the compiler's help +#' +tp_compile_options <- function() { + + #### under development #### + + # text from tpplc --help + return() + +} + + + +#' Compile a TreePPL model and create inference machinery +#' +#' @description +#' `tp_compile` compile a TreePPL model and create inference machinery to be +#' used by [treepplr::tp_run]. +#' +#' @param model One of tree options: +#' * The full path of the model file that contains the TreePPL code, OR +#' * A string with the name of a model supported by treepplr +#' (see [treepplr::tp_model_library()]), OR +#' * A string containing the entire TreePPL code. +#' @param method Inference method to be used. See tp_compile_options() +#' for all supported methods. +#' @param iterations The number of MCMC iterations to be run. +#' @param particles The number of SMC particles to be run. +#' @param dir The directory where you want to save the executable. Default is +#' [base::tempdir()] +#' @param output Complete path to the compiled TreePPL program that will be +#' created. Default is dir/.exe +#' @param ... See tp_compile_options() for all supported arguments. +#' +#' @return The path for the compiled TreePPL program. +#' @export + +tp_compile <- function(model, + method, + iterations = NULL, + particles = NULL, + dir = NULL, + output = NULL, + ...) { + + options(scipen=999) + + if(is.null(dir)){ + dir_path <- tp_tempdir() + } else { + dir_path <- dir + } + + # check path, find or write model + model_file_name <- tp_model(model) + + musts <- paste("-m", method) + + if(is.null(iterations) & is.null(particles)){ + stop("If using MCMC, please choose number of iterations. + If using SMC, please choose number of particles.") + } + + if(!is.null(iterations)){ + musts <- paste(musts, "--particles", iterations) + } + + if(!is.null(particles)){ + musts <- paste(musts, "--particles", particles) + } + + # create string with other options to tpplc + args_list <- list(...) + args_vec <- unlist(args_list) + + vec <- c() + for(i in seq_along(args_vec)) { + str <- paste0("--", names(args_vec[i]), " ", args_vec[[i]]) + vec <- c(vec, str) + } + + args_str <- paste(vec, collapse = " ") + + # output + if(is.null(output)){ + output_path <- paste0(dir_path, names(model_file_name), ".exe") + } else { + output_path <- output + } + + options <- paste("--output", output_path, args_str) + + # Preparing the command line program + tpplc_path <- installing_treeppl() #### move this? #### + command <- paste(tpplc_path, model_file_name, musts, options) + + # Compile program + # Empty LD_LIBRARY_PATH from R_env for this command specifically + # due to conflict with internal env from treeppl self-contained + system(paste0("LD_LIBRARY_PATH= MCORE_LIBS= ", command)) + + return(output_path) +} + + + +#' Import a TreePPL model +#' +#' @description +#' `tp_model` takes TreePPL code and prepares it to be used by +#' [treepplr::tp_compile()]. +#' +#' @param model_input One of tree options: +#' * The full path of the model file that contains the TreePPL code, OR +#' * A string with the name of a model supported by treepplr +#' (see [treepplr::tp_model_library()]), OR +#' * A string containing the entire TreePPL code. +#' +#' @return The path to the TreePPL model file +#' @export +tp_model <- function(model_input) { + + if (!assertthat::is.string(model_input)){ + stop("Input has to be a sring.") + } + + res <- try(file.exists(model_input), silent = TRUE) + + # If path exists, it's all good + if (!is(res,"try-error") && res) { + model_path <- model_input + # name the model + if (is.null(names(model_path))){ + names(model_path) <- "custom_model" + } + + # If path doesn't exist + } else { + # It can be a model name in the library + res_lib <- try(tp_find_model(model_input), silent = TRUE) + # model_input has the name of a known model + if (!is(res_lib,"try-error") && length(res_lib) != 0) { + model_path <- res_lib + names(model_path) <- model_input + # OR model_input contains the model + #### (needs to be verified as an appropriate model later) #### + } else { + model_path <- tp_write_model(model_input) + names(model_path) <- "custom_model" + } + } + return(model_path) +} + + + + +#' Write out a custom model to tp_tempdir() +#' +#' @param model A string containing the entire TreePPL code. +#' @param model_file_name An optional name to the file created +#' +#' @returns The path to the file created +#' @export +#' +tp_write_model <- function(model, model_file_name = "tmp_model_file") { + + path <- paste0(tp_tempdir(), model_file_name, ".tppl") + cat(model, file = path) + + return(path) + +} + diff --git a/R/tree_convertion.R b/R/data.R similarity index 59% rename from R/tree_convertion.R rename to R/data.R index f77ecfd..47fd0e5 100644 --- a/R/tree_convertion.R +++ b/R/data.R @@ -1,3 +1,220 @@ + +#' List expected input variables for a model +#' +#' @param model A TreePPL model +#' +#' @returns The expected input data for a given TreePPL model. +#' @export +#' +tp_expected_input <- function(model) { + + #### under development here and in treeppl #### + +} + + + +#' Import data for TreePPL program +#' +#' @description +#' Prepare data input for [treepplr::tp_run()]. +#' +#' @param data_input One of the following options: +#' * A named list (or structured list) containing TreePPL data, OR +#' * The full path of a multiple sequence alignment in fasta (.fasta, .fas) +#' or nexus (.nexus, .nex) format, OR +#' * For test data, a string with the name of a model supported by treepplr +#' (see [treepplr::tp_model_library()]). +#' @param data_file_name An optional name for the file created. Ignored if `data_input` +#' is the name of a model from the TreePPL library. +#' @param dir The directory where you want to save the data file in JSON format. +#' Default is [base::tempdir()]. Ignored if `data_input` is the name of a model +#' from the TreePPL library. +#' +#' @return The path for the data file that will be used by [treepplr::tp_run()]. +#' +#' @details +#' `data_input`: The name of each list element has to match the name of a model +#' input, which is defined in the TreePPL model code. +#' +#' @export +#' @examples +#' \dontrun{ +#' # Example using a model name supported by TreePPL +#' input <- tp_data("tree_inference") +#' input +#' +#' # Example using an internal FASTA file (same input data as before, but in fasta format) +#' fasta_file <- system.file("extdata", "tree_inference.fasta", package = "treepplr") +#' input <- tp_data(fasta_file) +#' input +#' } +#' +tp_data <- function(data_input, data_file_name = "tmp_data_file", dir = tp_tempdir()) { + + #### TODO data inputs have to be named as it is expected in the model #### + + if (assertthat::is.string(data_input)) { + + if (grepl("\\.(fasta|fas|nexus|nex)$", data_input, ignore.case = TRUE)) { + data <- read_aln(data_input) + data_list <- tp_list(list(data)) + data_path <- tp_write_data(data_list, data_file_name, dir) + + } else { + + res_lib <- tp_find_data(data_input) + + # model_input has the name of a known model + if (length(res_lib) != 0) { + data_path <- res_lib + + } else { + stop("Invalid input string.") + } + } + + # OR data_input is a list (or a structured list) + } else if (is.list(data_input)) { + + # flatten the list + data_list <- tp_list(data_input) + # write json with input data + data_path <- tp_write_data(data_list, data_file_name, dir) + + } else { + stop("Unknow R type (not a valid path, known data model, or list") + } + + return(data_path) + +} + + +#' Write data to file +#' +#' @param data_list A named list of data input +#' @param data_file_name An optional name for the file created +#' @param dir The directory where you want to save the data file in JSON format. +#' Default is [base::tempdir()]. +#' +#' @returns The path to the created file +#' +#' @export +tp_write_data <- function(data_list, data_file_name = "tmp_data_file", dir = tp_tempdir()) { + + input_json <- jsonlite::toJSON(data_list, auto_unbox=TRUE) + path <- paste0(dir, data_file_name, ".json") + write(input_json, file = path) + + return(path) + +} + + +#' Create a flat list +#' +#' @description +#' `tp_list` takes a variable number of arguments and returns a list. +#' +#' @param ... Variadic arguments (see details). +#' +#' @details +#' This function takes a variable number of arguments, so that users can pass as +#' arguments either independent lists, or a single structured +#' list of list (name_arg = value_arg). +#' +#' @return A list. +tp_list <- function(...) { + dotlist <- list(...) + + if (length(dotlist) == 1L && is.list(dotlist[[1]])) { + dotlist <- dotlist[[1]] + } + + dotlist +} + + + +# UTILS: Data conversion + + +## Sequence data + +# Read alignment in FASTA or NEXUS (for tree inference) +read_aln <- function(file) { + # define the encoding + # NB: everything that is not ACTG will be replace with gap ("-") within the function + base_code <- c( + "A" = 0, + "C" = 1, + "G" = 2, + "T" = 3, + "-" = 4 + ) + + # Print an error message if the input is not in fasta or nexus + if (!grepl("\\.(fasta|fas|nexus|nex)$", file, ignore.case = TRUE)) { + stop("Please, provide an input file in fasta or nexus format") + } + + # If the input is a FASTA file + else if (grepl("\\.(fasta|fas)$", file, ignore.case = TRUE)) { + raw <- readLines(file, warn = FALSE) + raw <- raw[nzchar(raw)] # remove empty lines, if any + #nm <- gsub(">", "", raw[grepl(">", raw)]) # sequence names + sq <- raw[!grepl(">", raw)] # sequences + # sequence matrix + sq_list <- strsplit(sq, "") + sq_mat <- do.call(rbind, sq_list) + sq_mat <- toupper(sq_mat) + sq_mat[] <- gsub("[^ACGT]", "-", sq_mat, ignore.case = TRUE) # replace everything that is not ACTG with "-" + # numerical matrix + num_mat <- matrix( + base_code[sq_mat], + nrow = nrow(sq_mat), + ncol = ncol(sq_mat) + #dimnames = list(nm, NULL) + ) + # final list + res <- list(data = num_mat) + return(res) + + # If the input is a NEXUS file + } else if (grepl("\\.(nexus|nex)$", file, ignore.case = TRUE)) { + # nexus matrix block + raw <- readLines(file, warn = FALSE) + raw <- raw[nzchar(raw)] # remove empty lines, if any + start <- grep("^[[:space:]]*matrix[[:space:]]*$", tolower(raw)) + end <- grep(";", raw) + end <- end[end > start][1] + mat <- raw[(start + 1):(end - 1)] # extract contents + mat <- trimws(mat) + mat <- mat[nzchar(mat)] # remove empty entries + #nm <- sub("\\s+.*$", "", mat) # sequence names (split on the 1st white space) + sq <- sub("^\\S+\\s+", "", mat) # sequences + # sequence matrix + sq_list <- strsplit(sq, "") + sq_mat <- do.call(rbind, sq_list) + sq_mat <- toupper(sq_mat) + sq_mat[] <- gsub("[^ACGT]", "-", sq_mat, ignore.case = TRUE) # replace everything that is not ACTG with "-" + # numerical matrix + num_mat <- matrix( + base_code[sq_mat], + nrow = nrow(sq_mat), + ncol = ncol(sq_mat) + #dimnames = list(nm, NULL) + ) + # final list + res <- list(data = num_mat) + return(res) + } +} + + +## Phylogenetic trees + #' Convert phylo obj to TreePPL tree #' #' @description @@ -15,7 +232,6 @@ #' @return A TreePPL json str #' #' @export - tp_phylo_to_tpjson <- function(phylo_tree, age = "") { name <- "tree" @@ -41,8 +257,6 @@ tp_phylo_to_tpjson <- function(phylo_tree, age = "") { #' @param phylo_tree an object of class [ape::phylo]. #' #' @return A pair (root index, tppl_tree) -#' - tp_phylo_to_tppl_tree <- function(phylo_tree) { name <- deparse(substitute(phylo_tree)) @@ -193,7 +407,7 @@ rec_tree_list <- function(tree, row_index) { #' #' @param json_out One of two options: #' * A list of TreePPL output in parsed JSON format (output from -#' [treepplr::tp_treeppl()]), OR +#' [treepplr::tp_run()]), OR #' * The full path of the json file containing the TreePPL output. #' #' @return A list with two elements: @@ -304,7 +518,6 @@ build_newick_node <- function(node, parent_age) { } - # Function to ladderize tree and correct tip label sequence ladderize_tree <- function(tree, temp_file = "temp", orientation = "left"){ if(file.exists(paste0("./", temp_file))){ @@ -322,3 +535,7 @@ ladderize_tree <- function(tree, temp_file = "temp", orientation = "left"){ return(tree_lad) } + + + + diff --git a/R/getter.R b/R/getter.R deleted file mode 100644 index daa1961..0000000 --- a/R/getter.R +++ /dev/null @@ -1,86 +0,0 @@ -#' Import a TreePPL model -#' -#' @description -#' `tp_model` takes TreePPL code and prepares it to be used by -#' [treepplr::tp_treeppl()]. -#' -#' @param model_input One of tree options: -#' * The full path of the model file that contains the TreePPL code, OR -#' * A string with the name of a model supported by treepplr -#' (see [treepplr::tp_model_names()]), OR -#' * A string containing the entire TreePPL code. -#' -#' @return A TreePPL model (S3). A structured list with the -#' string containing the TreePPL model and a class -#' ([treepplr::tp_model_names()] or "custom") -#' @export -#' -tp_model <- function(model_input) { - class_model <- NULL - res <- try(file.exists(model_input), silent = TRUE) - # If path exists, import model from file - if (!is(res, "try-error") && res) { - model <- readr::read_file(model_input) - class_model <- "custom" - # If path doesn't exist - } else if (assertthat::is.string(model_input)) { - res <- - try(get(model_input, treepplr::tp_model_names()), silent = TRUE) - # model_input has the name of a known model - if (!is(res, "try-error")) { - model <- find_file(res, "tppl") - class_model <- res - # OR model_input is a string - # (needs to be verified as an appropriate model later) - } else { - model <- model_input - class_model <- "custom" - } - } - - if (!is.null(class_model)) { - class(model) <- class_model - model - } else { - stop("Unknow R type (not a valid path, a known data model, or a model)") - } -} - -#' Import data for TreePPL program -#' -#' @description -#' `tp_data` takes data and prepares it to be used by -#' [treepplr::tp_treeppl()]. -#' -#' @param data_input One of tree options: -#' * The full path of the JSON file that contains the data, OR -#' * A string with the name of a model supported by treepplr -#' (see [treepplr::tp_model_names()]), OR -#' * A list (or structured list) containing TreePPL data. -#' -#' @return a list, see [treepplr::tp_check_input()] for further details. -#' @export -#' -tp_data <- function(data_input) { - res <- try(file.exists(data_input), silent = TRUE) - # If path exists, import data from file - if (!is(res, "try-error") && res) { - data <- tp_list(jsonlite::fromJSON(data_input)) - # If path doesn't exist - } else if (assertthat::is.string(data_input)) { - res <- try(get(data_input, treepplr::tp_model_names()), silent = TRUE) - # data_input has the name of a known model - if (!is(res, "try-error")) { - data <- tp_list(find_file(res, "json")) - } - # OR data_input is a list (or a structured list) - } else if (is.list(data_input)) { - data <- tp_list(data_input) - } - - if (is(data, "list")) { - data - } else { - stop("Unknow R type (not a valid path, known data model, or list)") - } -} diff --git a/R/post_treatment.R b/R/post_treatment.R index 3637fde..0811318 100644 --- a/R/post_treatment.R +++ b/R/post_treatment.R @@ -1,36 +1,94 @@ -#' Parse simple TreePPL json output +#' Parse simple TreePPL json SMC output #' #' @description -#' `tp_parse` takes TreePPL json output and returns a data.frame +#' `tp_parse_smc` takes TreePPL json SMC output and returns a data.frame #' #' @param treeppl_out a character vector giving the TreePPL json output -#' produced by [tp_treeppl]. +#' produced by [tp_run] using an SMC method. +#' +#' @details +#' Particles with -Inf weight are removed. +#' #' #' @return A data frame with the output from inference in TreePPL. #' @export -tp_parse <- function(treeppl_out) { +tp_parse_smc <- function(treeppl_out) { + + result_df <- list() + + for (i in seq_along(treeppl_out)) { + + # remove sweeps with nan norm const + if (treeppl_out[[i]]$normConst == "nan"){ + print("Removing sweep without normalizing constant") + } else { + + samples_c <- unlist(treeppl_out[[i]]$samples) + log_weight_c <- unlist(treeppl_out[[i]]$weights) + + if(is.null(names(samples_c))){ + result_df <- rbind(result_df, + data.frame(sweep = i, + samples = samples_c, + log_weight = log_weight_c, + norm_constant = treeppl_out[[i]]$normConst) + ) + } else { + result_df <- rbind(result_df, + data.frame(sweep = i, + parameter = names(samples_c), + samples = samples_c, + log_weight = log_weight_c, + norm_constant = treeppl_out[[i]]$normConst) + ) + } + } + } + + # check if all sweeps were removed + if (length(result_df) == 0) { + stop("All sweeps failed") + } else{ + + result_df <- result_df |> + # remove particles with -Inf weight + dplyr::mutate(log_weight = as.numeric(.data$log_weight)) |> + dplyr::filter(!is.infinite(.data$log_weight)) |> + dplyr::mutate(total_lweight = .data$log_weight + .data$norm_constant) |> + dplyr::mutate(norm_weight = exp(.data$total_lweight - max(.data$total_lweight))) |> + dplyr::select(-"total_lweight") + } + return(result_df) +} + +#' Parse simple TreePPL json MCMC output +#' +#' @description +#' `tp_parse_mcmc` takes TreePPL json MCMC output and returns a data.frame +#' +#' @param treeppl_out a character vector giving the TreePPL json output +#' produced by [tp_run] using an MCMC method. +#' +#' @return A data frame with the output from inference in TreePPL. +#' @export +tp_parse_mcmc <- function(treeppl_out) { result_df <- list() for (i in seq_along(treeppl_out)) { samples_c <- unlist(treeppl_out[[i]]$samples) - log_weight_c <- unlist(treeppl_out[[i]]$weights) if(is.null(names(samples_c))){ result_df <- rbind(result_df, data.frame(run = i, - samples = samples_c, - log_weight = log_weight_c, - norm_const = treeppl_out[[i]]$normConst) + samples = samples_c) ) } else { result_df <- rbind(result_df, data.frame(run = i, parameter = names(samples_c), - samples = samples_c, - log_weight = log_weight_c, - norm_const = treeppl_out[[i]]$normConst) + samples = samples_c) ) } } @@ -46,7 +104,7 @@ tp_parse <- function(treeppl_out) { #' model of host repertoire evolution and returns a data.frame #' #' @param treeppl_out a character vector giving the TreePPL json output -#' produced by [tp_treeppl]. +#' produced by [tp_run]. #' #' @return A list (n = n_runs) of data frames with the output from inference #' in TreePPL under the host repertoire evolution model. @@ -229,25 +287,37 @@ peel_tree <- function(subtree, } -#' Check for convergence across multiple SMC sweeps/runs +#' Check for convergence across multiple SMC sweeps. #' -#' @param treeppl_out a character vector giving the TreePPL json output -#' produced by [tp_treeppl]. +#' @param treeppl_out a data frame outputted by [tp_parse_smc()]. #' #' @returns Variance in the normalizing constants across SMC sweeps. #' @export #' tp_smc_convergence <- function(treeppl_out) { - output <- tp_parse(treeppl_out) - zs <- output %>% - dplyr::slice_head(n = 1, by = run) %>% - dplyr::pull(norm_const) + zs <- treeppl_out |> + dplyr::slice_head(n = 1, by = .data$sweep) |> + dplyr::pull(.data$norm_constant) - return(var(zs)) + return(stats::var(zs)) } +#' Check for convergence across multiple MCMC runs. +#' +#' @param treeppl_out a data frame outputted by [tp_parse_mcmc()]. +#' +#' @returns Gelman and Rubin's convergence diagnostic +#' +tp_mcmc_convergence <- function(treeppl_out) { + + # create coda::mcmc objects for each run + # list(mcmc objects) + #coda::gelman.diag + +} + #' Find the Maximum A Posteriori (MAP) Tree from weighted samples #' @@ -313,7 +383,7 @@ tp_map_tree <- function(trees_out) { # but ape::consensus uses simple mean. For most purposes, this is sufficient. final_map <- map <- phangorn::allCompat(matching_trees, rooted=TRUE) |> phangorn::add_edge_length(matching_trees, - fun = function(x) weighted.mean(x, matching_weights)) + fun = function(x) stats::weighted.mean(x, matching_weights)) print(paste("MAP Topology found")) print(paste("Posterior Probability:", round(map_prob, 4))) diff --git a/R/run.R b/R/run.R new file mode 100644 index 0000000..bb1323e --- /dev/null +++ b/R/run.R @@ -0,0 +1,110 @@ +#' Options that can be passed to a TreePPL program +#' +#' @returns A string with the output from the executable's help +#' @export +#' +tp_run_options <- function() { + + #### under development here and in treeppl #### + + # text from treeppl executable --help + return() + +} + + +#' Run a TreePPL program +#' +#' @description +#' Run TreePPL and return output. +#' +#' @param compiled_model a [base::character] with the full path to the compiled model +#' outputted by [treepplr::tp_compile]. +#' @param data a [base::character] with the full path to the data file in TreePPL +#' JSON format (as outputted by [treepplr::tp_data]). +#' @param n_runs When using MCMC, a [base::integer] giving the number of runs to be done. +#' @param n_sweeps When using SMC, a [base::integer] giving the number of SMC sweeps to be done. +#' @param dir a [base::character] with the full path to the directory where you +#' want to save the output. Default is [base::tempdir()]. +#' @param out_file_name a [base::character] with the name of the output file in +#' JSON format. Default is "out". +#' @param ... See [treepplr::tp_run_options] for all supported arguments. +#' +#' +#' @return A list of TreePPL output in parsed JSON format. +#' @export +#' +#' @examples +#' \dontrun{ +#' # When using SMC +#' # compile model and create SMC inference machinery +#' exe_path <- tp_compile(model = "coin", method = "smc-bpf", particles = 2000) +#' +#' # prepare data +#' data_path <- tp_data(data_input = "coin") +#' +#' # run TreePPL +#' result <- tp_run(exe_path, data_path, n_sweeps = 2) +#' +#' +#' # When using MCMC +#' # compile model and create MCMC inference machinery +#' exe_path <- tp_compile(model = "coin", method = "mcmc-naive", iterations = 2000) +#' +#' # prepare data +#' data_path <- tp_data(data_input = "coin") +#' +#' # run TreePPL +#' result <- tp_run(exe_path, data_path, n_runs = 2) +#' } + +tp_run <- function(compiled_model, + data, + n_runs = NULL, + n_sweeps = NULL, + dir = NULL, + out_file_name = "out", + ...) { + + if(is.null(n_runs) & is.null(n_sweeps)){ + stop("At least one of n_runs and n_sweeps needs to be passed") + } + + n_string <- "" + if(!is.null(n_runs)){ + #### change to --iterations when it's fixed in treeppl #### + n_string <- paste0(n_string, "--sweeps ", n_runs, " ") + } + + if(!is.null(n_sweeps)){ + n_string <- paste0(n_string, "--sweeps ", n_sweeps, " ") + } + + if(is.null(dir)){ + dir_path <- tp_tempdir() + } else { + dir_path <- dir + } + + output_path <- paste0(dir_path, out_file_name, ".json") + + # Empty LD_LIBRARY_PATH from R_env for this command specifically + # due to conflict with internal env from treeppl self container + command <- paste("LD_LIBRARY_PATH= MCORE_LIBS=", + compiled_model, + data, + n_string, + paste(">", output_path) + ) + system(command) + + # simple parsing + #### change this? #### + json_out <- readLines(output_path) |> + lapply(jsonlite::fromJSON, simplifyVector = FALSE) + + return(json_out) +} + + + diff --git a/R/runner.R b/R/runner.R deleted file mode 100644 index a83da11..0000000 --- a/R/runner.R +++ /dev/null @@ -1,406 +0,0 @@ -#' Compile and run a TreePPL program -#' -#' @description -#' `tp_treeppl` execute TreePPL and return TreePPL output (string JSON format). -#' -#' @param model a TreePPL model (S3). -#' @param model_file_name a character vector giving a model name. -#' @param data a phyjson object (S3). -#' @param data_file_name a character vector giving a data name. -#' @param compile_model a [base::logical] to tell if the model need to be -#' compile -#' @param samples a [base::integer] giving the number of samples (mcmc) or -#' particules (smc). -#' @param seed a [base::numeric] to use as a random seed. -#' @param n_runs a [base::integer] giving the number of run (mcmc)/sweap (smc). -#' @param method a character vector giving the inference method name. -#' @param align a [base::logical] to tell if need to align the model. -#' @param cps a character vector giving the configuration of CPS transformation. -#' @param delay a character vector giving the configuration of delayed sampling. -#' @param kernel a [base::numeric] value giving the driftScale for driftKernel -#' in MCMC. -#' @param mcmc_lw_gprob a [base::numeric] probability of performing a global -#' MCMC step. -#' @param pmcmc_particles a [base::integer] number of particles for the smc -#' proposal computation -#' @param prune a [base::logical] to tell if the model will try to be pruned. -#' @param subsample a [base::integer] number of draw to subsample from the -#' posterior distribution. -#' @param resample a character vector giving the selected resample placement -#' method -#' -#' @details -#' This function takes TreePPL object (S3) and phyjson object (S3), -#' compile TreePPL model, run it with data and returning TreePPL output. -#' -#' TreePPL need to be install on your computer and the PATH set for R/RSTUDIO -#' (see [install](https://treeppl.org/docs/Howtos) manual). -#' The executable and the output files will be written in R's [base::tempdir()]. -#' -#' `model` : A TreePPL model (S3), see [treepplr::tp_model] for further details. -#' Use 'NULL' if you have previously provide an model. Check already provide -#' model with [treepplr::tp_stored_model]. -#' -#' `model_file_name` : a character vector giving to [treepplr::tp_treeppl] as -#' a model name. Use a [treepplr::tp_stored_data] name if you have already -#' write your model with [treepplr::tp_treeppl]. -#' -#' `data` : A list, see [treepplr::tp_check_input()] for further -#' details. Use 'NULL' if you have previously provide data. Check already -#' provide data with [treepplr::tp_stored_data]. -#' -#' `data_file_name` : a character vector giving to [treepplr::tp_treeppl] -#' a data name. Use a [treepplr::tp_stored_data] name if you have already write -#' your data with [treepplr::tp_treeppl]. -#' -#' `compile_model` : a [base::logical] telling if the model need to be compiled. -#' Can be use to avoid to compile a model again in R's [base::tempdir()] -#' if you have already compile a `model` in a previous call of -#' [treepplr::tp_treeppl]. Check already compile model -#' with [treepplr::tp_stored_compiled]. -#' -#' `samples` : The number of samples (mcmc) / particules (smc) during inference. -#' -#' `seed` : The random seed to use. Using 'NULL' initialized randomly. -#' -#' `n_runs` : The number of run (mcmc) / sweap (smc) used for the inference. -#' -#' `method` : Inference method to be used. The selected inference method. -#' The supported methods are: is-lw, smc-bpf, smc-apf, mcmc-lightweight, -#' mcmc-trace, mcmc-naive, pmcmc-pimh. -#' -#' The following options are highly dependable of the method used. -#' Check \[not implemented yet\] for more information. -#' -#' `align` : Whether or not to align the model for certain inference algorithms. -#' -#' `cps` : Configuration of CPS transformation (only applicable to certain -#' inference algorithms). The supported options are: none, partial, and full. -#' -#' `delay` : The model is transformed to an efficient representation if -#' possible. The supported options are: static or dynamic. Use 'NULL' to ignore. -#' -#' `kernel` : The value of the driftScale for driftKernel in MCMC. Use 'NULL' -#' to ignore. Use in conjuction with `method` mcmc-lightweight". -#' Use 'NULL' to ignore -#' -#' `mcmc_lw_gprob` : The probability of performing a global MH step -#' (non-global means only modify a single sample in the previous trace). -#' Use in conjuction with `method` mcmc-lightweight". Use 'NULL' to ignore -#' -#' `pmcmc_particles` : The number of particles for the smc proposal computation. -#' This option is used if one of the following methods are used: pmcmc-*. -#' Use 'NULL' to ignore -#' -#' `prune` : The model is pruned if possible. -#' -#' `subsample` : The number of draw to subsample from the posterior -#' distribution. Use in conjuction with `method` smc-apf or smc-bpf. -#' Use 'NULL' to ignore. -#' -#' `resample`: The selected resample placement method, for inference algorithms -#' where applicable. The supported methods are: -#' likelihood (resample immediately after all likelihood updates), -#' align (resample after aligned likelihood updates, forces --align), -#' and manual (sample only at manually defined resampling locations). -#' Use 'NULL' to ignore. -#' -#' @return A list of TreePPL output in parsed JSON format. -#' @export - -tp_treeppl <- - function(model = NULL, - model_file_name = "tmp_model_file", - data = NULL, - data_file_name = "tmp_data_file", - compile_model = TRUE, - samples = 1000, - seed = NULL, - n_runs = 1, - method = "smc-bpf", - align = FALSE, - cps = "none", - delay = NULL, - kernel = NULL, - mcmc_lw_gprob = NULL, - pmcmc_particles = NULL, - prune = FALSE, - subsample = NULL, - resample = NULL) { - - tp_write(model, model_file_name, data, data_file_name) - - if (compile_model) { - - tp_compile( - model_file_name, - samples, - seed, - method, - align, - cps, - delay, - kernel, - mcmc_lw_gprob, - pmcmc_particles, - prune, - subsample, - resample - ) - } - - return(tp_run(model_file_name, data_file_name, n_runs)) - } - -#' Prepare input for [tp_compile()] -#' -#' @description -#' `tp_write` writes an JSON file to be used by [tp_compile()]. -#' -#' @param model a TreePPL model (S3). -#' @param model_file_name a character vector giving a model name. -#' @param data a phyjson object (S3). -#' @param data_file_name a character vector giving a data name. -#' -#' @details -#' This function takes TreePPL object (S3) and phyjson object (S3) and write -#' them in [base::tempdir()]. -#' -#' `model` : A TreePPL model (S3), see [treepplr::tp_model] for further details. -#' Use 'NULL' if you have previously provide an model. Check already provide -#' model with [treepplr::tp_stored_model]. -#' -#' `model_file_name` : a character vector giving to [treepplr::tp_treeppl] as -#' a model name. Use a [treepplr::tp_stored_data] name if you have already -#' write your model with [treepplr::tp_write]. -#' -#' `data` : A list, see [treepplr::tp_check_input()] for further -#' details. Use 'NULL' if you have previously provide data. Check already -#' provide data with [treepplr::tp_stored_data]. -#' -#' `data_file_name` : a character vector giving to [treepplr::tp_treeppl] -#' a data name. Use a [treepplr::tp_stored_data] name if you have already write -#' your data with [treepplr::tp_write]. -#' -#' @export -tp_write <- function(model = NULL, - model_file_name = "tmp_model_file", - data = NULL, - data_file_name = "tmp_data_file") { - dir <- tp_tempdir() - - if (!is.null(model)) { - cat(model, file = paste0(dir, model_file_name, ".tppl")) - } - - # write json with input data - if (!is.null(data)) { - input_json <- jsonlite::toJSON(data, auto_unbox=TRUE) - write(input_json, file = paste0(dir, data_file_name, ".json")) - } -} - -#' Compile for [tp_run()] -#' -#' @description -#' `tp_compile` compile a TreePPL model to use by [treepplr::tp_run]. -#' -#' @param model_file_name a character vector giving a model name. -#' @param samples a [base::integer] giving the number of samples (mcmc) or -#' particules (smc). -#' @param seed a [base::numeric] to use as a random seed. -#' @param method a character vector giving the inference method name. -#' @param align a [base::logical] to tell if need to align the model. -#' @param cps a character vector giving the configuration of CPS transformation. -#' @param delay a character vector giving the configuration of delayed sampling. -#' @param kernel a [base::numeric] value giving the driftScale for driftKernel -#' in MCMC. -#' @param mcmc_lw_gprob a [base::numeric] probability of performing a global -#' MCMC step. -#' @param pmcmc_particles a [base::integer] number of particles for the smc -#' proposal computation -#' @param prune a [base::logical] to tell if the model will try to be pruned. -#' @param subsample a [base::integer] number of draw to subsample from the -#' posterior distribution. -#' @param resample a character vector giving the selected resample placement -#' method. -#' -#' @details -#' -#' `model_file_name` : a character vector giving to [treepplr::tp_treeppl] as -#' a model name. Use a [treepplr::tp_stored_data] name if you have already -#' write your model with [treepplr::tp_treeppl]. -#' -#' `seed` : The random seed to use. Using 'NULL' initialized randomly. -#' -#' `method` : Inference method to be used. The selected inference method. -#' The supported methods are: is-lw, smc-bpf, smc-apf, mcmc-lightweight, -#' mcmc-trace, mcmc-naive, pmcmc-pimh. -#' -#' The following options are highly dependable of the method used. -#' Check \[not implemented yet\] for more information. -#' -#' `align` : Whether or not to align the model for certain inference algorithms. -#' -#' `cps` : Configuration of CPS transformation (only applicable to certain -#' inference algorithms). The supported options are: none, partial, and full. -#' -#' `delay` : The model is transformed to an efficient representation if -#' possible. The supported options are: static or dynamic. Use 'NULL' to ignore. -#' -#' `kernel` : The value of the driftScale for driftKernel in MCMC. Use 'NULL' -#' to ignore. Use in conjuction with `method` mcmc-lightweight". -#' Use 'NULL' to ignore -#' -#' `mcmc_lw_gprob` : The probability of performing a global MH step -#' (non-global means only modify a single sample in the previous trace). -#' Use in conjuction with `method` mcmc-lightweight". Use 'NULL' to ignore -#' -#' `pmcmc_particles` : The number of particles for the smc proposal computation. -#' This option is used if one of the following methods are used: pmcmc-*. -#' Use 'NULL' to ignore -#' -#' `prune` : The model is pruned if possible. -#' -#' `subsample` : The number of draw to subsample from the posterior -#' distribution. Use in conjuction with `method` smc-apf or smc-bpf. -#' Use 'NULL' to ignore. -#' -#' `resample`: The selected resample placement method, for inference algorithms -#' where applicable. The supported methods are: -#' likelihood (resample immediately after all likelihood updates), -#' align (resample after aligned likelihood updates, forces --align), -#' and manual (sample only at manually defined resampling locations). -#' Use 'NULL' to ignore. -#' -#' @return The R's [base::tempdir()] whreŕe the compile file is stored. -#' @export - -tp_compile <- function(model_file_name = "tmp_model_file", - samples = 1000, - seed = NULL, - method = "smc-bpf", - align = FALSE, - cps = "none", - delay = NULL, - kernel = NULL, - mcmc_lw_gprob = NULL, - pmcmc_particles = NULL, - prune = FALSE, - subsample = NULL, - resample = NULL) { - # if dir = NULL return temp_dir, if not return dir - dir_path <- tp_tempdir() - options(scipen=999) - argum <- paste( - paste0(dir_path, model_file_name, ".tppl"), - paste("-m", method), - paste("--particles", samples), - paste0("--output ", dir_path, model_file_name, ".exe") - ) - - if (cps != "none") { - argum <- paste(argum, paste0("--cps ", cps)) - } - - if (!is.null(seed)) { - argum <- paste(argum, paste0("--seed ", seed)) - } - - if (align) { - argum <- paste(argum, "--align ") - } - - if (!is.null(delay)) { - if (delay == "static") { - argum <- paste(argum, "--static-delay ") - } - if (delay == "dynamic") { - argum <- paste(argum, "--dynamic-delay ") - } - } - - if (!is.null(kernel)) { - argum <- paste(argum, paste0("--kernel --drift ", kernel)) - } - - if (!is.null(mcmc_lw_gprob)) { - argum <- paste(argum, paste0("--mcmc_lw_gprob ", mcmc_lw_gprob)) - } - - if (!is.null(pmcmc_particles)) { - argum <- paste(argum, paste0("--pmcmcParticles ", pmcmc_particles)) - } - - if (prune) { - argum <- paste(argum, "--prune ") - } - - if (!is.null(subsample)) { - argum <- paste(argum, paste0("--subsample -n ", subsample)) - } - - if (!is.null(resample)) { - argum <- paste(argum, paste0("--resample ", resample)) - } - - # Preparing the command line program - tpplc_path <- installing_treeppl() - command <- paste(tpplc_path, argum) - - # Compile program - # Empty LD_LIBRARY_PATH from R_env for this command specifically - # due to conflict with internal env from treeppl self container - system(paste0("LD_LIBRARY_PATH= MCORE_LIBS= ", command)) - - return(dir_path) -} - -#' Run a TreePPL program -#' -#' #' -#' @description -#' `tp_treeppl` execute TreePPL and return TreePPL output (string JSON format). -#' -#' @param model_file_name a character vector giving a model name. -#' @param data_file_name a character vector giving a data name. -#' @param n_runs a [base::integer] giving the number of runs (mcmc)/sweaps (smc). -#' -#' @details -#' -#' `model_file_name` : a character vector giving to [treepplr::tp_treeppl] as -#' a model name. Use a [treepplr::tp_stored_data] name if you have already -#' write your model with [treepplr::tp_treeppl]. -#' -#' `data_file_name` : a character vector giving to [treepplr::tp_treeppl] -#' a data name. Use a [treepplr::tp_stored_data] name if you have already write -#' your data with [treepplr::tp_treeppl]. -#' -#' `n_runs` : The number of run (mcmc) / sweap (smc) used for the inference. -#' -#' @return A list of TreePPL output in parsed JSON format. -#' @export - -tp_run <- function(model_file_name = "tmp_model_file", - data_file_name = "tmp_data_file", - n_runs = 1) { - - n_runs_string <- paste0("--sweeps ", n_runs, " ") - - # if dir_path = NULL return temp_dir, if not return dir - dir_path <- tp_tempdir() - - # n_runs - # Empty LD_LIBRARY_PATH from R_env for this command specifically - # due to conflict with internal env from treeppl self container - command <- paste0("LD_LIBRARY_PATH= MCORE_LIBS= ", dir_path, model_file_name, ".exe ", - dir_path, data_file_name, ".json ", n_runs_string, - paste(">", paste0(dir_path, model_file_name, "_out.json") - )) - system(command) - - json_out <- readLines(paste0(dir_path, model_file_name, "_out.json")) %>% - lapply(jsonlite::fromJSON, simplifyVector = FALSE) - - return(json_out) -} diff --git a/R/treepplr-package.R b/R/treepplr-package.R index c53e54c..571f857 100644 --- a/R/treepplr-package.R +++ b/R/treepplr-package.R @@ -3,7 +3,9 @@ ## usethis namespace: start #' @importFrom bnpsd tree_reorder -#' @importFrom magrittr %>% #' @importFrom methods is +#' @importFrom phangorn add_edge_length +#' @importFrom phangorn allCompat +#' @importFrom rlang .data ## usethis namespace: end NULL diff --git a/R/utils-pipe.R b/R/utils-pipe.R deleted file mode 100644 index fd0b1d1..0000000 --- a/R/utils-pipe.R +++ /dev/null @@ -1,14 +0,0 @@ -#' Pipe operator -#' -#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -#' -#' @name %>% -#' @rdname pipe -#' @keywords internal -#' @export -#' @importFrom magrittr %>% -#' @usage lhs \%>\% rhs -#' @param lhs A value or the magrittr placeholder. -#' @param rhs A function call using the magrittr semantics. -#' @return The result of calling `rhs(lhs)`. -NULL diff --git a/R/utils.R b/R/utils.R index 37f4dab..26aa1f5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,6 +1,33 @@ -#' Fetch the latest version of treeppl -#' @export +# Platform-dependent treeppl self-contained installation +installing_treeppl <- function() { + if (Sys.getenv("TPPLC") != "") { + tpplc_path <- Sys.getenv("TPPLC") + } else{ + + tag <- tp_fp_fetch() + if (Sys.info()['sysname'] == "Windows") { + # No self container for Windows, need to install it manually + "tpplc" + } else if(Sys.info()['sysname'] == "Linux") { + path <- system.file("treeppl-linux", package = "treepplr") + file_name <- paste0("treeppl-",substring(tag, 2)) + } else {#Mac OS have a lot of different name + path <- system.file("treeppl-mac", package = "treepplr") + file_name <- paste0("treeppl-",substring(tag, 2)) + } + # Test if tpplc is already here + tpplc_path <- paste0("/tmp/",file_name,"/tpplc") + if(!file.exists(tpplc_path)) { + utils::untar(list.files(path=path, full.names=TRUE), + exdir="/tmp") + } + } + tpplc_path +} + + +# Fetch the latest version of treeppl tp_fp_fetch <- function() { if (Sys.info()["sysname"] == "Windows") { # no self container for Windows, need to install it manually @@ -53,28 +80,6 @@ tp_fp_fetch <- function() { repo_info[[1]]$tag_name } -# Platform-dependent treeppl self contain installation -installing_treeppl <- function() { - tag <- tp_fp_fetch() - if (Sys.info()['sysname'] == "Windows") { - # No self container for Windows, need to install it manually - "tpplc" - } else if(Sys.info()['sysname'] == "Linux") { - path <- system.file("treeppl-linux", package = "treepplr") - file_name <- paste0("treeppl-",substring(tag, 2)) - } else {#Mac OS have a lot of different name - path <- system.file("treeppl-mac", package = "treepplr") - file_name <- paste0("treeppl-",substring(tag, 2)) - } - # Test if tpplc is already here - tpplc_path <- paste0("/tmp/",file_name,"/tpplc") - if(!file.exists(tpplc_path)) { - utils::untar(list.files(path=path, full.names=TRUE), - exdir="/tmp") - } - tpplc_path -} - #' Temporary directory for running treeppl @@ -120,123 +125,59 @@ sep <- function() { #' Model names supported by treepplr #' -#' @description Provides a list of all model names supported by treepplr. -#' The names can also be used to find data for these models -#' (see [treepplr::tp_data]). +#' @description Provides a list of all models in the TreePPL model library. #' #' @return A list of model names. #' @export -tp_model_names <- function() { - list( - custom = "custom", - coin = "coin", - hostrep3states = "hostrep3states", - hostrep2states = "hostrep2states", - tree_inference = "tree_inference", - crbd = "crbd", - clads = "clads" +tp_model_library <- function() { + + # take whatever treeppl version is in the tmp + fd <- list.files("/tmp", pattern = "treeppl", full.names = TRUE) + # make sure you get the most recent version if you have more than one treeppl folder in the tmp + fd <- sort(fd, decreasing = TRUE)[1] + # go to the right treeppl folder, whatever it is called + fd <- list.files(fd, pattern = "treeppl", full.names = TRUE) + # add the rest of the path + fd <- paste0(fd, "/lib/mcore/treeppl/models") + # model names + mn <- list.files(fd, full.names = TRUE, recursive = TRUE, pattern = "\\.tppl$") + subcategory <- grepl(".*models/([^/]+)/([^/]+)/([^/]+)\\.tppl$", mn) + no_sub <- mn[!subcategory] + + # results in a data frame + rs <- data.frame( + "category" = sub(".*models/([^/]+)/.*", "\\1", no_sub), + "model_name" = sub(".*/([^/]+)\\.tppl$", "\\1", no_sub) ) -} - - -# Find model and data files for model_name -find_file <- function(model_name, exten) { - if (exten == "tppl") { - readr::read_file(paste0( - system.file("extdata", package = "treepplr"), - sep(), - model_name, - ".", - exten - )) - } else if (exten == "json") { - jsonlite::fromJSON(paste0( - system.file("extdata", package = "treepplr"), - sep(), - model_name, - ".", - exten - )) - } -} - -#' Model file names stored by user in [base::tempdir] using -#' [treepplr::tp_write] -#' -#' @description Provides a list of all the model file names currently -#' stored in [base::tempdir]. To verify if there is a -#' matching data file, see [treepplr::tp_stored_data]. -#' -#' @return A list of model file names. -#' @export + # order by category then by model name + rs <- rs[order(rs$category, rs$model_name, decreasing = FALSE), ] + rownames(rs) <- NULL + rs -tp_stored_model <- function() { - stored_files("tppl") } -#' Data file names stored by user in [base::tempdir] using -#' [treepplr::tp_write] -#' -#' @description Provides a list of all the data file names currently -#' stored in [base::tempdir]. To verify if there is a -#' matching model file, see [treepplr::tp_stored_model]. -#' -#' @return A list of data file names. -#' @export -tp_stored_data <- function() { - res <- stored_files("json") - list_na <- stringr::str_extract(res, "^((?!_out).)*$") - list <- list_na[!is.na(list_na)] - list -} +# Find model for model_name +tp_find_model <- function(model_name) { -#' List of compiled models in [base::tempdir] -#' -#' @description Provides a list of all the compiled model file names currently -#' stored in [base::tempdir]. -#' -#' @return A list of compiled model file names. -#' @export - -tp_stored_compiled <- function() { - stored_files("exe") -} + # take whatever treeppl version is in the tmp + version <- list.files("/tmp", pattern = "treeppl", full.names = FALSE) + # make sure you get the most recent version if you have more than one treeppl folder in the tmp + version <- sort(version, decreasing = TRUE)[1] -#Provides file names already provided by user, stored in [base::tempdir] -stored_files <- function(exten) { - tmp <- list.files(tp_tempdir()) - list_na <- - stringr::str_extract(tmp, paste0(".*(?=\\.", exten, ")")) - list <- stringr::str_sort(list_na[!is.na(list_na)]) - list + res <- system(paste0("find /tmp/", version," -name ", model_name, ".tppl"), + intern = T, ignore.stderr = TRUE) } +# Find data for model_name +tp_find_data <- function(model_name) { -#' Create a flat list -#' -#' @description -#' `tp_list` takes a variable number of arguments and returns a list. -#' -#' @param ... Variadic arguments (see details). -#' -#' @details -#' This function takes a variable number of arguments, so that users can pass as -#' arguments either independent lists, or a single structured -#' list of list (name_arg = value_arg). -#' -#' @return A list. -#' @export -#' -tp_list <- function(...) { - dotlist <- list(...) - - if (length(dotlist) == 1L && is.list(dotlist[[1]])) { - dotlist <- dotlist[[1]] - } + # take whatever treeppl version is in the tmp + version <- list.files("/tmp", pattern = "treeppl", full.names = FALSE) + # make sure you get the most recent version if you have more than one treeppl folder in the tmp + version <- sort(version, decreasing = TRUE)[1] - dotlist + system(paste0("find /tmp/", version ," -name testdata_", model_name, ".json"), + intern = T) } - - diff --git a/inst/extdata/README.txt b/inst/extdata/README.txt deleted file mode 100644 index 6ff54f7..0000000 --- a/inst/extdata/README.txt +++ /dev/null @@ -1,24 +0,0 @@ -### The files included in this folder come from the treeppl repository. - - -## Coin - -model from https://github.com/treeppl/treeppl/blob/main/models/lang/coin.tppl - -data from https://github.com/treeppl/treeppl/blob/main/models/lang/data/testdata_coin.json - - -## Tree inference - -model from https://github.com/treeppl/treeppl/blob/main/models/tree-inference/tree_inference_pruning.tppl - -data from https://github.com/treeppl/treeppl/blob/main/models/tree-inference/data/testdata_tree_inference_pruning.json - - -## Host repertoire - -model from https://github.com/treeppl/treeppl/blob/main/models/host-repertoire-evolution/flat-root-prior-HRM.tppl - -data from https://github.com/treeppl/treeppl/blob/main/models/host-repertoire-evolution/data/testdata_flat-root-prior-HRM.json - - diff --git a/inst/extdata/coin.json b/inst/extdata/coin.json deleted file mode 100644 index 1fea568..0000000 --- a/inst/extdata/coin.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "coinflips":[false,true,true,true,false,true,true,false,false,true,false,false,false,false,false,false,false,true,true,true] -} diff --git a/inst/extdata/coin.tppl b/inst/extdata/coin.tppl deleted file mode 100644 index 85a32f9..0000000 --- a/inst/extdata/coin.tppl +++ /dev/null @@ -1,39 +0,0 @@ -/* - * File: coin.tppl - * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data. - */ - -/** - * Conditions the likelihood of the computation - * on an observed datapoint to come from a particular Bernoulli experiment - * Parameters: - * datapoint: Real - * probability: Real in (0, 1), the probability of True in the Bernoulli experiment - * Returns: nothing - * Side-effects: reweighs the computation - */ -function flip(datapoint: Bool, probability: Real) { - observe datapoint ~ Bernoulli(probability); -} - -/* - * Model function - * Data: - * coinflips: Bool[] - * Prior: - * p ~ Beta(2, 2) - * Posterior: - * p | coinflips - */ -model function coinModel(coinflips: Bool[]) => Real { - // Uncomment if you want to test the input - //printLn("Input:"); - //let coinStr = apply(bool2string, coinflips); - //printLn(join(coinStr)); - assume p ~ Beta(2.0, 2.0); // prior - let n = length(coinflips); - for i in 1 to n { - flip(coinflips[i], p); // likelihood - } - return(p); // posterior -} diff --git a/inst/extdata/crb.tppl b/inst/extdata/crb.tppl deleted file mode 100644 index 10e4318..0000000 --- a/inst/extdata/crb.tppl +++ /dev/null @@ -1,15 +0,0 @@ -function walk(node: Tree, time:Real, lambda: Real) { - observe 0 ~ Poisson(lambda * (time - node.age)); - if node is Node { - observe 0.0 ~ Exponential(lambda); - walk(node.left, node.age, lambda); - walk(node.right, node.age, lambda); - } -} - -model function crb(tree: Tree) => Real { - assume lambda ~ Gamma(1.0, 1.0); - walk(tree.left, tree.age, lambda); - walk(tree.right, tree.age, lambda); - return lambda; -} diff --git a/inst/extdata/hostrep3states.json b/inst/extdata/hostrep3states.json deleted file mode 100644 index b9f5c88..0000000 --- a/inst/extdata/hostrep3states.json +++ /dev/null @@ -1,93 +0,0 @@ -{ - "symbiont_tree": { - "__constructor__": "Node", - "__data__": { - "label": 11, - "age": 10.0, - "left": { - "__constructor__": "Node", - "__data__": { - "label": 9, - "age": 3.360348, - "left": { - "__constructor__": "Node", - "__data__": { - "label": 7, - "age": 1.279190, - "left": { - "__constructor__": "Leaf", - "__data__": { - "label": 1, - "age": 0.0 - } - }, - "right": { - "__constructor__":"Leaf", - "__data__": { - "label":2, - "age":0.0 - } - } - } - }, - "right": { - "__constructor__": "Node", - "__data__": { - "label": 8, - "age": 0.153814, - "left": { - "__constructor__": "Leaf", - "__data__": { - "label": 3, - "age": 0.0 - } - }, - "right": { - "__constructor__": "Leaf", - "__data__": { - "label": 4, - "age": 0.0 - } - } - } - } - } - }, - "right": { - "__constructor__": "Node", - "__data__": { - "label": 10, - "age": 1.079144, - "left": { - "__constructor__": "Leaf", - "__data__": { - "label": 5, - "age": 0.0 - } - }, - "right": { - "__constructor__": "Leaf", - "__data__": { - "label": 6, - "age": 0.0 - } - } - } - } - } - }, - "ntips":6, - "nhosts":5, - "interactions":[2,2,0,0,0, - 2,2,0,0,0, - 0,0,2,2,0, - 0,0,2,2,0, - 0,0,0,0,2, - 0,0,0,0,2], - "host_distances":[0.0,0.8630075756,2.6699063134,2.6699063134,2.6699063134, - 0.8630075756,0.0,2.6699063134,2.6699063134,2.6699063134, - 2.6699063134,2.6699063134,0.0,1.2256551506,1.9598979474, - 2.6699063134,2.6699063134,1.2256551506,0.0,1.9598979474, - 2.6699063134,2.6699063134,1.9598979474,1.9598979474,0.0], - "dMean":4.405579 -} diff --git a/inst/extdata/hostrep3states.tppl b/inst/extdata/hostrep3states.tppl deleted file mode 100644 index 7e82953..0000000 --- a/inst/extdata/hostrep3states.tppl +++ /dev/null @@ -1,414 +0,0 @@ -import "treeppl::models/host-repertoire-evolution/host-rep-lib/lib.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/container-types.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/helpers.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/belief-propagation.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/dist-helpers.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/full-model.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/rb-drift-kernels.tppl" -import "treeppl::models/host-repertoire-evolution/host-rep-lib/independence-model.tppl" - -type ReturnType = ReturnType{ - lambda: Real[], mu: Real, beta: Real, tree: HistoryTree -} - - -model function rejectAccept( - symbiont_tree: TreeLabeled, - ntips: Int, - nhosts: Int, - interactions: Int[], - host_distances: Real[], - dMean: Real -) => ReturnType { - assume lambda ~ Dirichlet([1., 1., 1., 1.]) drift rbLambdaMove(lambda); - assume mu ~ Exponential(10.) drift rbMuMove(mu); - assume beta ~ Exponential(1.) drift rbBetaMove(beta); - - let r = mtxCreate(3,3, - [0.-lambda[1], lambda[1], 0.0, - lambda[2], 0.-(lambda[2]+lambda[3]), lambda[3], - 0.0, lambda[4], 0.-lambda[4]] - ); - let qMatrix = mtxSclrMul(mu, r); - - - let nestedInteractions = nestSeq(interactions, ntips, nhosts); - let postorderTree = postorderTraverse(symbiont_tree, qMatrix, nestedInteractions, nhosts); - - // Sample the root. We assume in both the independence and full model - // that we have a flat prior on the root - let rootPrior = mtxCreate(nhosts, 3, ones(3 * nhosts)); - let rootSamplingProb = mtxElemMul(postorderTree.outMsg, rootPrior); - let initRootRep = suggestRepAligned(rootSamplingProb, 1, nhosts); - let rootRep = suggestRepRS(rootSamplingProb, nhosts, initRootRep, 0); - - // Calculate the debt and excess - let rootLogDebt = getRepertoireDebt(rootRep, rootSamplingProb, nhosts); - let rootLogExcess = -log((3.^(Real(nhosts))) - (2.^Real(nhosts))); - - // This is an aligned point so we can weight by the importance ratio - logWeight rootLogExcess - rootLogDebt; - - // Compute messages to pass to pass to the children of the root - let newMsg = mtxCreate(nhosts, 3, observationMessage(rootRep, 1, nhosts)); - let leftMsg = mtxMul(newMsg, postorderTree.leftKernel); - let rightMsg = mtxMul(newMsg, postorderTree.rightKernel); - - // Construct an object containing the rate matrix in a suitable format for sampling - // from the embedded Markov chain - - let embeddedQMatrix = rateMatrixToEmbeddedMarkovChain(qMatrix); - let modelParams = ModelParams { - beta = beta, - hostMetric = mtxCreate(nhosts, nhosts, host_distances), - embeddedQMatrix = embeddedQMatrix, - meanDist = dMean - }; - let rootAge = postorderTree.age; - let leftRepertoireTree = sampleTreeHistory( - postorderTree.left, nhosts, leftMsg, rootRep, rootAge, modelParams, postorderTree.leftKernel - ); - let rightRepertoireTree = sampleTreeHistory( - postorderTree.right, nhosts, rightMsg, rootRep, rootAge, modelParams, postorderTree.rightKernel - ); - - // Construct the root node of the repertoire tree - let historyTree = HistoryNode { - age = symbiont_tree.age, label = symbiont_tree.label, - left = leftRepertoireTree, right = rightRepertoireTree, - repertoire = rootRep, history = [] - }; - return ReturnType { - lambda = lambda, - mu = mu, - beta = beta, - tree = historyTree - }; -} - -function suggestRepAligned(msg: Matrix[Real], i: Int, max: Int) => Int[] { - if i <= max { - let param = mtx3ToSeq(msg, i); - assume x ~ Categorical(param) drift categoricalMove(x, param); - return cons(x, suggestRepAligned(msg, i + 1, max)); - } else { - return []; - } -} - -function suggestRepRS(msg: Matrix[Real], max: Int, initialRep: Int[], depth: Int) => Int[] { - let _REP_REJECTION_DEPTH = 10; - if any(is2, initialRep) { - return initialRep; - } else { - if depth < _REP_REJECTION_DEPTH { - let newRep = suggestRepUnaligned(msg, 1, max); - return suggestRepRS(msg, max, newRep, depth + 1); - } else { - // Set weight to zero and return dummy value - weight 0.0; - return initialRep; - } - } -} - -function suggestRepUnaligned(msg: Matrix[Real], i: Int, max: Int) => Int[] { - if i <= max { - let param = mtx3ToSeq(msg, i); - assume x ~ Categorical(param); - return paste0([[x], suggestRepUnaligned(msg, i + 1, max)]); - } else { - return []; - } -} - -function sampleTreeHistory( - tree: MsgTree, - nhosts: Int, - preorderMsg: Matrix[Real], - parentRep: Int[], - parentAge: Real, - modelParams: ModelParams, - branchKernel: Matrix[Real] -) => HistoryTree { - if tree is MsgLeaf { - // We are at a leaf, so we don't need to sample the interactions - let rep = tree.interactions; - // Sample the incident branch - let branchSample = sampleBranch( - parentRep, - rep, - parentAge, - tree.age, - nhosts, - modelParams, - branchKernel, - 0 - ); - - // We are aligned so we can weight the program here (shouldn't matter for MCMC) - if branchSample.success { - logWeight branchSample.logExcess - branchSample.logDebt; - } else { - // We failed to sample a correct branch, set program weight to zero - weight 0.0; - } - - return HistoryLeaf { - age = tree.age, - label = tree.label, - repertoire = rep, - history = branchSample.history - }; - } else { - let samplingProb = mtxElemMul(tree.outMsg, preorderMsg); - let initRep = suggestRepAligned(samplingProb, 1, nhosts); - let rep = suggestRepRS(samplingProb, nhosts, initRep, 0); - let nodeLogDebt = getRepertoireDebt(rep, samplingProb, nhosts); - - // Sample the incident branch - let branchSample = sampleBranch( - parentRep, - rep, - parentAge, - tree.age, - nhosts, - modelParams, - branchKernel, - 0 - ); - - // We are aligned so we can weight the program here (shouldn't matter for MCMC) - if branchSample.success { - logWeight branchSample.logExcess - branchSample.logDebt - nodeLogDebt; - } else { - // We failed to sample a correct branch, set program weight to zero - weight 0.0; - } - - // Since we sampled a repertoire at the node we should propagate this information to the child nodes - let newMsg = mtxCreate(nhosts, 3, observationMessage(rep, 1, nhosts)); - let leftMsg = mtxMul(newMsg, tree.leftKernel); - let rightMsg = mtxMul(newMsg, tree.rightKernel); - - // Sample the two subtrees - let left = sampleTreeHistory( - tree.left, nhosts, leftMsg, rep, tree.age, modelParams, tree.leftKernel - ); - let right = sampleTreeHistory( - tree.right, nhosts, rightMsg, rep, tree.age, modelParams, tree.rightKernel - ); - - return HistoryNode { - age = tree.age, - label = tree.label, - repertoire = rep, - history = branchSample.history, - left = left, - right = right - }; - } -} - -function sampleBranch( - startRep: Int[], - finalRep: Int[], - startAge: Real, - finalAge: Real, - nhosts: Int, - modelParams: ModelParams, - branchKernel: Matrix[Real], - rejectionDepth: Int -) => CorrectedBranchSample { - let _MAX_REJECTION_DEPTH_BRANCH = 100; - if rejectionDepth <= _MAX_REJECTION_DEPTH_BRANCH { - // Sample the events per host - let unorderedBranch = sampleUnorderedBranch( - startRep, finalRep, startAge, finalAge, 1, nhosts, modelParams.embeddedQMatrix, 1 - ); - if unorderedBranch.success { - - // All host histories were sampled correctly, - // compile the events for all hosts and sort them - let allHostEvents = paste0(unorderedBranch.history); - let orderedEvents = qSort(compAge, allHostEvents); - let nEvents = length(orderedEvents); - if allTimesValidBranch(startRep, orderedEvents, 1, nEvents, nhosts) { - // The branch is correct along the full history, - // accept and calculate the true weight of the sample - - // Calculate the debt -- this is the density of the endpoint conditioned - // (parallel) chains at the current sample. - let logDebt = independenceLikelihoodEndCond( - startRep, finalRep, startAge, finalAge, unorderedBranch.history, modelParams, branchKernel - ); - // Calculate the likelihood of the path under the full model - let logExcess = fullModelWeight( - 1, startRep, finalRep, startAge, finalAge, orderedEvents, nEvents, nhosts, modelParams - ); - // Return the branch sample with both the debt and the excess - return CorrectedBranchSample { - history = orderedEvents, - logDebt = logDebt, - logExcess = logExcess, - success = true - }; - } - } - // Either one of the hosts failed to be sampled correctly - // or the history was invalid somewhere along the branch - return sampleBranch( - startRep, - finalRep, - startAge, - finalAge, - nhosts, - modelParams, - branchKernel, - rejectionDepth + 1 - ); - } else { - // We ran out of rejection depth for the branch - return CorrectedBranchSample { - history = [], - logDebt = -log(0.0), - logExcess = log(0.0), - success = false - }; - } -} - -function compAge(left: Event, right: Event) => Int { - if (isNaN(right.eventTime)) { - return -1; - } - if (isNaN(left.eventTime)) { - return 1; - } - if (right.eventTime >= left.eventTime) { - return 1; - } else { - return -1; - } -} - -function sampleUnorderedBranch( - startRep: Int[], - finalRep: Int[], - startAge: Real, - finalAge: Real, - hostIndex: Int, - nhosts: Int, - embeddedQMatrix: EmbeddedMarkovChainMatrix, - rejectionDepth: Int -) => BranchSample { - let _MAX_REJECTION_DEPTH_HOST = 100; - // Note that startAge > finalAge since the tree is moving forwards in time - if hostIndex <= nhosts { - let hostHistory = sampleHostHistory( - startRep[hostIndex], finalRep[hostIndex], startAge, finalAge, hostIndex, embeddedQMatrix, 1 - ); - if hostHistory.success { - let otherHostHistories = sampleUnorderedBranch( - startRep, finalRep, startAge, finalAge, hostIndex + 1, nhosts, embeddedQMatrix, 0 - ); - return BranchSample { - history = cons(hostHistory.history, otherHostHistories.history), - success = otherHostHistories.success - }; - } else { - if rejectionDepth <= _MAX_REJECTION_DEPTH_HOST { - return sampleUnorderedBranch( - startRep, finalRep, startAge, finalAge, hostIndex, nhosts, embeddedQMatrix, rejectionDepth + 1 - ); - } else { - return BranchSample { - history = [], - success = false - }; - } - } - } else { - return BranchSample { - history = [], - success = true - }; - } -} - - -function sampleHostHistory( - startState: Int, - finalState: Int, - startAge: Real, - finalAge: Real, - host: Int, - embeddedQMatrix: EmbeddedMarkovChainMatrix, - stepIndex: Int -) => HostBranchSample { - let totalRate = embeddedQMatrix.totalRates[startState + 1]; - if startState != finalState && stepIndex == 1 { - // If there is a mutation on the interval, sample the first event from a truncated exponential - - let t = sampleTruncatedExponential(totalRate, startAge - finalAge); - let eventTime = startAge - t; - - // Sample the next event - let nextState = sampleNextEvent(startState, embeddedQMatrix); - let param = embeddedQMatrix.transitionProbs[startState + 1]; - - let restOfHistory = sampleHostHistory(nextState, finalState, eventTime, finalAge, host, embeddedQMatrix, stepIndex + 1); - return HostBranchSample { - history = cons( - Event { eventTime = eventTime, fromState = startState, toState = nextState, host = host }, - restOfHistory.history - ), - success = restOfHistory.success - }; - - } else { - // If the final state does not equal the start state, then we don't want to - // force there to be events in the remaining interval - assume t ~ Exponential(totalRate); - if startAge - t < finalAge { - // We overshoot so we are finished, and just save the debt - if startState == finalState { - return HostBranchSample { - history = [], - success = true - }; - } else { - return HostBranchSample { - history = [], - success = false - }; - } - } else { - - // Sample the next transition - let nextState = sampleNextEvent(startState, embeddedQMatrix); - let param = embeddedQMatrix.transitionProbs[startState + 1]; - // assume nextState ~ Categorical(param); - - // Record the event time - let eventTime = startAge - t; - let restOfHistory = sampleHostHistory(nextState, finalState, eventTime, finalAge, host, embeddedQMatrix, stepIndex + 1); - return HostBranchSample { - history = cons( - Event { eventTime = eventTime, fromState = startState, toState = nextState, host = host }, - restOfHistory.history - ), - success = restOfHistory.success - }; - } - } -} - -function sampleTruncatedExponential(rate: Real, maxT: Real) => Real { - let uMin = exp(-rate * maxT); - // Assuming directly from the truncated distribution gave numerical errors - assume u_ ~ Uniform(0.0, 1.0); - let u = uMin + u_ * (1. - uMin); - let expSample = -log(u) / rate; - return expSample; -} diff --git a/inst/extdata/tree_inference.fasta b/inst/extdata/tree_inference.fasta new file mode 100644 index 0000000..b458523 --- /dev/null +++ b/inst/extdata/tree_inference.fasta @@ -0,0 +1,8 @@ +>seq1 +ccagtaaaatccttg +>seq2 +ccagacaaatcacca +>seq3 +aaccagcaaagatta +>seq4 +aaccatacaaggtca \ No newline at end of file diff --git a/inst/extdata/tree_inference.json b/inst/extdata/tree_inference.json deleted file mode 100644 index 9448966..0000000 --- a/inst/extdata/tree_inference.json +++ /dev/null @@ -1,8 +0,0 @@ -{"data": -[ - [1, 1, 0, 2, 3, 0, 0, 0, 0, 3, 1, 1, 3, 3, 2], - [1, 1, 0, 2, 0, 1, 0, 0, 0, 3, 1, 0, 1, 1, 0], - [0, 0, 1, 1, 0, 2, 1, 0, 0, 0, 2, 0, 3, 3, 0], - [0, 0, 1, 1, 0, 3, 0, 1, 0, 0, 2, 2, 3, 1, 0] -] -} diff --git a/inst/extdata/tree_inference.nexus b/inst/extdata/tree_inference.nexus new file mode 100644 index 0000000..cc230e1 --- /dev/null +++ b/inst/extdata/tree_inference.nexus @@ -0,0 +1,31 @@ +#NEXUS + +BEGIN DATA; +DIMENSIONS NTAX=4 NCHAR=15; +FORMAT DATATYPE=DNA GAP=- MISSING=?; +MATRIX + +seq1 ccagtaaaatccttg +seq2 ccagacaaatcacca +seq3 aaccagcaaagatta +seq4 aaccatacaaggtca +; + +END; + +BEGIN ASSUMPTIONS; +EXSET * UNTITLED = ; +END; + +BEGIN CODONS; +CODONPOSSET * CodonPositions = + N:, + 1: 1-13\3, + 2: 2-14\3, + 3: 3-15\3; +CODESET * UNTITLED = Universal: all ; +END; + +BEGIN SETS; +END; + diff --git a/inst/extdata/tree_inference.tppl b/inst/extdata/tree_inference.tppl deleted file mode 100644 index 38aa9b2..0000000 --- a/inst/extdata/tree_inference.tppl +++ /dev/null @@ -1,155 +0,0 @@ -// TreePPL script for tree inference under the -// Jukes Cantor model with a strict clock prior. Pruning hard-coded. - -// The nucleotide is specified as -// "A" for adenine - 0 -// "C" for cytosine - 1 -// "G" for guanine - 2 -// "T" for thymine - 3 -// "-" for gaps - 4 - -// TYPES - -type MsgTree = - | Leaf {age: Real, index: Int, msg: Matrix[Real][]} - | Node {age: Real, msg: Matrix[Real][], left: MsgTree, right: MsgTree} - -// FUNCTIONS - -// Randomly sample two indices in the trees vector, to be combined. Avoiding mirror cases. -function pickpair(n: Int) => Int[] { - assume i ~ Categorical(rep(n, 1./Real(n))); - let i = i + 1; - assume j ~ Categorical(rep((n-1), 1./Real(n-1))); - let j = j+1; - if (j < i) { return [i, j];} - else { return [i,j+1];} -} - -// Build forest of trees from leaves, recursively -function build_forest(data: Int[][], forest: MsgTree[], index: Int, data_len: Int, seq_len: Int) => MsgTree[] { - let new_message = sapply(data[index], get_leaf_message); - let new_leaf = Leaf{age = 0.0, index = index, msg = new_message}; - let new_forest = paste0([forest, [new_leaf]]); - if (data_len == index) { - return new_forest; - } - else { - return build_forest(data, new_forest, index + 1, data_len, seq_len); - } -} - -// Get message from leaves for each site -function get_leaf_message(seq: Int) => Matrix[Real] { - if (seq == 0) { // "A" - let message = rvecCreate(4, [1.0, 0.0, 0.0, 0.0]); - logWeight (log(0.25)); - return message; - } - if (seq == 1) { // "C" - let message = rvecCreate(4, [0.0, 1.0, 0.0, 0.0]); - logWeight (log(0.25)); - return message; - } - if (seq == 2) { // "G" - let message = rvecCreate(4, [0.0, 0.0, 1.0, 0.0]); - logWeight (log(0.25)); - return message; - } - if (seq == 3) { // "T" - let message = rvecCreate(4, [0.0, 0.0, 0.0, 1.0]); - logWeight (log(0.25)); - return message; - } - if (seq == 4) { // "-" - let message = rvecCreate(4, [1.0, 1.0, 1.0, 1.0]); - return message; - } - else { - return error("Invalid state at leaf"); - } -} - -//Compute log likelihood for each site -function get_log_likes(msg: Matrix[Real]) => Real { - let stationary_probs = cvecCreate(4, [0.25,0.25,0.25,0.25]); - let like = mtxMul(msg, stationary_probs); - let log_like = log(mtxGet(1, 1, like)); - return log_like; -} - -// KEY FUNCTION: CLUSTER -function cluster(q: Matrix[Real], trees: MsgTree[], maxAge: Real, seq_len: Int) => MsgTree[] { - - let n = length(trees); - - // Check if we have reached the root of the tree - if (n == 1) { - return trees; - } - - // Randomly sample two indices in the trees vector with function pickpair - // We will combine these, named left and right child - let pairs = pickpair(n); - let left_child = trees[pairs[1]]; - let right_child = trees[pairs[2]]; - - // Get the age of the new internal node - assume t ~ Exponential(10.0); - let age = maxAge + t; - - // Get incoming messages from children - let tmatrix_left = mtxTrans(mtxExp(mtxSclrMul(age-left_child.age, q))); - let tmatrix_right = mtxTrans(mtxExp(mtxSclrMul(age-right_child.age, q))); - - let left_in_msg = sapply(left_child.msg, mtxMul(_, tmatrix_left)); - let right_in_msg = sapply(right_child.msg, mtxMul(_, tmatrix_right)); - - // Get the message of the new node. - let node_msg = messageElemMul(left_in_msg, right_in_msg); - - // Weights - let log_likes = sapply(node_msg, get_log_likes); - logWeight (seqSumReal(log_likes)); - - // Remove weights of previous internal children - let log_likes_left = sapply(left_child.msg, get_log_likes); - logWeight -(seqSumReal(log_likes_left)); - let log_likes_right = sapply(right_child.msg, get_log_likes); - logWeight -(seqSumReal(log_likes_right)); - - // Manual resample - resample; - - // Combine picked pair of trees into a new node - let parent = Node{age=age, msg=node_msg, left=left_child, right=right_child}; - - // Compute new_trees list - let min = minInt(pairs[2],pairs[1]); - let max = maxInt(pairs[2],pairs[1]); - let new_trees = paste0([slice(trees, 1, min), slice(trees, (min+1), max), slice(trees, (max+1), (n+1)), [parent]]); - - // Recursive call to cluster for new_trees - return cluster(q, new_trees, age, seq_len); -} - -// MODEL FUNCTION - -model function myModel(data: Int[][]) => MsgTree[] { - // Define the scaled rate matrix for Jukes-Cantor - let q = mtxCreate(4,4, - [ -1.0, (1.0/3.0), (1.0/3.0), (1.0/3.0), - (1.0/3.0), -1.0, (1.0/3.0), (1.0/3.0), - (1.0/3.0), (1.0/3.0), -1.0, (1.0/3.0), - (1.0/3.0), (1.0/3.0), (1.0/3.0), -1.0] - ); - - let data_len = length(data); - let seq_len = length(data[1]); - - // Define the initial trees sequence (containing the leaves) - let trees = build_forest(data, [], 1, data_len, seq_len); - - // Build the tree by random clustering and return - return cluster(q, trees, 0.0, seq_len); -} diff --git a/man/pipe.Rd b/man/pipe.Rd deleted file mode 100644 index a648c29..0000000 --- a/man/pipe.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils-pipe.R -\name{\%>\%} -\alias{\%>\%} -\title{Pipe operator} -\usage{ -lhs \%>\% rhs -} -\arguments{ -\item{lhs}{A value or the magrittr placeholder.} - -\item{rhs}{A function call using the magrittr semantics.} -} -\value{ -The result of calling \code{rhs(lhs)}. -} -\description{ -See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. -} -\keyword{internal} diff --git a/man/tp_check_input.Rd b/man/tp_check_input.Rd deleted file mode 100644 index d13d516..0000000 --- a/man/tp_check_input.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/checker.R -\name{tp_check_input} -\alias{tp_check_input} -\title{Check input for inference with TreePPL} -\usage{ -tp_check_input(model, data) -} -\arguments{ -\item{model}{A TreePPL model (S3)} - -\item{data}{A list with each item name accordingly to the data structure in the -model.} -} -\value{ -... -} -\description{ -This function checks the input for \code{tp_go()}. -} diff --git a/man/tp_compile.Rd b/man/tp_compile.Rd index f5adb6e..922289a 100644 --- a/man/tp_compile.Rd +++ b/man/tp_compile.Rd @@ -1,108 +1,47 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/runner.R +% Please edit documentation in R/compile.R \name{tp_compile} \alias{tp_compile} -\title{Compile for \code{\link[=tp_run]{tp_run()}}} +\title{Compile a TreePPL model and create inference machinery} \usage{ tp_compile( - model_file_name = "tmp_model_file", - samples = 1000, - seed = NULL, - method = "smc-bpf", - align = FALSE, - cps = "none", - delay = NULL, - kernel = NULL, - mcmc_lw_gprob = NULL, - pmcmc_particles = NULL, - prune = FALSE, - subsample = NULL, - resample = NULL + model, + method, + iterations = NULL, + particles = NULL, + dir = NULL, + output = NULL, + ... ) } \arguments{ -\item{model_file_name}{a character vector giving a model name.} +\item{model}{One of tree options: +\itemize{ +\item The full path of the model file that contains the TreePPL code, OR +\item A string with the name of a model supported by treepplr +(see \code{\link[=tp_model_library]{tp_model_library()}}), OR +\item A string containing the entire TreePPL code. +}} -\item{samples}{a \link[base:integer]{base::integer} giving the number of samples (mcmc) or -particules (smc).} +\item{method}{Inference method to be used. See tp_compile_options() +for all supported methods.} -\item{seed}{a \link[base:numeric]{base::numeric} to use as a random seed.} +\item{iterations}{The number of MCMC iterations to be run.} -\item{method}{a character vector giving the inference method name.} +\item{particles}{The number of SMC particles to be run.} -\item{align}{a \link[base:logical]{base::logical} to tell if need to align the model.} +\item{dir}{The directory where you want to save the executable. Default is +\code{\link[base:tempfile]{base::tempdir()}}} -\item{cps}{a character vector giving the configuration of CPS transformation.} +\item{output}{Complete path to the compiled TreePPL program that will be +created. Default is dir/\if{html}{\out{}}.exe} -\item{delay}{a character vector giving the configuration of delayed sampling.} - -\item{kernel}{a \link[base:numeric]{base::numeric} value giving the driftScale for driftKernel -in MCMC.} - -\item{mcmc_lw_gprob}{a \link[base:numeric]{base::numeric} probability of performing a global -MCMC step.} - -\item{pmcmc_particles}{a \link[base:integer]{base::integer} number of particles for the smc -proposal computation} - -\item{prune}{a \link[base:logical]{base::logical} to tell if the model will try to be pruned.} - -\item{subsample}{a \link[base:integer]{base::integer} number of draw to subsample from the -posterior distribution.} - -\item{resample}{a character vector giving the selected resample placement -method.} +\item{...}{See tp_compile_options() for all supported arguments.} } \value{ -The R's \code{\link[base:tempfile]{base::tempdir()}} whreŕe the compile file is stored. +The path for the compiled TreePPL program. } \description{ -\code{tp_compile} compile a TreePPL model to use by \link{tp_run}. -} -\details{ -\code{model_file_name} : a character vector giving to \link{tp_treeppl} as -a model name. Use a \link{tp_stored_data} name if you have already -write your model with \link{tp_treeppl}. - -\code{seed} : The random seed to use. Using 'NULL' initialized randomly. - -\code{method} : Inference method to be used. The selected inference method. -The supported methods are: is-lw, smc-bpf, smc-apf, mcmc-lightweight, -mcmc-trace, mcmc-naive, pmcmc-pimh. - -The following options are highly dependable of the method used. -Check [not implemented yet] for more information. - -\code{align} : Whether or not to align the model for certain inference algorithms. - -\code{cps} : Configuration of CPS transformation (only applicable to certain -inference algorithms). The supported options are: none, partial, and full. - -\code{delay} : The model is transformed to an efficient representation if -possible. The supported options are: static or dynamic. Use 'NULL' to ignore. - -\code{kernel} : The value of the driftScale for driftKernel in MCMC. Use 'NULL' -to ignore. Use in conjuction with \code{method} mcmc-lightweight". -Use 'NULL' to ignore - -\code{mcmc_lw_gprob} : The probability of performing a global MH step -(non-global means only modify a single sample in the previous trace). -Use in conjuction with \code{method} mcmc-lightweight". Use 'NULL' to ignore - -\code{pmcmc_particles} : The number of particles for the smc proposal computation. -This option is used if one of the following methods are used: pmcmc-*. -Use 'NULL' to ignore - -\code{prune} : The model is pruned if possible. - -\code{subsample} : The number of draw to subsample from the posterior -distribution. Use in conjuction with \code{method} smc-apf or smc-bpf. -Use 'NULL' to ignore. - -\code{resample}: The selected resample placement method, for inference algorithms -where applicable. The supported methods are: -likelihood (resample immediately after all likelihood updates), -align (resample after aligned likelihood updates, forces --align), -and manual (sample only at manually defined resampling locations). -Use 'NULL' to ignore. +\code{tp_compile} compile a TreePPL model and create inference machinery to be +used by \link{tp_run}. } diff --git a/man/tp_compile_options.Rd b/man/tp_compile_options.Rd new file mode 100644 index 0000000..dce5d89 --- /dev/null +++ b/man/tp_compile_options.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compile.R +\name{tp_compile_options} +\alias{tp_compile_options} +\title{Options that can be passed to TreePPL compiler} +\usage{ +tp_compile_options() +} +\value{ +A string with the output from the compiler's help +} +\description{ +Options that can be passed to TreePPL compiler +} diff --git a/man/tp_data.Rd b/man/tp_data.Rd index cdec0cc..b02c9a4 100644 --- a/man/tp_data.Rd +++ b/man/tp_data.Rd @@ -1,24 +1,48 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getter.R +% Please edit documentation in R/data.R \name{tp_data} \alias{tp_data} \title{Import data for TreePPL program} \usage{ -tp_data(data_input) +tp_data(data_input, data_file_name = "tmp_data_file", dir = tp_tempdir()) } \arguments{ -\item{data_input}{One of tree options: +\item{data_input}{One of the following options: \itemize{ -\item The full path of the JSON file that contains the data, OR -\item A string with the name of a model supported by treepplr -(see \code{\link[=tp_model_names]{tp_model_names()}}), OR -\item A list (or structured list) containing TreePPL data. +\item A named list (or structured list) containing TreePPL data, OR +\item The full path of a multiple sequence alignment in fasta (.fasta, .fas) +or nexus (.nexus, .nex) format, OR +\item For test data, a string with the name of a model supported by treepplr +(see \code{\link[=tp_model_library]{tp_model_library()}}). }} + +\item{data_file_name}{An optional name for the file created. Ignored if \code{data_input} +is the name of a model from the TreePPL library.} + +\item{dir}{The directory where you want to save the data file in JSON format. +Default is \code{\link[base:tempfile]{base::tempdir()}}. Ignored if \code{data_input} is the name of a model +from the TreePPL library.} } \value{ -a list, see \code{\link[=tp_check_input]{tp_check_input()}} for further details. +The path for the data file that will be used by \code{\link[=tp_run]{tp_run()}}. } \description{ -\code{tp_data} takes data and prepares it to be used by -\code{\link[=tp_treeppl]{tp_treeppl()}}. +Prepare data input for \code{\link[=tp_run]{tp_run()}}. +} +\details{ +\code{data_input}: The name of each list element has to match the name of a model +input, which is defined in the TreePPL model code. +} +\examples{ +\dontrun{ +# Example using a model name supported by TreePPL +input <- tp_data("tree_inference") +input + +# Example using an internal FASTA file (same input data as before, but in fasta format) +fasta_file <- system.file("extdata", "tree_inference.fasta", package = "treepplr") +input <- tp_data(fasta_file) +input +} + } diff --git a/man/tp_expected_input.Rd b/man/tp_expected_input.Rd new file mode 100644 index 0000000..a312c05 --- /dev/null +++ b/man/tp_expected_input.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\name{tp_expected_input} +\alias{tp_expected_input} +\title{List expected input variables for a model} +\usage{ +tp_expected_input(model) +} +\arguments{ +\item{model}{A TreePPL model} +} +\value{ +The expected input data for a given TreePPL model. +} +\description{ +List expected input variables for a model +} diff --git a/man/tp_fp_fetch.Rd b/man/tp_fp_fetch.Rd deleted file mode 100644 index 482ca9b..0000000 --- a/man/tp_fp_fetch.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{tp_fp_fetch} -\alias{tp_fp_fetch} -\title{Fetch the latest version of treeppl} -\usage{ -tp_fp_fetch() -} -\description{ -Fetch the latest version of treeppl -} diff --git a/man/tp_json_to_phylo.Rd b/man/tp_json_to_phylo.Rd index 63f4c61..b966027 100644 --- a/man/tp_json_to_phylo.Rd +++ b/man/tp_json_to_phylo.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tree_convertion.R +% Please edit documentation in R/data.R \name{tp_json_to_phylo} \alias{tp_json_to_phylo} \title{Convert TreePPL multi-line JSON to R phylo/multiPhylo object with associated @@ -11,7 +11,7 @@ tp_json_to_phylo(json_out) \item{json_out}{One of two options: \itemize{ \item A list of TreePPL output in parsed JSON format (output from -\code{\link[=tp_treeppl]{tp_treeppl()}}), OR +\code{\link[=tp_run]{tp_run()}}), OR \item The full path of the json file containing the TreePPL output. }} } diff --git a/man/tp_list.Rd b/man/tp_list.Rd index 398c14d..f6d953e 100644 --- a/man/tp_list.Rd +++ b/man/tp_list.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R +% Please edit documentation in R/data.R \name{tp_list} \alias{tp_list} \title{Create a flat list} diff --git a/man/tp_mcmc_convergence.Rd b/man/tp_mcmc_convergence.Rd new file mode 100644 index 0000000..9ac120a --- /dev/null +++ b/man/tp_mcmc_convergence.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_treatment.R +\name{tp_mcmc_convergence} +\alias{tp_mcmc_convergence} +\title{Check for convergence across multiple MCMC runs.} +\usage{ +tp_mcmc_convergence(treeppl_out) +} +\arguments{ +\item{treeppl_out}{a data frame outputted by \code{\link[=tp_parse_mcmc]{tp_parse_mcmc()}}.} +} +\value{ +Gelman and Rubin's convergence diagnostic +} +\description{ +Check for convergence across multiple MCMC runs. +} diff --git a/man/tp_model.Rd b/man/tp_model.Rd index 31e4ced..7d9183e 100644 --- a/man/tp_model.Rd +++ b/man/tp_model.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getter.R +% Please edit documentation in R/compile.R \name{tp_model} \alias{tp_model} \title{Import a TreePPL model} @@ -11,16 +11,14 @@ tp_model(model_input) \itemize{ \item The full path of the model file that contains the TreePPL code, OR \item A string with the name of a model supported by treepplr -(see \code{\link[=tp_model_names]{tp_model_names()}}), OR +(see \code{\link[=tp_model_library]{tp_model_library()}}), OR \item A string containing the entire TreePPL code. }} } \value{ -A TreePPL model (S3). A structured list with the -string containing the TreePPL model and a class -(\code{\link[=tp_model_names]{tp_model_names()}} or "custom") +The path to the TreePPL model file } \description{ \code{tp_model} takes TreePPL code and prepares it to be used by -\code{\link[=tp_treeppl]{tp_treeppl()}}. +\code{\link[=tp_compile]{tp_compile()}}. } diff --git a/man/tp_model_library.Rd b/man/tp_model_library.Rd new file mode 100644 index 0000000..5b327e9 --- /dev/null +++ b/man/tp_model_library.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{tp_model_library} +\alias{tp_model_library} +\title{Model names supported by treepplr} +\usage{ +tp_model_library() +} +\value{ +A list of model names. +} +\description{ +Provides a list of all models in the TreePPL model library. +} diff --git a/man/tp_model_names.Rd b/man/tp_model_names.Rd deleted file mode 100644 index c9ab0bc..0000000 --- a/man/tp_model_names.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{tp_model_names} -\alias{tp_model_names} -\title{Model names supported by treepplr} -\usage{ -tp_model_names() -} -\value{ -A list of model names. -} -\description{ -Provides a list of all model names supported by treepplr. -The names can also be used to find data for these models -(see \link{tp_data}). -} diff --git a/man/tp_parse_host_rep.Rd b/man/tp_parse_host_rep.Rd index fb84097..66f7df7 100644 --- a/man/tp_parse_host_rep.Rd +++ b/man/tp_parse_host_rep.Rd @@ -8,7 +8,7 @@ tp_parse_host_rep(treeppl_out) } \arguments{ \item{treeppl_out}{a character vector giving the TreePPL json output -produced by \link{tp_treeppl}.} +produced by \link{tp_run}.} } \value{ A list (n = n_runs) of data frames with the output from inference diff --git a/man/tp_parse.Rd b/man/tp_parse_mcmc.Rd similarity index 52% rename from man/tp_parse.Rd rename to man/tp_parse_mcmc.Rd index da161f8..1922c9f 100644 --- a/man/tp_parse.Rd +++ b/man/tp_parse_mcmc.Rd @@ -1,18 +1,18 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/post_treatment.R -\name{tp_parse} -\alias{tp_parse} -\title{Parse simple TreePPL json output} +\name{tp_parse_mcmc} +\alias{tp_parse_mcmc} +\title{Parse simple TreePPL json MCMC output} \usage{ -tp_parse(treeppl_out) +tp_parse_mcmc(treeppl_out) } \arguments{ \item{treeppl_out}{a character vector giving the TreePPL json output -produced by \link{tp_treeppl}.} +produced by \link{tp_run} using an MCMC method.} } \value{ A data frame with the output from inference in TreePPL. } \description{ -\code{tp_parse} takes TreePPL json output and returns a data.frame +\code{tp_parse_mcmc} takes TreePPL json MCMC output and returns a data.frame } diff --git a/man/tp_parse_smc.Rd b/man/tp_parse_smc.Rd new file mode 100644 index 0000000..6db9ed1 --- /dev/null +++ b/man/tp_parse_smc.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/post_treatment.R +\name{tp_parse_smc} +\alias{tp_parse_smc} +\title{Parse simple TreePPL json SMC output} +\usage{ +tp_parse_smc(treeppl_out) +} +\arguments{ +\item{treeppl_out}{a character vector giving the TreePPL json output +produced by \link{tp_run} using an SMC method.} +} +\value{ +A data frame with the output from inference in TreePPL. +} +\description{ +\code{tp_parse_smc} takes TreePPL json SMC output and returns a data.frame +} +\details{ +Particles with -Inf weight are removed. +} diff --git a/man/tp_phylo_to_tpjson.Rd b/man/tp_phylo_to_tpjson.Rd index 1677cef..bad16bb 100644 --- a/man/tp_phylo_to_tpjson.Rd +++ b/man/tp_phylo_to_tpjson.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tree_convertion.R +% Please edit documentation in R/data.R \name{tp_phylo_to_tpjson} \alias{tp_phylo_to_tpjson} \title{Convert phylo obj to TreePPL tree} diff --git a/man/tp_phylo_to_tppl_tree.Rd b/man/tp_phylo_to_tppl_tree.Rd index 57e25ef..08427d6 100644 --- a/man/tp_phylo_to_tppl_tree.Rd +++ b/man/tp_phylo_to_tppl_tree.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tree_convertion.R +% Please edit documentation in R/data.R \name{tp_phylo_to_tppl_tree} \alias{tp_phylo_to_tppl_tree} \title{Convert phylo to a tppl_tree} diff --git a/man/tp_run.Rd b/man/tp_run.Rd index cfc0852..c00624c 100644 --- a/man/tp_run.Rd +++ b/man/tp_run.Rd @@ -1,38 +1,65 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/runner.R +% Please edit documentation in R/run.R \name{tp_run} \alias{tp_run} \title{Run a TreePPL program} \usage{ tp_run( - model_file_name = "tmp_model_file", - data_file_name = "tmp_data_file", - n_runs = 1 + compiled_model, + data, + n_runs = NULL, + n_sweeps = NULL, + dir = NULL, + out_file_name = "out", + ... ) } \arguments{ -\item{model_file_name}{a character vector giving a model name.} +\item{compiled_model}{a \link[base:character]{base::character} with the full path to the compiled model +outputted by \link{tp_compile}.} -\item{data_file_name}{a character vector giving a data name.} +\item{data}{a \link[base:character]{base::character} with the full path to the data file in TreePPL +JSON format (as outputted by \link{tp_data}).} -\item{n_runs}{a \link[base:integer]{base::integer} giving the number of runs (mcmc)/sweaps (smc).} +\item{n_runs}{When using MCMC, a \link[base:integer]{base::integer} giving the number of runs to be done.} + +\item{n_sweeps}{When using SMC, a \link[base:integer]{base::integer} giving the number of SMC sweeps to be done.} + +\item{dir}{a \link[base:character]{base::character} with the full path to the directory where you +want to save the output. Default is \code{\link[base:tempfile]{base::tempdir()}}.} + +\item{out_file_name}{a \link[base:character]{base::character} with the name of the output file in +JSON format. Default is "out".} + +\item{...}{See \link{tp_run_options} for all supported arguments.} } \value{ A list of TreePPL output in parsed JSON format. } \description{ -\code{tp_treeppl} execute TreePPL and return TreePPL output (string JSON format). +Run TreePPL and return output. } -\details{ -#' +\examples{ +\dontrun{ +# When using SMC +# compile model and create SMC inference machinery +exe_path <- tp_compile(model = "coin", method = "smc-bpf", particles = 2000) -\code{model_file_name} : a character vector giving to \link{tp_treeppl} as -a model name. Use a \link{tp_stored_data} name if you have already -write your model with \link{tp_treeppl}. +# prepare data +data_path <- tp_data(data_input = "coin") -\code{data_file_name} : a character vector giving to \link{tp_treeppl} -a data name. Use a \link{tp_stored_data} name if you have already write -your data with \link{tp_treeppl}. +# run TreePPL +result <- tp_run(exe_path, data_path, n_sweeps = 2) -\code{n_runs} : The number of run (mcmc) / sweap (smc) used for the inference. + +# When using MCMC +# compile model and create MCMC inference machinery +exe_path <- tp_compile(model = "coin", method = "mcmc-naive", iterations = 2000) + +# prepare data +data_path <- tp_data(data_input = "coin") + +# run TreePPL +result <- tp_run(exe_path, data_path, n_runs = 2) +} } diff --git a/man/tp_run_options.Rd b/man/tp_run_options.Rd new file mode 100644 index 0000000..f31b7d4 --- /dev/null +++ b/man/tp_run_options.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run.R +\name{tp_run_options} +\alias{tp_run_options} +\title{Options that can be passed to a TreePPL program} +\usage{ +tp_run_options() +} +\value{ +A string with the output from the executable's help +} +\description{ +Options that can be passed to a TreePPL program +} diff --git a/man/tp_smc_convergence.Rd b/man/tp_smc_convergence.Rd index 732c60c..dc4df43 100644 --- a/man/tp_smc_convergence.Rd +++ b/man/tp_smc_convergence.Rd @@ -2,17 +2,16 @@ % Please edit documentation in R/post_treatment.R \name{tp_smc_convergence} \alias{tp_smc_convergence} -\title{Check for convergence across multiple SMC sweeps/runs} +\title{Check for convergence across multiple SMC sweeps.} \usage{ tp_smc_convergence(treeppl_out) } \arguments{ -\item{treeppl_out}{a character vector giving the TreePPL json output -produced by \link{tp_treeppl}.} +\item{treeppl_out}{a data frame outputted by \code{\link[=tp_parse_smc]{tp_parse_smc()}}.} } \value{ Variance in the normalizing constants across SMC sweeps. } \description{ -Check for convergence across multiple SMC sweeps/runs +Check for convergence across multiple SMC sweeps. } diff --git a/man/tp_stored_compiled.Rd b/man/tp_stored_compiled.Rd deleted file mode 100644 index e7f34ec..0000000 --- a/man/tp_stored_compiled.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{tp_stored_compiled} -\alias{tp_stored_compiled} -\title{List of compiled models in \link[base:tempfile]{base::tempdir}} -\usage{ -tp_stored_compiled() -} -\value{ -A list of compiled model file names. -} -\description{ -Provides a list of all the compiled model file names currently -stored in \link[base:tempfile]{base::tempdir}. -} diff --git a/man/tp_stored_data.Rd b/man/tp_stored_data.Rd deleted file mode 100644 index 8340929..0000000 --- a/man/tp_stored_data.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{tp_stored_data} -\alias{tp_stored_data} -\title{Data file names stored by user in \link[base:tempfile]{base::tempdir} using -\link{tp_write}} -\usage{ -tp_stored_data() -} -\value{ -A list of data file names. -} -\description{ -Provides a list of all the data file names currently -stored in \link[base:tempfile]{base::tempdir}. To verify if there is a -matching model file, see \link{tp_stored_model}. -} diff --git a/man/tp_stored_model.Rd b/man/tp_stored_model.Rd deleted file mode 100644 index 0c4e3ee..0000000 --- a/man/tp_stored_model.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/utils.R -\name{tp_stored_model} -\alias{tp_stored_model} -\title{Model file names stored by user in \link[base:tempfile]{base::tempdir} using -\link{tp_write}} -\usage{ -tp_stored_model() -} -\value{ -A list of model file names. -} -\description{ -Provides a list of all the model file names currently -stored in \link[base:tempfile]{base::tempdir}. To verify if there is a -matching data file, see \link{tp_stored_data}. -} diff --git a/man/tp_treeppl.Rd b/man/tp_treeppl.Rd deleted file mode 100644 index f43a9c7..0000000 --- a/man/tp_treeppl.Rd +++ /dev/null @@ -1,153 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/runner.R -\name{tp_treeppl} -\alias{tp_treeppl} -\title{Compile and run a TreePPL program} -\usage{ -tp_treeppl( - model = NULL, - model_file_name = "tmp_model_file", - data = NULL, - data_file_name = "tmp_data_file", - compile_model = TRUE, - samples = 1000, - seed = NULL, - n_runs = 1, - method = "smc-bpf", - align = FALSE, - cps = "none", - delay = NULL, - kernel = NULL, - mcmc_lw_gprob = NULL, - pmcmc_particles = NULL, - prune = FALSE, - subsample = NULL, - resample = NULL -) -} -\arguments{ -\item{model}{a TreePPL model (S3).} - -\item{model_file_name}{a character vector giving a model name.} - -\item{data}{a phyjson object (S3).} - -\item{data_file_name}{a character vector giving a data name.} - -\item{compile_model}{a \link[base:logical]{base::logical} to tell if the model need to be -compile} - -\item{samples}{a \link[base:integer]{base::integer} giving the number of samples (mcmc) or -particules (smc).} - -\item{seed}{a \link[base:numeric]{base::numeric} to use as a random seed.} - -\item{n_runs}{a \link[base:integer]{base::integer} giving the number of run (mcmc)/sweap (smc).} - -\item{method}{a character vector giving the inference method name.} - -\item{align}{a \link[base:logical]{base::logical} to tell if need to align the model.} - -\item{cps}{a character vector giving the configuration of CPS transformation.} - -\item{delay}{a character vector giving the configuration of delayed sampling.} - -\item{kernel}{a \link[base:numeric]{base::numeric} value giving the driftScale for driftKernel -in MCMC.} - -\item{mcmc_lw_gprob}{a \link[base:numeric]{base::numeric} probability of performing a global -MCMC step.} - -\item{pmcmc_particles}{a \link[base:integer]{base::integer} number of particles for the smc -proposal computation} - -\item{prune}{a \link[base:logical]{base::logical} to tell if the model will try to be pruned.} - -\item{subsample}{a \link[base:integer]{base::integer} number of draw to subsample from the -posterior distribution.} - -\item{resample}{a character vector giving the selected resample placement -method} -} -\value{ -A list of TreePPL output in parsed JSON format. -} -\description{ -\code{tp_treeppl} execute TreePPL and return TreePPL output (string JSON format). -} -\details{ -This function takes TreePPL object (S3) and phyjson object (S3), -compile TreePPL model, run it with data and returning TreePPL output. - -TreePPL need to be install on your computer and the PATH set for R/RSTUDIO -(see \href{https://treeppl.org/docs/Howtos}{install} manual). -The executable and the output files will be written in R's \code{\link[base:tempfile]{base::tempdir()}}. - -\code{model} : A TreePPL model (S3), see \link{tp_model} for further details. -Use 'NULL' if you have previously provide an model. Check already provide -model with \link{tp_stored_model}. - -\code{model_file_name} : a character vector giving to \link{tp_treeppl} as -a model name. Use a \link{tp_stored_data} name if you have already -write your model with \link{tp_treeppl}. - -\code{data} : A list, see \code{\link[=tp_check_input]{tp_check_input()}} for further -details. Use 'NULL' if you have previously provide data. Check already -provide data with \link{tp_stored_data}. - -\code{data_file_name} : a character vector giving to \link{tp_treeppl} -a data name. Use a \link{tp_stored_data} name if you have already write -your data with \link{tp_treeppl}. - -\code{compile_model} : a \link[base:logical]{base::logical} telling if the model need to be compiled. -Can be use to avoid to compile a model again in R's \code{\link[base:tempfile]{base::tempdir()}} -if you have already compile a \code{model} in a previous call of -\link{tp_treeppl}. Check already compile model -with \link{tp_stored_compiled}. - -\code{samples} : The number of samples (mcmc) / particules (smc) during inference. - -\code{seed} : The random seed to use. Using 'NULL' initialized randomly. - -\code{n_runs} : The number of run (mcmc) / sweap (smc) used for the inference. - -\code{method} : Inference method to be used. The selected inference method. -The supported methods are: is-lw, smc-bpf, smc-apf, mcmc-lightweight, -mcmc-trace, mcmc-naive, pmcmc-pimh. - -The following options are highly dependable of the method used. -Check [not implemented yet] for more information. - -\code{align} : Whether or not to align the model for certain inference algorithms. - -\code{cps} : Configuration of CPS transformation (only applicable to certain -inference algorithms). The supported options are: none, partial, and full. - -\code{delay} : The model is transformed to an efficient representation if -possible. The supported options are: static or dynamic. Use 'NULL' to ignore. - -\code{kernel} : The value of the driftScale for driftKernel in MCMC. Use 'NULL' -to ignore. Use in conjuction with \code{method} mcmc-lightweight". -Use 'NULL' to ignore - -\code{mcmc_lw_gprob} : The probability of performing a global MH step -(non-global means only modify a single sample in the previous trace). -Use in conjuction with \code{method} mcmc-lightweight". Use 'NULL' to ignore - -\code{pmcmc_particles} : The number of particles for the smc proposal computation. -This option is used if one of the following methods are used: pmcmc-*. -Use 'NULL' to ignore - -\code{prune} : The model is pruned if possible. - -\code{subsample} : The number of draw to subsample from the posterior -distribution. Use in conjuction with \code{method} smc-apf or smc-bpf. -Use 'NULL' to ignore. - -\code{resample}: The selected resample placement method, for inference algorithms -where applicable. The supported methods are: -likelihood (resample immediately after all likelihood updates), -align (resample after aligned likelihood updates, forces --align), -and manual (sample only at manually defined resampling locations). -Use 'NULL' to ignore. -} diff --git a/man/tp_treeppl_json.Rd b/man/tp_treeppl_json.Rd index 2cc5ebd..557b778 100644 --- a/man/tp_treeppl_json.Rd +++ b/man/tp_treeppl_json.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tree_convertion.R +% Please edit documentation in R/data.R \name{tp_treeppl_json} \alias{tp_treeppl_json} \title{Convert a tppl_tree to TreePPL json str} diff --git a/man/tp_write.Rd b/man/tp_write.Rd deleted file mode 100644 index 0f2fa39..0000000 --- a/man/tp_write.Rd +++ /dev/null @@ -1,45 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/runner.R -\name{tp_write} -\alias{tp_write} -\title{Prepare input for \code{\link[=tp_compile]{tp_compile()}}} -\usage{ -tp_write( - model = NULL, - model_file_name = "tmp_model_file", - data = NULL, - data_file_name = "tmp_data_file" -) -} -\arguments{ -\item{model}{a TreePPL model (S3).} - -\item{model_file_name}{a character vector giving a model name.} - -\item{data}{a phyjson object (S3).} - -\item{data_file_name}{a character vector giving a data name.} -} -\description{ -\code{tp_write} writes an JSON file to be used by \code{\link[=tp_compile]{tp_compile()}}. -} -\details{ -This function takes TreePPL object (S3) and phyjson object (S3) and write -them in \code{\link[base:tempfile]{base::tempdir()}}. - -\code{model} : A TreePPL model (S3), see \link{tp_model} for further details. -Use 'NULL' if you have previously provide an model. Check already provide -model with \link{tp_stored_model}. - -\code{model_file_name} : a character vector giving to \link{tp_treeppl} as -a model name. Use a \link{tp_stored_data} name if you have already -write your model with \link{tp_write}. - -\code{data} : A list, see \code{\link[=tp_check_input]{tp_check_input()}} for further -details. Use 'NULL' if you have previously provide data. Check already -provide data with \link{tp_stored_data}. - -\code{data_file_name} : a character vector giving to \link{tp_treeppl} -a data name. Use a \link{tp_stored_data} name if you have already write -your data with \link{tp_write}. -} diff --git a/man/tp_write_data.Rd b/man/tp_write_data.Rd new file mode 100644 index 0000000..19bde93 --- /dev/null +++ b/man/tp_write_data.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\name{tp_write_data} +\alias{tp_write_data} +\title{Write data to file} +\usage{ +tp_write_data(data_list, data_file_name = "tmp_data_file", dir = tp_tempdir()) +} +\arguments{ +\item{data_list}{A named list of data input} + +\item{data_file_name}{An optional name for the file created} + +\item{dir}{The directory where you want to save the data file in JSON format. +Default is \code{\link[base:tempfile]{base::tempdir()}}.} +} +\value{ +The path to the created file +} +\description{ +Write data to file +} diff --git a/man/tp_write_model.Rd b/man/tp_write_model.Rd new file mode 100644 index 0000000..a7c07a5 --- /dev/null +++ b/man/tp_write_model.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compile.R +\name{tp_write_model} +\alias{tp_write_model} +\title{Write out a custom model to tp_tempdir()} +\usage{ +tp_write_model(model, model_file_name = "tmp_model_file") +} +\arguments{ +\item{model}{A string containing the entire TreePPL code.} + +\item{model_file_name}{An optional name to the file created} +} +\value{ +The path to the file created +} +\description{ +Write out a custom model to tp_tempdir() +} diff --git a/man/tree_age_cumul.Rd b/man/tree_age_cumul.Rd index 7835d47..ef8f304 100644 --- a/man/tree_age_cumul.Rd +++ b/man/tree_age_cumul.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tree_convertion.R +% Please edit documentation in R/data.R \name{tree_age_cumul} \alias{tree_age_cumul} \title{Calculate age in a tppl_tree} diff --git a/tests/testthat/test-compile.R b/tests/testthat/test-compile.R new file mode 100644 index 0000000..26e125e --- /dev/null +++ b/tests/testthat/test-compile.R @@ -0,0 +1,94 @@ +temp_dir <- treepplr::tp_tempdir(temp_dir = NULL) +setwd(temp_dir) +require(testthat) +require(crayon) + +cat(crayon::yellow("\nTest-compile : Compilation.\n")) + + +test_that("Test-compile_1a : tp_compile SMC", { + cat("\tTest-compile_1a : tp_compile SMC \n") + + model <- treepplr::tp_model("coin") + + treepplr::tp_compile(model, method = "smc-bpf", particles = 10) + + expect_no_error(readBin(paste0(temp_dir, "coin.exe"), "raw", 10e6)) +}) + + +test_that("Test-compile_1b : tp_compile MCMC", { + cat("\tTest-compile_1b : tp_compile MCMC \n") + + model_right <- treepplr::tp_model("coin") + + treepplr::tp_compile(model_right, method = "mcmc-lightweight", iterations = 10) + + expect_no_error(readBin(paste0(temp_dir, "coin.exe"), "raw", 10e6)) + +}) + + +test_that("Test-compile_2a : tp_model model name", { + cat("\tTest-compile_2a : tp_model\n") + + model <- treepplr::tp_model("coin") + + version <- list.files("/tmp", pattern = "treeppl", full.names = FALSE) + version <- sort(version, decreasing = TRUE)[1] + + model_right = system(paste0("find /tmp/", version," -name coin.tppl"), + intern = T) + + names(model_right) <- "coin" + + expect_equal(model, model_right) +}) + + +test_that("Test-compile_2b : tp_model model path ", { + cat("\tTest-compile_2b : tp_model\n") + + version <- list.files("/tmp", pattern = "treeppl", full.names = FALSE) + version <- sort(version, decreasing = TRUE)[1] + + model_right = system(paste0("find /tmp/", version," -name coin.tppl"), + intern = T) + model <- treepplr::tp_model(model_right) + names(model_right) <- "custom_model" + + expect_equal(model, model_right) +}) + + +test_that("Test-compile_3a : tp_write path", { + cat("\tTest-compile_3a : tp_write\n") + + path <- treepplr::tp_write_model("T : bla bla bla") + expect_true(file.exists(path)) + +}) + + +test_that("Test-compile_3b : tp_write content", { + cat("\tTest-compile_3b : tp_write\n") + + content_right <- "M : bla bla bla" + path <- treepplr::tp_write_model(content_right) + + content <- readLines(path, warn = FALSE) + expect_equal(content_right, content) + +}) + + +test_that("Test-compile_4 : tp_model model string ", { + cat("\tTest-compile_4 : tp_model\n") + + model_string <- "S : bla bla bla" + model <- treepplr::tp_model(model_string) + model_right <- paste0(temp_dir, "tmp_model_file.tppl") + names(model_right) <- "custom_model" + + expect_equal(model, model_right) +}) diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R new file mode 100644 index 0000000..ec66b3a --- /dev/null +++ b/tests/testthat/test-data.R @@ -0,0 +1,72 @@ +temp_dir <- treepplr::tp_tempdir(temp_dir = NULL) +setwd(temp_dir) +require(testthat) +require(crayon) + +cat(crayon::yellow("\nTest-data : Import and convert data.\n")) + +test_that("Test-data_1a : tp_data name", { + cat("\tTest-data_1a \n") + + version <- list.files("/tmp", pattern = "treeppl", full.names = FALSE) + version <- sort(version, decreasing = TRUE)[1] + + data_right <- system(paste0("find /tmp/", version," -name testdata_coin.json"), + intern = T) + + data <- treepplr::tp_data("coin") + + expect_equal(data, data_right) + +}) + + +test_that("Test-data_1b : tp_data content", { + cat("\tTest-data_1b \n") + + data_right <- + tp_list(coinflips = + c( + TRUE, + TRUE, + TRUE, + FALSE, + TRUE, + FALSE, + FALSE, + TRUE, + TRUE, + FALSE, + FALSE, + FALSE, + TRUE, + FALSE, + TRUE, + FALSE, + FALSE, + TRUE, + FALSE, + FALSE + ) + ) + + path <- treepplr::tp_data("coin") + content <- readLines(path, warn = FALSE) + data <- jsonlite::fromJSON(content) + + expect_equal(data_right, data) + +}) + + +test_that("Test-data_2a : tp_write_data path", { + cat("\tTest-data_2a \n") + + path_right <- paste0(temp_dir, "tmp_data_file.json") + data <- list(test = "test_data") + data_list <- tp_list(list(data)) + path <- treepplr::tp_write_data(data_list) + + expect_equal(path, path_right) + +}) diff --git a/tests/testthat/test-getter.R b/tests/testthat/test-getter.R deleted file mode 100644 index 0527ecf..0000000 --- a/tests/testthat/test-getter.R +++ /dev/null @@ -1,174 +0,0 @@ -temp_dir <- treepplr::tp_tempdir(temp_dir = NULL) -setwd(temp_dir) -require(testthat) -require(crayon) - -cat(crayon::yellow("\nTest-getter : Import data and model.\n")) - -testthat::test_that("Test-getter_1a : tp_model model path", { - cat("\tTest-getter_1a : tp_model\n") - - model <- - tp_model(paste0( - system.file("extdata", package = "treepplr"), - treepplr:::sep(), - "coin.tppl" - )) - model_right <- - "/*\n * File: coin.tppl\n * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data.\n */\n\n/**\n * Conditions the likelihood of the computation\n * on an observed datapoint to come from a particular Bernoulli experiment\n * Parameters:\n * datapoint: Real\n * probability: Real in (0, 1), the probability of True in the Bernoulli experiment\n * Returns: nothing\n * Side-effects: reweighs the computation\n */\nfunction flip(datapoint: Bool, probability: Real) {\n observe datapoint ~ Bernoulli(probability);\n}\n\n/*\n * Model function\n * Data:\n * coinflips: Bool[]\n * Prior:\n * p ~ Beta(2, 2)\n * Posterior:\n * p | coinflips\n */\nmodel function coinModel(coinflips: Bool[]) => Real {\n // Uncomment if you want to test the input\n //printLn(\"Input:\");\n //let coinStr = apply(bool2string, coinflips);\n //printLn(join(coinStr));\n assume p ~ Beta(2.0, 2.0); // prior\n let n = length(coinflips);\n for i in 1 to n {\n flip(coinflips[i], p); // likelihood\n }\n return(p); // posterior\n}\n" - class(model_right) <- "custom" - - testthat::expect_equal(model, model_right) -}) - -testthat::test_that("Test-getter_1b : tp_model model name ", { - cat("\tTest-getter_1b : tp_model\n") - - model <- tp_model("coin") - model_right <- - "/*\n * File: coin.tppl\n * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data.\n */\n\n/**\n * Conditions the likelihood of the computation\n * on an observed datapoint to come from a particular Bernoulli experiment\n * Parameters:\n * datapoint: Real\n * probability: Real in (0, 1), the probability of True in the Bernoulli experiment\n * Returns: nothing\n * Side-effects: reweighs the computation\n */\nfunction flip(datapoint: Bool, probability: Real) {\n observe datapoint ~ Bernoulli(probability);\n}\n\n/*\n * Model function\n * Data:\n * coinflips: Bool[]\n * Prior:\n * p ~ Beta(2, 2)\n * Posterior:\n * p | coinflips\n */\nmodel function coinModel(coinflips: Bool[]) => Real {\n // Uncomment if you want to test the input\n //printLn(\"Input:\");\n //let coinStr = apply(bool2string, coinflips);\n //printLn(join(coinStr));\n assume p ~ Beta(2.0, 2.0); // prior\n let n = length(coinflips);\n for i in 1 to n {\n flip(coinflips[i], p); // likelihood\n }\n return(p); // posterior\n}\n" - class(model_right) <- "coin" - - testthat::expect_equal(model, model_right) -}) - -testthat::test_that("Test-getter_1c : tp_model model string ", { - cat("\tTest-getter_1c : tp_model\n") - - model_right <- - "/*\n * File: coin.tppl\n * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data.\n */\n\n/**\n * Conditions the likelihood of the computation\n * on an observed datapoint to come from a particular Bernoulli experiment\n * Parameters:\n * datapoint: Real\n * probability: Real in (0, 1), the probability of True in the Bernoulli experiment\n * Returns: nothing\n * Side-effects: reweighs the computation\n */\nfunction flip(datapoint: Bool, probability: Real) {\n observe datapoint ~ Bernoulli(probability);\n}\n\n/*\n * Model function\n * Data:\n * coinflips: Bool[]\n * Prior:\n * p ~ Beta(2, 2)\n * Posterior:\n * p | coinflips\n */\nmodel function coinModel(coinflips: Bool[]) => Real {\n // Uncomment if you want to test the input\n //printLn(\"Input:\");\n //let coinStr = apply(bool2string, coinflips);\n //printLn(join(coinStr));\n assume p ~ Beta(2.0, 2.0); // prior\n let n = length(coinflips);\n for i in 1 to n {\n flip(coinflips[i], p); // likelihood\n }\n return(p); // posterior\n}\n" - model <- tp_model(model_right) - class(model_right) <- "custom" - - testthat::expect_equal(model, model_right) -}) - -testthat::test_that("Test-getter_2a : tp_data data path", { - cat("\tTest-getter_2a : tp_data\n") - - data <- - tp_data(paste0( - system.file("extdata", package = "treepplr"), - treepplr:::sep(), - "coin.json" - )) - data_right <- - tp_list( - coinflips = c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - ) - - testthat::expect_equal(data, data_right) -}) - -testthat::test_that("Test-getter_2b : tp_data data name", { - cat("\tTest-getter_2b : tp_data\n") - - data <- tp_data("coin") - data_right <- - tp_list( - coinflips = c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - ) - - testthat::expect_equal(data, data_right) -}) - -testthat::test_that("Test-getter_2a : tp_data data string", { - cat("\tTest-getter_2c : tp_data\n") - - data <- - tp_data(list( - c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - )) - - data_right <- - tp_list( - c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - ) - - testthat::expect_equal(data, data_right) -}) - diff --git a/tests/testthat/test-run.R b/tests/testthat/test-run.R new file mode 100644 index 0000000..55aa6a7 --- /dev/null +++ b/tests/testthat/test-run.R @@ -0,0 +1,33 @@ +temp_dir <- treepplr::tp_tempdir(temp_dir = NULL) +setwd(temp_dir) +require(testthat) +require(crayon) + +cat(crayon::yellow("\nTest-run : Running TreePPL.\n")) + +test_that("Test-run_1a : tp_run", { + cat("\tTest-run_1a : tp_run \n") + + model <- treepplr::tp_model("coin") + exe <- treepplr::tp_compile(model, method = "smc-bpf", particles = 2) + data <- treepplr::tp_data("coin") + + treepplr::tp_run(exe, data, n_sweeps = 1) + + expect_no_error(readBin(paste0(temp_dir, "out.json"), "raw", 10e6)) + +}) + + +test_that("Test-run_1a : tp_run custom name", { + cat("\tTest-run_1a : tp_run \n") + + model <- treepplr::tp_model("coin") + exe <- treepplr::tp_compile(model, method = "smc-bpf", particles = 2) + data <- treepplr::tp_data("coin") + + treepplr::tp_run(exe, data, n_sweeps = 1, out_file_name = "test_out") + + expect_no_error(readBin(paste0(temp_dir, "test_out.json"), "raw", 10e6)) + +}) diff --git a/tests/testthat/test-runner.R b/tests/testthat/test-runner.R deleted file mode 100644 index 4da41da..0000000 --- a/tests/testthat/test-runner.R +++ /dev/null @@ -1,189 +0,0 @@ -temp_dir <- treepplr::tp_tempdir(temp_dir = NULL) -setwd(temp_dir) -require(testthat) -require(crayon) - -cat(crayon::yellow("\nTest-runner : Write, compile and run.\n")) - -testthat::test_that("Test-setter_0a: tp_write", { - cat("\tTest-setter_0a : tp_write\n") - - model_right <- - "/*\n * File: coin.tppl\n * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data.\n * Compilation:\n * tpplc models/lang/coin.tppl models/data/examples.mc out.mc && mi compile out.mc\n * Execution: ./out 100 1\n */\n\n/**\n * Conditions the likelihood of the computation \n * on an observed datapoint to come from a particular Bernoulli experiment \n * Parameters:\n * datapoint: Real\n * probability: Real in (0, 1), the probability of True in the Bernoulli experiment\n * Returns: nothing\n * Side-effects: reweighs the computation\n */\nfunction flip(datapoint: Bool, probability: Real) {\n observe datapoint ~ Bernoulli(probability);\n}\n\n/*\n * Model function\n * Data:\n * coinflips: Bool[]\n * Prior: \n * p ~ Beta(2, 2)\n * Posterior:\n * p | coinflips\n */\nmodel function coinModel(coinflips: Bool[]) => Real {\n // Uncomment if you want to test the input\n //printLn(\"Input:\");\n //let coinStr = apply(bool2string, coinflips);\n //printLn(join(coinStr));\n assume p ~ Beta(2.0, 2.0); // prior\n let n = length(coinflips);\n for i in 1 to n {\n flip(coinflips[i], p); // likelihood\n }\n return(p); // posterior\n}\n" - class(model_right) <- "custom" - data_right <- - list( - coinflips = c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - ) - - treepplr:::tp_write(model = model_right, data = data_right) - - model <- tp_model(paste0(temp_dir, "tmp_model_file.tppl")) - data <- tp_data(paste0(temp_dir, "tmp_data_file.json")) - - expect_equal(model, model_right) - expect_equal(data, data_right) -}) - -testthat::test_that("Test-runner_1a : tp_compile", { - cat("\tTest-runner_1a : tp_compile without option \n") - - model_right <- - "/*\n * File: coin.tppl\n * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data.\n * Compilation:\n * tpplc models/lang/coin.tppl models/data/examples.mc out.mc && mi compile out.mc\n * Execution: ./out 100 1\n */\n\n/**\n * Conditions the likelihood of the computation \n * on an observed datapoint to come from a particular Bernoulli experiment \n * Parameters:\n * datapoint: Real\n * probability: Real in (0, 1), the probability of True in the Bernoulli experiment\n * Returns: nothing\n * Side-effects: reweighs the computation\n */\nfunction flip(datapoint: Bool, probability: Real) {\n observe datapoint ~ Bernoulli(probability);\n}\n\n/*\n * Model function\n * Data:\n * coinflips: Bool[]\n * Prior: \n * p ~ Beta(2, 2)\n * Posterior:\n * p | coinflips\n */\nmodel function coinModel(coinflips: Bool[]) => Real {\n // Uncomment if you want to test the input\n //printLn(\"Input:\");\n //let coinStr = apply(bool2string, coinflips);\n //printLn(join(coinStr));\n assume p ~ Beta(2.0, 2.0); // prior\n let n = length(coinflips);\n for i in 1 to n {\n flip(coinflips[i], p); // likelihood\n }\n return(p); // posterior\n}\n" - class(model_right) <- "custom" - data_right <- list( - coinflips = c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - ) - model_data_right <- list(model = model_right, data = data_right) - - treepplr:::tp_write(model = model_right, data = data_right) - - treepplr:::tp_compile() - - expect_no_error(readBin(paste0(temp_dir, "tmp_model_file.exe"), "raw", 10e6)) -}) - -testthat::test_that("Test-runner_1b : tp_compile", { - cat("\tTest-runner_1b : tp_compile with option \n") - - model <- tp_model("coin") - data <- tp_data("coin") - - treepplr:::tp_write( - model = model, - model_file_name = "coin", - data = data, - data_file_name = "coin" - ) - treepplr:::tp_compile(model_file_name = "coin", - method = "smc-bpf") - - expect_no_error(readBin(paste0(temp_dir, "coin.exe"), "raw", 10e6)) - -}) - -testthat::test_that("Test-runner_2a : tp_run", { - cat("\tTest-runner_2a : tp_run without option \n") - - model_right <- - "/*\n * File: coin.tppl\n * Description: Simplest meaningful probabilistic program. Evaluates how likely it is that a coin is fair, given data.\n * Compilation:\n * tpplc models/lang/coin.tppl models/data/examples.mc out.mc && mi compile out.mc\n * Execution: ./out 100 1\n */\n\n/**\n * Conditions the likelihood of the computation \n * on an observed datapoint to come from a particular Bernoulli experiment \n * Parameters:\n * datapoint: Real\n * probability: Real in (0, 1), the probability of True in the Bernoulli experiment\n * Returns: nothing\n * Side-effects: reweighs the computation\n */\nfunction flip(datapoint: Bool, probability: Real) {\n observe datapoint ~ Bernoulli(probability);\n}\n\n/*\n * Model function\n * Data:\n * coinflips: Bool[]\n * Prior: \n * p ~ Beta(2, 2)\n * Posterior:\n * p | coinflips\n */\nmodel function coinModel(coinflips: Bool[]) => Real {\n // Uncomment if you want to test the input\n //printLn(\"Input:\");\n //let coinStr = apply(bool2string, coinflips);\n //printLn(join(coinStr));\n assume p ~ Beta(2.0, 2.0); // prior\n let n = length(coinflips);\n for i in 1 to n {\n flip(coinflips[i], p); // likelihood\n }\n return(p); // posterior\n}\n" - class(model_right) <- "custom" - data_right <- list( - coinflips = c( - FALSE, - TRUE, - TRUE, - TRUE, - FALSE, - TRUE, - TRUE, - FALSE, - FALSE, - TRUE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - FALSE, - TRUE, - TRUE, - TRUE - ) - ) - model_data_right <- list(model = model_right, data = data_right) - - treepplr:::tp_write(model = model_right, data = data_right) - - treepplr:::tp_compile() - - out <- treepplr:::tp_run() - - expect_no_error(readBin(paste0(temp_dir, "tmp_model_file_out.json"), "raw", 10e6)) -}) - -testthat::test_that("Test-runner_2b : tp_run", { - cat("\tTest-runner_2b : tp_run with option \n") - - model <- tp_model("coin") - data <- tp_data("coin") - - treepplr:::tp_write( - model = model, - model_file_name = "coin", - data = data, - data_file_name = "coin" - ) - - treepplr:::tp_compile(model_file_name = "coin", - samples = 1000, - method = "smc-bpf") - - out <- treepplr:::tp_run( - model_file_name = "coin", - data_file_name = "coin", - n_runs = 1 - ) - - expect_no_error(readBin(paste0(temp_dir, "coin_out.json"), "raw", 10e6)) -}) - -testthat::test_that("Test-runner_3a : tp_treppl", { - cat("\tTest-runner_3a : tp_treppl with hostrep model \n") - - model <- tp_model("hostrep3states") - data <- tp_data("hostrep3states") - - out <- tp_treeppl( - model = model, - model_file_name = "hostrep", - data = data, - data_file_name = "hostrep", - samples = 10 - ) - - expect_no_error(tp_parse_host_rep(out, n_runs = 1)) -}) diff --git a/tests/testthat/test-tree_convertion.R b/tests/testthat/test-tree_convertion.R index 795498b..5d23695 100644 --- a/tests/testthat/test-tree_convertion.R +++ b/tests/testthat/test-tree_convertion.R @@ -57,7 +57,7 @@ testthat::test_that("Test-json_2 : tp_phylo_to_tppl_tree", { testthat::expect_equal(ex_tree, res_tree[[2]]) }) -ressources <- readRDS(paste0(system.file("extdata", package = "treepplr"), +resources <- readRDS(paste0(system.file("extdata", package = "treepplr"), treepplr:::sep(), "Alcedinidae_tre.rds")) ref <- jsonlite::fromJSON(paste0(system.file("extdata", package = "treepplr"), diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R deleted file mode 100644 index dfeaa4d..0000000 --- a/tests/testthat/test-utils.R +++ /dev/null @@ -1,77 +0,0 @@ -temp_dir <- treepplr::tp_tempdir(temp_dir = NULL) -setwd(temp_dir) -require(testthat) -require(crayon) - -cat(crayon::yellow("\nTest-utils : Test utilitary function for treepplr.\n")) - -options(warn=-1) -tmp <- list.files(tp_tempdir()) -list_na <- do.call(rbind,lapply(c("exe","tppl","json") , function(x) { - paste0(stringr::str_extract(tmp, paste0(".*(?=\\.", x, ")")),".",x)})) - list <- list_na[!is.na(list_na)] -do.call(file.remove, list(list)) -options(warn=0) - -lis <- - c( - "test.json", - "test_out.json", - "testt_out.json", - "test1.json", - "test1_out.json", - "test.tppl", - "test1.tppl", - "testx.exe", - "testx1.exe" - ) -sapply(lis, function(x) write(".", file = x)) - -testthat::test_that("Test-utils_1a : stored_files", { - cat("\tTest-utils_1a : stored_files with tppl\n") - - res <- treepplr:::stored_files("tppl") - - testthat::expect_equal(res, c("test", "test1")) -}) - -testthat::test_that("Test-utils_1b : stored_files", { - cat("\tTest-utils_1b : stored_files with json\n") - - res <- treepplr:::stored_files("json") - - testthat::expect_equal(res, c("test", "test_out", "test1", "test1_out", - "testt_out")) -}) - -testthat::test_that("Test-utils_1c : stored_files", { - cat("\tTest-utils_1c : stored_files with exe\n") - - res <- treepplr:::stored_files("exe") - - testthat::expect_equal(res, c("testx", "testx1")) -}) - -testthat::test_that("Test-utils_2 : tp_stored_model", { - cat("\tTest-utils_2 : tp_stored_model with tppl\n") - - res <- treepplr::tp_stored_model() - - testthat::expect_equal(res, c("test", "test1")) -}) - -testthat::test_that("Test-utils_3 : tp_stored_data", { - cat("\tTest-utils_3 : tp_stored_data\n") - - res <- treepplr::tp_stored_data() - - testthat::expect_equal(res, c("test", "test1")) -}) - -testthat::test_that("Test-utils_4 : tp_stored_compiled", { - cat("\tTest-utils_4 : tp_stored_compiled\n") - - res <- treepplr::tp_stored_compiled() - - testthat::expect_equal(res, c("testx", "testx1")) -}) diff --git a/vignettes/coin-example.Rmd b/vignettes/coin-example.Rmd index 59a899a..908db1e 100644 --- a/vignettes/coin-example.Rmd +++ b/vignettes/coin-example.Rmd @@ -36,26 +36,21 @@ library(dplyr) library(ggplot2) ``` -## Load model and data files +## Understanding the coin model -Now we can load the coin model script (the probabilistic program describing -the coin flipping model) provided with the *treepplr* package using the following command: +The coin model is one of the most basic models in the TreePPL model library. You +can find all available models and some information about them [here](https://treeppl.org/docs/model-library). -```{r} -model <- tp_model("coin") -``` - -You can load any TreePPL model script into *treepplr* using the same command, -replacing `"coin"` with the file name of your script. +If you want to look at the TreePPL code here in R, you can use the following functions: -To view the coin model script, simply type +```{r, eval = FALSE} +model_path <- tp_model("coin") +``` ```{r, eval=FALSE} -cat(model) +readr::read_file(model_path) ``` -which will print the script. - The main part of the model is defined in a function called `coinModel`: ``` @@ -119,25 +114,46 @@ This completes the TreePPL description of the coin flipping model. All TreePPL model descriptions essentially follow the same format. For a more complete coverage of the TreePPL language, see the language overview in the [TreePPL online documentation](https://treeppl.org/docs/). + +## Model compilation and inference strategy + +TreePPL offers a variety of inference methods. Different methods work best for +different models. Here, we will use sequential Monte Carlo (SMC), specifically +the bootstrap particle filter version (the `method=smc-bpf` option). + +The function `tp_compile()` has many optional arguments that allow you to select among the inference +methods supported by TreePPL, and setting relevant options for each one of them. +For an up-to-date description of available inference strategies supported, +see `tp_compile_options()`. + +Now let's compile the model to en executable that also contains the necessary machinery +to run the chosen inference method. + +```{r, eval = FALSE} +exe_path <- tp_compile(model = "coin", method = "smc-bpf", particles = 1000) +``` + +## Data + Now we can start analyzing the model by inferring the value of `p` given some observed sequence of coin flips. To do this, we need to provide the observations in a suitable format. To load the example data provided in the *treepplr* package, use: -```{r} -data <- tp_data("coin") +```{r, eval=FALSE} +data <- tp_data(data_input = "coin") ``` -We can look at the structure of the input data using +We can look at the structure of the input data using: -```{r} -str(data) +```{r, eval=FALSE} +jsonlite::fromJSON(data) ``` -The data are provided as an R list. -Each element in the list is named using the corresponding argument name expected by the TreePPL model function. -The arguments can be given in any order in the list. -In our case, the model function only takes one argument called `coinflips`, -so the R list only contains one element named `coinflips`. +```` +$coinflips + [1] TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE +```` + The TreePPL compiler requires data in JSON format. The conversion from R variables to appropriate JSON code understood by TreePPL @@ -145,25 +161,24 @@ is done automatically by *treepplr* for supported data types. For instance, logical vectors in R are converted to sequences of Booleans (`Bool[]`) in TreePPL. +In R, the data are collected in a list. +Each element in the list is named using the corresponding argument name expected by the TreePPL model function. +The arguments can be given in any order in the list. +In our case, the model function only takes one argument called `coinflips`, +so the R list only contains one element named `coinflips`. + + ## Run TreePPL -Now we can compile and run the TreePPL program, inferring the posterior distribution +Now we can run the TreePPL program, inferring the posterior distribution of `p` conditioned on the input data. -This is done using the *tp_treeppl()* function provided by *treepplr*. -The function has many optional arguments that allow you to select among the inference -methods supported by TreePPL, and setting relevant options for each one of them. -For an up-to-date description of available inference strategies supported, -see the documentation of the `tp_treeppl` function. +This is done using the `tp_run` function. -Here, we will use the default settings for inference methods. -It uses sequential Monte Carlo (SMC), specifically the bootstrap particle filter version -(the `method=smc-bpf` option). -It runs 100 sweeps (100 SMC runs, if you wish), using 100 particles for each sweep -(`n_runs=100, samples=100`). +Let's run 10 sweeps (10 SMC runs, if you wish). ```{r, eval=FALSE} -output_list <- tp_treeppl(model = model, data = data, samples = 100, n_runs = 100) +output_list <- tp_run(compiled_model = exe_path, data = data, n_sweeps = 10) ``` ```{r, echo=FALSE} @@ -185,29 +200,27 @@ and the likelihood weight of each returned value (one weight for each particle). In SMC we can assess the quality of the inference by running several sweeps and comparing their normalizing constants to test if they generated similar estimates of the posterior distribution. A popular argument from the SMC -literature suggests that the SMC estimate is accurate if the variance of the -estimates of the normalizing constant across sweeps is lower than 1. +literature suggests that the SMC estimate is accurate if the variance of the +estimates of the normalizing constant across sweeps is lower than 1. -To check the variance of the normalizing constant, use the *tp_smc_convergence()* function. +To check the variance of the normalizing constant, use the `tp_smc_convergence()` function. +But first, we'll use the `tp_parse_smc()` function to convert the results returned by TreePPL into an R data frame +of appropriately weighted values, taking both the particle weights and normalizing constants +(the sweep weights, if you wish) into account. Note that both the particle weights and +normalizing constants are given in log units. ```{r} -tp_smc_convergence(output_list) +output <- tp_parse_smc(output_list) + +tp_smc_convergence(output) ``` It seems that our run provides a quite accurate estimate of the posterior distribution, as the variance is much smaller than 1.0. It is also easy to plot the sampled values. -The *tp_parse()* function helps us convert the results returned by TreePPL into an R list -of appropriately weighted values, taking both the particle weights and normalizing constants -(the sweep weights, if you wish) into account. Note that both the particle weights and -normalizing constants are given in log units. ```{r, fig.height=5, fig.width=5} -output <- tp_parse(output_list) %>% - dplyr::mutate(total_lweight = log_weight + norm_const) %>% - dplyr::mutate(norm_weight = exp(total_lweight - max(.$total_lweight))) - ggplot2::ggplot(output, ggplot2::aes(samples, weight = norm_weight)) + ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), col = "white", fill = "lightblue", binwidth=0.01) + diff --git a/vignettes/constant-rate-birth.Rmd b/vignettes/constant-rate-birth.Rmd deleted file mode 100644 index b4d8a0a..0000000 --- a/vignettes/constant-rate-birth.Rmd +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: "Constant rate birth (CRB) model" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Constant-Rate-Birth} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE -) - -options(rmarkdown.html_vignette.check_title = FALSE) -``` - -```{r setup} -library(treepplr) -library(ape) -library(ggplot2) -library(magrittr) -``` - - -## Data - -We will use an example (random) tree that comes with the package. - -```{r, fig.width=6, fig.height=4} -tree <- ape::read.tree(system.file( - "extdata/crb_tree_15_tips.tre", package = "treepplr")) -ape::plot.phylo(tree, cex = 0.5) -axisPhylo() -``` - -We need to convert the tree to a **TreePPL** readable format and read the CRB model. - -```{r} -data <- tp_phylo_to_tpjson(tree, age="tip-to-root") -model <- tp_model(system.file("extdata/crb.tppl", package = "treepplr")) -``` - - -## Run treeppl - -Compile and run the TreePPL program with standard inference settings. - -```{r, eval=FALSE} -output_list <- tp_treeppl(model = model, data = data) -``` - -```{r, echo=FALSE} -output_list <- readRDS("rdata/crb/output_crb.rds") -``` - - -## Plot posterior - -TreePPL outputs the log weight of each sample, so first we need to get the -normalized weights and then we can plot the posterior distribution produced. - -```{r, fig.height=4, fig.width=6} -# turn list into a data frame where each row represents one sample -# and calculate normalized weights from log weights and normalizing constants. -output <- tp_parse(output_list) %>% - dplyr::mutate(total_lweight = log_weight + norm_const) %>% - dplyr::mutate(norm_weight = exp(total_lweight - max(.$total_lweight))) - -ggplot2::ggplot(output, ggplot2::aes(samples, weight = norm_weight)) + - ggplot2::geom_histogram(ggplot2::aes(y = ggplot2::after_stat(density)), - col = "white", fill = "lightblue", binwidth=0.04) + - ggplot2::geom_density() + - ggplot2::theme_bw() - -``` - diff --git a/vignettes/crbd-example.Rmd b/vignettes/crbd-example.Rmd new file mode 100644 index 0000000..5c80c16 --- /dev/null +++ b/vignettes/crbd-example.Rmd @@ -0,0 +1,47 @@ +--- +title: "Constant-rate Birth Death example" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{crbd-example} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = FALSE, + message = FALSE +) + +options(rmarkdown.html_vignette.check_title = FALSE) +``` + +```{r setup} +library(treepplr) +``` + +```{r, eval = FALSE} +exe_path <- tp_compile("crbd", "smc-apf", particles = 5000) +data_path <- tp_data("crbd") + +output_list <- tp_run(exe_path, data_path, n_sweeps = 4) +``` + +```{r, echo = FALSE} +output_list <- readRDS("rdata/crbd/output_crbd.rds") +``` + + +```{r} +output <- tp_parse_smc(output_list) + +tp_smc_convergence(output) +``` + + + + diff --git a/vignettes/hostrep-example.Rmd b/vignettes/hostrep-example.Rmd deleted file mode 100644 index 91f7efe..0000000 --- a/vignettes/hostrep-example.Rmd +++ /dev/null @@ -1,152 +0,0 @@ ---- -title: "Host repertoire model example" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{hostrep-example} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE -) -``` - -```{r, message=FALSE} -library(treepplr) -library(ggplot2) -library(magrittr) -library(dplyr) -library(ape) - -if(!require("evolnets", quietly = TRUE)) { - library(devtools) - if (!require("BiocManager", quietly = TRUE)) - install.packages("BiocManager") - library(BiocManager) - BiocManager::install("ggtree") - devtools::install_github("maribraga/evolnets") - library(ggtree) - library(evolnets) -} else { - library(evolnets) -} - -``` - -This is a very simplified workflow in **treepplr** for analysis of host repertoire evolution. The data used here is simulated and the inference is too simple. A real analysis would include many other steps such as testing of different inference methods and check of convergence. - -The purpose of this vignette is to show how to run **TreePPL** with **treepplr**, and how to process the output with **evolnets**. - -## Load model and data files - -Load the 3-state host repertoire model and example data available within **treepplr**. - -```{r} -model <- tp_model("hostrep3states") -data <- tp_data("hostrep3states") -``` - -## Run treeppl - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vignettes/rdata/coin/output_coin.rds b/vignettes/rdata/coin/output_coin.rds index c59473f..bea0d62 100644 Binary files a/vignettes/rdata/coin/output_coin.rds and b/vignettes/rdata/coin/output_coin.rds differ diff --git a/vignettes/rdata/crb/output_crb.rds b/vignettes/rdata/crb/output_crb.rds deleted file mode 100644 index 8667349..0000000 Binary files a/vignettes/rdata/crb/output_crb.rds and /dev/null differ diff --git a/vignettes/rdata/crbd/output_crbd.rds b/vignettes/rdata/crbd/output_crbd.rds new file mode 100644 index 0000000..fd55dec Binary files /dev/null and b/vignettes/rdata/crbd/output_crbd.rds differ diff --git a/vignettes/rdata/hostrep/output_hostrep3states.rds b/vignettes/rdata/hostrep/output_hostrep3states.rds deleted file mode 100644 index f8c3f65..0000000 Binary files a/vignettes/rdata/hostrep/output_hostrep3states.rds and /dev/null differ diff --git a/vignettes/rdata/tree_inference/output_tree_inference.rds b/vignettes/rdata/tree_inference/output_tree_inference.rds deleted file mode 100644 index e6222b1..0000000 Binary files a/vignettes/rdata/tree_inference/output_tree_inference.rds and /dev/null differ diff --git a/vignettes/tree_inference.Rmd b/vignettes/tree_inference.Rmd deleted file mode 100644 index aa95c00..0000000 --- a/vignettes/tree_inference.Rmd +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: "Tree inference" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{tree_inference} - %\VignetteEncoding{UTF-8} - %\VignetteEngine{knitr::rmarkdown} -editor_options: - chunk_output_type: console ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>", - warning = FALSE, - message = FALSE -) - -options(rmarkdown.html_vignette.check_title = FALSE) -``` - -```{r setup} -library(treepplr) -library(dplyr) -library(ggplot2) -library(ape) -``` - -## Load model and data files - -Load the tree inference model and example data available within `treepplr`. - -```{r} -model <- tp_model("tree_inference") -data <- tp_data("tree_inference") -``` - -The data in this example is a toy dataset ... - -```{r} -str(data) -``` - -## Run TreePPL - -Now we can compile and run the TreePPL program. The function `tp_treeppl()` has -many optional arguments to change the inference method used. Here, we will use - -```{r, eval=FALSE} -output_list <- tp_treeppl(model = model, data = data, method = "smc-apf", - samples = 1000, subsample = 5, resample = "manual", - n_runs = 10) -``` - -```{r, echo=FALSE} -output_list <- readRDS("rdata/tree_inference/output_tree_inference.rds") -``` - -The result is a list of sweeps, each one containing 3 elements: the sampled -parameter values and the weights (in log scale) for each of the 5 particles in -each sweep, and the normalizing constant for the whole sweep. - -```{r} -str(output_list,max.level = 2) -``` - -## Plot the posterior distribution - -In this example we will check that the different runs converged to the same posterior, -by checking the variance in the normalizing constant among runs. - -```{r, fig.height=5, fig.width=5} -tp_smc_convergence(output_list) -``` - -There are different ways to summarize the posterior distribution of trees. In -this examples, we calculate the the MAP (Maximum A Posteriori) tree. We do this -by finding the single tree topology that has the highest posterior probability, -and then using `ape::consensus` to compute average branch lengths for all sampled -trees with the MAP topology. - -```{r, fig.height=4, fig.width=5} -out_trees <- tp_json_to_phylo(output_list) - -map_tree <- tp_map_tree(out_trees) -plot(map_tree) -axisPhylo() -``` - diff --git a/vignettes/treepplr.Rmd b/vignettes/treepplr.Rmd new file mode 100644 index 0000000..7329690 --- /dev/null +++ b/vignettes/treepplr.Rmd @@ -0,0 +1,166 @@ +--- +title: "Quick package overview" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{overview} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + warning = FALSE, + message = FALSE, + eval = FALSE +) + +options(rmarkdown.html_vignette.check_title = FALSE) +``` + +```{r setup, echo=FALSE} +library(treepplr) +``` + +`treepplr` is an interface for using the TreePPL program. All functions start +with `tp_` to easily distinguish them from functions from other packages. + +The three necessary parts for doing analysis with TreePPL are: model, data and +inference machinery. + +## Model + +You can choose a model in TreePPL language from our +[library](https://treeppl.org/docs/model-library) or you can write your own model. + +To list all available models in our library, use `tp_model_library()` to retrieve +the models within the [TreePPL github repository](https://github.com/treeppl/treeppl/tree/main/models). + +```{r} +model_lib <- tp_model_library() +``` + +To use one of these models you just need it's name in `model_lib$model_name`. + +If you want to use your own custom model, you will need to write it in TreePPL +language and pass it to an R object that contains either the full path to the +`.tppl` file containing the model, or a string with the full model. + +```{r} +# import a model from file +model_path <- "path/to/my_model.tppl" +``` + +## Data + + + + + + + + + +TreePPL only reads a custom JSON format, so `treepplr` converts a variety of +input data to this format and writes to file, which will then be used by TreePPL. +Here are some examples: + +```{r} +# for models that only need a phylogenetic tree +phylo <- ape::read.tree(file = "path/to/your/file.tre") +data_path <- tp_data(data_input = phylo) + +# or sequence data +fasta_file <- "path/to/your/file.fasta" +data_path <- tp_data(data_input = fasta_file) +``` + +As for models, you can also use test datasets from the TreePPL library by passing +the name of the model (we'll come back to this later). + +## Inference method + +TreePPL offers a variety of inference methods. Different methods work best for +different models. See the [model library](https://treeppl.org/docs/model-library) +for our recommendations of which inference methods to choose for each model. + + + + + + + + + +## Compilation + +Once you have chosen the model and the inference method you want to use, you can +compile your model to en executable that also contains the necessary machinery +to run the chosen inference method. + +```{r} +# Using a model from the library and a Sequential Monte Carlo method +exe_path <- tp_compile(model = "crbd", method = "smc-apf", particles = 10000) + +# Using a custom model and a Markov chain Monte Carlo method +exe_path <- tp_compile(model = model_path, method = "mcmc-lightweight", + iterations = 10000) +``` + + +## Running + +Now you are ready to run your analysis. All you have to do is to pass your data +to the compiled executable and choose how many independent runs you want to do. + +```{r} +output <- tp_run(compiled_model = exe_path, data = data_path, n_runs = 4) +``` + + +## Convergence + +Then you can parse your output to produce a data frame and check for convergence. + +```{r} +# If using SMC +output_df_smc <- tp_parse_smc(output) + +tp_smc_convergence(output_df_smc) +``` + +```{r} +# If using MCMC +output_df_mcmc <- tp_parse_mcmc(output) +``` + + +## Post-processing + +Different models produce different outputs and thus require different post-processing. +See the model-specific tutorials for ways to process your TreePPL. + + + + + + + + + + + + + + + + + + + + + +