From 465c56c9126e0acdb784a2b982993f71341fcee0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 12:50:11 +0000 Subject: [PATCH 01/11] Initial plan From 271af51132d85c2a7e3f2de313bb1ba462695ce3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 13:42:05 +0000 Subject: [PATCH 02/11] WIP: Add split_frequencies C++ function using ClusterTable approach Co-authored-by: ms609 <1695515+ms609@users.noreply.github.com> --- R/RcppExports.R | 4 ++ R/Support.R | 56 ++++++++++++--- src/RcppExports.cpp | 12 ++++ src/consensus.cpp | 165 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+), 11 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index af2c634a..8117f656 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -25,6 +25,10 @@ as_newick <- function(edge) { .Call(`_TreeTools_as_newick`, edge) } +split_frequencies <- function(trees) { + .Call(`_TreeTools_split_frequencies`, trees) +} + consensus_tree <- function(trees, p) { .Call(`_TreeTools_consensus_tree`, trees, p) } diff --git a/R/Support.R b/R/Support.R index 6bcf22f7..8be881c9 100644 --- a/R/Support.R +++ b/R/Support.R @@ -37,18 +37,52 @@ SplitFrequency <- function(reference, forest = NULL) { if (length(unique(lapply(lapply(forest, TipLabels), sort))) > 1) { stop("All trees must bear identical labels") } - forestSplits <- do.call(c, as.Splits(forest, tipLabels = TipLabels(forest[[1]]))) - dup <- duplicated(forestSplits) - ret <- forestSplits[[!dup]] - logicals <- vapply(seq_along(forestSplits), - function(cf) ret %in% forestSplits[[cf]], - logical(sum(!dup))) - count <- if (is.null(dim(logicals))) { - sum(logicals) - } else { - rowSums(logicals) + tipLabels <- TipLabels(forest[[1]]) + forest <- RenumberTips(forest, tipLabels) + forest <- Preorder(forest) + result <- split_frequencies(forest) + splits <- result[["splits"]] + counts <- result[["counts"]] + nTip <- length(tipLabels) + nbin <- ncol(splits) + if (nrow(splits) == 0) { + ret <- structure(splits, + nTip = nTip, + tip.label = tipLabels, + class = "Splits") + attr(ret, "count") <- integer(0) + return(ret) + } + # The ClusterTable outputs clusters (clades); normalize so bit 0 (tip 1) + # is not in the set (matching as.Splits convention) + nTipMod <- nTip %% 8L + lastByteMask <- if (nTipMod == 0L) as.raw(0xff) else as.raw(bitwShiftL(1L, nTipMod) - 1L) + keep <- logical(nrow(splits)) + for (i in seq_len(nrow(splits))) { + val <- splits[i, ] + # Count bits set (to filter trivial splits) + nBits <- sum(vapply(as.integer(val), function(b) sum(as.integer(intToBits(b))), integer(1))) + if (nBits < 2L || nBits > nTip - 2L) next # trivial split + # Normalize: if bit 0 is NOT set, complement to match as.Splits format + if (!as.logical(as.integer(val[1]) %% 2L)) { + for (j in seq_along(val)) { + splits[i, j] <- as.raw(bitwXor(as.integer(val[j]), 0xffL)) + } + # Mask last byte + if (nTipMod > 0L) { + splits[i, nbin] <- as.raw(bitwAnd(as.integer(splits[i, nbin]), + as.integer(lastByteMask))) + } + } + keep[i] <- TRUE } - attr(ret, "count") <- unname(count) + splits <- splits[keep, , drop = FALSE] + counts <- counts[keep] + ret <- structure(splits, + nTip = nTip, + tip.label = tipLabels, + class = "Splits") + attr(ret, "count") <- counts ret } else { referenceSplits <- as.Splits(reference) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 72e00596..ccf10bbe 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -85,6 +85,17 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// split_frequencies +List split_frequencies(const List trees); +RcppExport SEXP _TreeTools_split_frequencies(SEXP treesSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const List >::type trees(treesSEXP); + rcpp_result_gen = Rcpp::wrap(split_frequencies(trees)); + return rcpp_result_gen; +END_RCPP +} // consensus_tree RawMatrix consensus_tree(const List trees, const NumericVector p); RcppExport SEXP _TreeTools_consensus_tree(SEXP treesSEXP, SEXP pSEXP) { @@ -478,6 +489,7 @@ static const R_CallMethodDef CallEntries[] = { {"_TreeTools_ape_neworder_phylo", (DL_FUNC) &_TreeTools_ape_neworder_phylo, 5}, {"_TreeTools_ape_neworder_pruningwise", (DL_FUNC) &_TreeTools_ape_neworder_pruningwise, 5}, {"_TreeTools_as_newick", (DL_FUNC) &_TreeTools_as_newick, 1}, + {"_TreeTools_split_frequencies", (DL_FUNC) &_TreeTools_split_frequencies, 1}, {"_TreeTools_consensus_tree", (DL_FUNC) &_TreeTools_consensus_tree, 2}, {"_TreeTools_descendant_edges", (DL_FUNC) &_TreeTools_descendant_edges, 3}, {"_TreeTools_descendant_edges_single", (DL_FUNC) &_TreeTools_descendant_edges_single, 5}, diff --git a/src/consensus.cpp b/src/consensus.cpp index e3502992..93a3b02b 100644 --- a/src/consensus.cpp +++ b/src/consensus.cpp @@ -6,6 +6,7 @@ using namespace Rcpp; #include /* for fill */ #include /* for array */ +#include /* for map */ using TreeTools::ct_stack_threshold; using TreeTools::ct_max_leaves_heap; @@ -142,6 +143,170 @@ RawMatrix calc_consensus_tree( (splits_found < ntip_3) ? ret(Range(0, splits_found - 1), _) : ret; } +// Helper template function to compute split frequencies for all splits +// Like calc_consensus_tree but without threshold or early exit +template +List calc_split_frequencies( + const List& trees, + StackContainer& S +) { + int32 v = 0; + int32 w = 0; + int32 L, R, N, W; + + const int32 n_trees = trees.length(); + + std::vector tables; + tables.reserve(n_trees); + for (int32 i = 0; i < n_trees; ++i) { + tables.emplace_back(TreeTools::ClusterTable(Rcpp::List(trees(i)))); + } + + const int32 n_tip = tables[0].N(); + const int32 ntip_3 = n_tip - 3; + const int32 nbin = (n_tip + 7) / 8; // bytes per row in packed output + + int32* split_count; + std::array split_stack; + std::vector split_heap; + if (n_tip <= ct_stack_threshold) { + split_count = split_stack.data(); + } else { + split_heap.resize(n_tip); + split_count = split_heap.data(); + } + + StackEntry *const S_start = S.data(); + + // Use a map to store unique splits and their counts + // Key: split bit pattern; Value: index in output + std::map, int32> split_map; + std::vector> split_patterns; + std::vector counts; + + for (int32 i = 0; i < n_trees; ++i) { + if (tables[i].NOSWX(ntip_3)) { + continue; + } + + std::fill(split_count, split_count + n_tip, 1); + + for (int32 j = i + 1; j < n_trees; ++j) { + ASSERT(tables[i].N() == tables[j].N()); + + tables[i].CLEAR(); + + tables[j].TRESET(); + tables[j].READT(&v, &w); + + int32 j_pos = 0; + StackEntry* S_top = S_start; // Empty the stack S + + do { + if (CT_IS_LEAF(v)) { + const auto enc_v = tables[i].ENCODE(v); + *S_top++ = {enc_v, enc_v, 1, 1}; + } else { + const StackEntry& entry = *--S_top; + L = entry.L; R = entry.R; N = entry.N; + W = 1 + entry.W; + w -= entry.W; + while (w) { + const StackEntry& next = *--S_top; + L = std::min(L, next.L); + R = std::max(R, next.R); + N += next.N; + W += next.W; + w -= next.W; + } + + *S_top++ = {L, R, N, W}; + + ++j_pos; + + if (!tables[j].GETSWX(&j_pos)) { + if (N == R - L + 1) { + if (tables[i].CLUSTONL(L, R)) { + tables[j].SETSWX(j_pos); + ASSERT(L > 0); + ++split_count[L - 1]; + } else if (tables[i].CLUSTONR(L, R)) { + tables[j].SETSWX(j_pos); + ASSERT(R > 0); + ++split_count[R - 1]; + } + } + } + } + tables[j].NVERTEX_short(&v, &w); + } while (v); + } + + for (int32 k = 0; k < n_tip; ++k) { + const int32 start = tables[i].X_left(k + 1); + const int32 end = tables[i].X_right(k + 1); + if (start == 0 && end == 0) continue; // No valid cluster at this position + + // Build the bit pattern for this split + std::vector pattern(nbin, 0); + for (int32 j = start; j <= end; ++j) { + const int32 leaf_idx = tables[i].DECODE(j) - 1; // 0-based + const int32 byte_idx = leaf_idx >> 3; + const int32 bit_idx = leaf_idx & 7; + pattern[byte_idx] |= (Rbyte(1) << bit_idx); + } + + auto it = split_map.find(pattern); + if (it == split_map.end()) { + // New split: record it with count from this reference tree + const int32 idx = split_patterns.size(); + split_map[pattern] = idx; + split_patterns.push_back(std::move(pattern)); + counts.push_back(split_count[k]); + } + // If already found, the first reference tree that found it has the + // correct total count (it compared against all later trees). + } + } + + const int32 splits_found = split_patterns.size(); + RawMatrix ret(splits_found, nbin); + + for (int32 r = 0; r < splits_found; ++r) { + for (int32 c = 0; c < nbin; ++c) { + ret(r, c) = split_patterns[r][c]; + } + } + + IntegerVector count_vec(counts.begin(), counts.end()); + + return List::create( + Named("splits") = ret, + Named("counts") = count_vec + ); +} + +// [[Rcpp::export]] +List split_frequencies(const List trees) { + try { + TreeTools::ClusterTable temp_table(Rcpp::List(trees(0))); + const int32 n_tip = temp_table.N(); + + if (n_tip <= ct_stack_threshold) { + std::array S; + return calc_split_frequencies(trees, S); + } else { + std::vector S(n_tip); + return calc_split_frequencies(trees, S); + } + } catch(const std::exception& e) { + Rcpp::stop(e.what()); + } + + ASSERT(false && "Unreachable code in split_frequencies"); + return List(); +} + // trees is a list of objects of class phylo, all with the same tip labels // (try RenumberTips(trees, trees[[1]])) // Per #168, unexpected behaviour if root position differs in non-preorder trees From d080ac6b41d106872e940105a7e61f2bc557ed0a Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:22:29 +0000 Subject: [PATCH 03/11] Support `counts` parameter --- NAMESPACE | 2 ++ NEWS.md | 3 ++- R/Splits.R | 47 +++++++++++++++++++++++++++++++---- tests/testthat/test-Support.R | 15 +++++++++-- 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 40000661..3372bf5d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -254,12 +254,14 @@ S3method(print,ClusterTable) S3method(print,Splits) S3method(print,TreeNumber) S3method(rev,Splits) +S3method(sort,Splits) S3method(sort,multiPhylo) S3method(summary,ClusterTable) S3method(summary,Splits) S3method(t,Splits) S3method(tail,Splits) S3method(unique,Splits) +S3method(xtfrm,Splits) export(.CompatibleRaws) export(.CompatibleSplit) export(.RandomParent) diff --git a/NEWS.md b/NEWS.md index 28642bf7..803a0296 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # TreeTools 2.1.0.9000 (2026-02-16) # -- Add Splits and phylo methods for `SplitInformation()`. +- `SplitInformation()` supports `Splits` and `phylo` objects. +- `sort` and `order` support `Splits` objects. - `SplitFrequency(reference = NULL)` returns frequency of all splits. diff --git a/R/Splits.R b/R/Splits.R index 98c95e2b..8c10d09c 100644 --- a/R/Splits.R +++ b/R/Splits.R @@ -315,6 +315,7 @@ as.logical.Splits <- function(x, tipLabels = attr(x, "tip.label"), ...) { print.Splits <- function(x, details = FALSE, ...) { nTip <- attr(x, "nTip") tipLabels <- attr(x, "tip.label") + count <- attr(x, "count") trivial <- TrivialSplits(x) cat(dim(x)[1], "bipartition", ifelse(dim(x)[1] == 1, "split", "splits"), if(any(trivial)) paste0("(", sum(trivial), " trivial)"), @@ -324,9 +325,9 @@ print.Splits <- function(x, details = FALSE, ...) { } else { if (nTip) { if (nTip == 1) { - paste("tip,", tipLabels[1]) + paste("tip,", tipLabels[[1]]) } else { - paste("tips,", tipLabels[1], "..", tipLabels[nTip]) + paste("tips,", tipLabels[[1]], "..", tipLabels[[nTip]]) } } else { "tips" @@ -347,16 +348,52 @@ print.Splits <- function(x, details = FALSE, ...) { } cat("\n ", paste0(rep.int(" ", max(nameLengths)), collapse = ""), paste0(rep_len(c(1:9, " "), nTip), collapse = "")) - - for (i in seq_len(dim(x)[1])) { + + nSplits <- dim(x)[[1]] + splitCounts <- if (!is.null(count) && length(count) == nSplits) { + paste0("\UD7 ", count) + } else { + if (length(count) != nSplits) { + warning("\"count\" attribute does not match number of splits") + } + character() + } + + for (i in seq_len(nSplits)) { split <- x[i, , drop = FALSE] cat("\n", splitNames[i], "", paste(ifelse(as.logical(rawToBits(split)[seq_len(nTip)]), "*", "."), - collapse = "")) + collapse = ""), + splitCounts[i]) } } } +#' @family Splits operations +#' @export +sort.Splits <- function(x, decreasing = TRUE, ...) { + newOrder <- order(x, decreasing = decreasing, ...) + count <- attr(x, "count") + if (is.null(count)) { + x[[newOrder]] + } else { + structure(x[[newOrder]], count = count[newOrder]) + } +} + +# Underpins `order` +#' @family Splits operations +#' @export +xtfrm.Splits <- function(x) { + count <- attr(x, "count") + newOrder <- xtfrm(as.integer(x)) + if (is.null(count)) { + newOrder + } else { + order(count, newOrder) + } +} + #' @family Splits operations #' @importFrom utils head #' @export diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index 8ad2cbb2..60e012c5 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -14,8 +14,19 @@ test_that("Node supports calculated correctly", { expect_error(SplitFrequency(NULL, treeSample), "must bear identical") sameTips <- KeepTip(treeSample, TipLabels(treeSample$correct)) sameSplits <- do.call(c, as.Splits(sameTips)) - expect_equal(SplitFrequency(sameTips), - structure(sameSplits[[!duplicated(sameSplits)]], + + expect_split_counts <- function(object, expected) { + counted_splits <- function(x) { + count <- attr(x, "count") + newOrder <- order(order(x), count, decreasing = TRUE) + structure(x[[newOrder]], count = count[newOrder]) + } + expect_equal(counted_splits(object), counted_splits(expected)) + } + + expect_split_counts( + SplitFrequency(sameTips), + structure(PolarizeSplits(sameSplits[[!duplicated(sameSplits)]], 1), count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) ) From 67d9f07d07702a9162feada95b5db2c4179cbe8b Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:37:08 +0000 Subject: [PATCH 04/11] Sort for testing --- R/Splits.R | 14 +++++++------- tests/testthat/test-Support.R | 22 ++++++++-------------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/R/Splits.R b/R/Splits.R index 8c10d09c..507fa41e 100644 --- a/R/Splits.R +++ b/R/Splits.R @@ -350,13 +350,13 @@ print.Splits <- function(x, details = FALSE, ...) { paste0(rep_len(c(1:9, " "), nTip), collapse = "")) nSplits <- dim(x)[[1]] - splitCounts <- if (!is.null(count) && length(count) == nSplits) { - paste0("\UD7 ", count) - } else { + splitCounts <- if (!is.null(count)) { if (length(count) != nSplits) { warning("\"count\" attribute does not match number of splits") } - character() + paste0("\UD7 ", count) + } else { + rep("", nSplits) } for (i in seq_len(nSplits)) { @@ -386,11 +386,11 @@ sort.Splits <- function(x, decreasing = TRUE, ...) { #' @export xtfrm.Splits <- function(x) { count <- attr(x, "count") - newOrder <- xtfrm(as.integer(x)) + splitRanking <- as.integer(x) if (is.null(count)) { - newOrder + splitRanking } else { - order(count, newOrder) + count + (splitRanking / max(splitRanking)) } } diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index 60e012c5..e956e969 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -14,25 +14,19 @@ test_that("Node supports calculated correctly", { expect_error(SplitFrequency(NULL, treeSample), "must bear identical") sameTips <- KeepTip(treeSample, TipLabels(treeSample$correct)) sameSplits <- do.call(c, as.Splits(sameTips)) - - expect_split_counts <- function(object, expected) { - counted_splits <- function(x) { - count <- attr(x, "count") - newOrder <- order(order(x), count, decreasing = TRUE) - structure(x[[newOrder]], count = count[newOrder]) - } - expect_equal(counted_splits(object), counted_splits(expected)) - } - expect_split_counts( - SplitFrequency(sameTips), - structure(PolarizeSplits(sameSplits[[!duplicated(sameSplits)]], 1), - count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) + expect_equal( + SplitFrequency(sameTips) |> + PolarizeSplits(1) |> sort() |> unname(), + structure(sameSplits[[!duplicated(sameSplits)]], + count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) |> + PolarizeSplits(1) |> sort() |> unname() ) + monoSplit <- ape::read.tree(text = "((a, b, c, d), (e, f, g));") expect_equal(SplitFrequency(list(monoSplit)), - structure(as.Splits(monoSplit), count = 1)) + structure(unname(as.Splits(monoSplit)), count = 1)) # Internal nodes on each side of root balanced <- ape::read.tree(text="((D, (E, (F, out))), (C, (A, B)));") From 749a7ae25c320e07cb23d595547e96a387075020 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:41:24 +0000 Subject: [PATCH 05/11] Print split counts --- tests/testthat/test-Splits.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-Splits.R b/tests/testthat/test-Splits.R index 05f8d99e..1386f199 100644 --- a/tests/testthat/test-Splits.R +++ b/tests/testthat/test-Splits.R @@ -12,12 +12,12 @@ test_that("as.Splits()", { "\n")[[1]], c("1 bipartition split dividing 4 tips, t1 .. t4", " 1234", - " ..**", "", + " ..** ", "", " Tip 1: t1\t Tip 2: t2\t Tip 3: t3\t Tip 4: t4\t")) logical80 <- c(rep(TRUE, 40), rep(FALSE, 16), rep(TRUE, 24)) expect_equal(strsplit(capture_output(print( as.Splits(logical80), detail = TRUE)), "\n")[[1]][3], - paste0(c(" ", ifelse(logical80, "*", ".")), collapse = "") + paste0(c(" ", ifelse(logical80, "*", "."), " "), collapse = "") ) expect_equal(as.logical(as.logical(as.Splits(logical80))), logical80) expect_equal(as.logical(as.Splits(c(A, A, B, B))), @@ -416,10 +416,15 @@ test_that("print.Splits()", { expect_equal( capture.output(print(PolarizeSplits(sp4, 1), details = TRUE)), c( "1 bipartition split dividing 4 tips, t1 .. t4", " 1234", - paste0(" ", num, " **.."))) + paste0(" ", num, " **.. "))) expect_equal(capture.output(print(PolarizeSplits(sp4, 4), details = TRUE)), c( "1 bipartition split dividing 4 tips, t1 .. t4", " 1234", - paste0(" ", num, " ..**"))) + paste0(" ", num, " ..** "))) + expect_equal( + capture.output(print(structure(PolarizeSplits(sp4, 4), count = 2), + details = TRUE)), + c( "1 bipartition split dividing 4 tips, t1 .. t4", " 1234", + paste0(" ", num, " ..** \UD7 2"))) }) test_that("head,tail.Splits()", { From 9714365e221f1a85fa625870adbc47ee63e20a00 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 16:01:58 +0000 Subject: [PATCH 06/11] printing small split sets --- R/Splits.R | 46 ++++++++++++++++++++---------------- tests/testthat/test-Splits.R | 10 ++++++++ 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/R/Splits.R b/R/Splits.R index 507fa41e..09b56e61 100644 --- a/R/Splits.R +++ b/R/Splits.R @@ -346,25 +346,28 @@ print.Splits <- function(x, details = FALSE, ...) { splitNames <- character(length(x)) nameLengths = 0L } - cat("\n ", paste0(rep.int(" ", max(nameLengths)), collapse = ""), + if (any(splitNames)) { + cat("\n ", paste0(rep.int(" ", max(nameLengths)), collapse = ""), paste0(rep_len(c(1:9, " "), nTip), collapse = "")) - - nSplits <- dim(x)[[1]] - splitCounts <- if (!is.null(count)) { - if (length(count) != nSplits) { - warning("\"count\" attribute does not match number of splits") + + + nSplits <- dim(x)[[1]] + splitCounts <- if (!is.null(count)) { + if (length(count) != nSplits) { + warning("\"count\" attribute does not match number of splits") + } + paste0("\UD7 ", count) + } else { + rep("", nSplits) + } + + for (i in seq_len(nSplits)) { + split <- x[i, , drop = FALSE] + cat("\n", splitNames[i], "", + paste(ifelse(as.logical(rawToBits(split)[seq_len(nTip)]), "*", "."), + collapse = ""), + splitCounts[i]) } - paste0("\UD7 ", count) - } else { - rep("", nSplits) - } - - for (i in seq_len(nSplits)) { - split <- x[i, , drop = FALSE] - cat("\n", splitNames[i], "", - paste(ifelse(as.logical(rawToBits(split)[seq_len(nTip)]), "*", "."), - collapse = ""), - splitCounts[i]) } } } @@ -427,11 +430,14 @@ tail.Splits <- function(x, n = 6L, ...) { summary.Splits <- function(object, ...) { print(object, details = TRUE, ...) nTip <- attr(object, "nTip") - if (is.null(attr(object, "tip.label"))) { + tipLabels <- attr(object, "tip.label") + if (is.null(tipLabels)) { cat("\n\nTips not labelled.") } else { - cat("\n\n", paste0("Tip ", seq_len(nTip), ": ", attr(object, "tip.label"), - "\t", c(character(4L), "\n")[seq_len(min(nTip, 5L))])) + if (length(tipLabels) > 0) { + cat("\n\n", paste0("Tip ", seq_len(nTip), ": ", tipLabels, + "\t", c(character(4L), "\n")[seq_len(min(nTip, 5L))])) + } } } diff --git a/tests/testthat/test-Splits.R b/tests/testthat/test-Splits.R index 1386f199..fac3b969 100644 --- a/tests/testthat/test-Splits.R +++ b/tests/testthat/test-Splits.R @@ -425,6 +425,16 @@ test_that("print.Splits()", { details = TRUE)), c( "1 bipartition split dividing 4 tips, t1 .. t4", " 1234", paste0(" ", num, " ..** \UD7 2"))) + + expect_equal( + capture.output(summary(as.Splits(SingleTaxonTree()))), + c("0 bipartition splits dividing 1 tip, t1", "", " Tip 1: t1\t") + ) + + expect_equal( + capture.output(summary(as.Splits(ZeroTaxonTree()))), + c("0 bipartition splits dividing 0 tips") + ) }) test_that("head,tail.Splits()", { From ede0e832794c7465231a84ce34e2b1fa3cd021c5 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 16:10:09 +0000 Subject: [PATCH 07/11] Checks --- R/Splits.R | 5 +++-- man/PolarizeSplits.Rd | 2 +- tests/testthat/test-Splits.R | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/R/Splits.R b/R/Splits.R index 09b56e61..bd57e1ff 100644 --- a/R/Splits.R +++ b/R/Splits.R @@ -346,7 +346,7 @@ print.Splits <- function(x, details = FALSE, ...) { splitNames <- character(length(x)) nameLengths = 0L } - if (any(splitNames)) { + if (length(splitNames) > 0) { cat("\n ", paste0(rep.int(" ", max(nameLengths)), collapse = ""), paste0(rep_len(c(1:9, " "), nTip), collapse = "")) @@ -627,7 +627,7 @@ rev.Splits <- function(x) { #' Polarize splits on a single taxon #' -#' @param x Object of class [`Splits`]. +#' @param x Object that can be coerced into class [`Splits`]. #' @param pole Numeric, character or logical vector identifying tip that will #' polarize each split. #' @@ -636,6 +636,7 @@ rev.Splits <- function(x) { #' @family Splits operations #' @export PolarizeSplits <- function(x, pole = 1L) { + x <- as.Splits(x) nTip <- attr(x, "nTip") if (is.logical(pole)) { pole <- which(pole)[[1]] diff --git a/man/PolarizeSplits.Rd b/man/PolarizeSplits.Rd index 0505ee4a..011fa370 100644 --- a/man/PolarizeSplits.Rd +++ b/man/PolarizeSplits.Rd @@ -7,7 +7,7 @@ PolarizeSplits(x, pole = 1L) } \arguments{ -\item{x}{Object of class \code{\link{Splits}}.} +\item{x}{Object that can be coerced into class \code{\link{Splits}}.} \item{pole}{Numeric, character or logical vector identifying tip that will polarize each split.} diff --git a/tests/testthat/test-Splits.R b/tests/testthat/test-Splits.R index fac3b969..aaa20792 100644 --- a/tests/testthat/test-Splits.R +++ b/tests/testthat/test-Splits.R @@ -377,6 +377,13 @@ test_that("match.Splits()", { expect_equal(c(4, 3, 999, 2, 1), match(as.Splits(tree1, tree2), col2, nomatch = 999)) expect_equal(c(5, 4, 2, 1), match(col2, as.Splits(tree1, tree2))) + + expect_equal(match(sort(as.Splits(tree1)), + sort(as.Splits(tree2))), 5:1) + expect_equal(match(sort(as.Splits(tree1)), + sort(as.Splits(tree2, tree1))), 5:1) + expect_equal(match(sort(PolarizeSplits(as.Splits(tree1), 1)), + sort(PolarizeSplits(as.Splits(tree2, tree1), 1))), 1:5) }) test_that("duplicated.Splits(internal)", { @@ -426,6 +433,14 @@ test_that("print.Splits()", { c( "1 bipartition split dividing 4 tips, t1 .. t4", " 1234", paste0(" ", num, " ..** \UD7 2"))) + expect_warning(expect_equal( + capture.output(print( + structure(PolarizeSplits(BalancedTree(5), 5), count = 2), + details = TRUE)), + c( "2 bipartition splits dividing 5 tips, t1 .. t5", " 12345", + " 7 ...** \UD7 2", + " 8 ..*** NA"))) + expect_equal( capture.output(summary(as.Splits(SingleTaxonTree()))), c("0 bipartition splits dividing 1 tip, t1", "", " Tip 1: t1\t") From c4f1eedc983d8e3376124c246f85a3c2ee5d34f2 Mon Sep 17 00:00:00 2001 From: "Martin R. Smith" <1695515+ms609@users.noreply.github.com> Date: Mon, 16 Feb 2026 16:16:09 +0000 Subject: [PATCH 08/11] Catch zero-length forest --- R/Support.R | 11 +++++------ tests/testthat/test-Support.R | 4 ++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/R/Support.R b/R/Support.R index 8be881c9..848141ba 100644 --- a/R/Support.R +++ b/R/Support.R @@ -37,6 +37,9 @@ SplitFrequency <- function(reference, forest = NULL) { if (length(unique(lapply(lapply(forest, TipLabels), sort))) > 1) { stop("All trees must bear identical labels") } + if (length(forest) == 0) { + return(structure(forest, count = integer())) + } tipLabels <- TipLabels(forest[[1]]) forest <- RenumberTips(forest, tipLabels) forest <- Preorder(forest) @@ -46,12 +49,8 @@ SplitFrequency <- function(reference, forest = NULL) { nTip <- length(tipLabels) nbin <- ncol(splits) if (nrow(splits) == 0) { - ret <- structure(splits, - nTip = nTip, - tip.label = tipLabels, - class = "Splits") - attr(ret, "count") <- integer(0) - return(ret) + return(structure(splits, nTip = nTip, tip.label = tipLabels, + count = integer(), class = "Splits")) } # The ClusterTable outputs clusters (clades); normalize so bit 0 (tip 1) # is not in the set (matching as.Splits convention) diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index e956e969..b8a60847 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -28,6 +28,10 @@ test_that("Node supports calculated correctly", { expect_equal(SplitFrequency(list(monoSplit)), structure(unname(as.Splits(monoSplit)), count = 1)) + + expect_equal(SplitFrequency(as.Splits(monoSplit)[[FALSE]]), + structure(as.Splits(monoSplit)[[FALSE]], count = integer())) + # Internal nodes on each side of root balanced <- ape::read.tree(text="((D, (E, (F, out))), (C, (A, B)));") freq <- SplitFrequency(balanced, treeSample) From 6ed9e9e36a0e4f510431bf7a708ad1bf79433c4c Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 17 Feb 2026 07:49:30 +0000 Subject: [PATCH 09/11] r<4.1 <| --- tests/testthat/test-Support.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index b8a60847..5eeb5342 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -16,11 +16,13 @@ test_that("Node supports calculated correctly", { sameSplits <- do.call(c, as.Splits(sameTips)) expect_equal( - SplitFrequency(sameTips) |> - PolarizeSplits(1) |> sort() |> unname(), + unname(sort(PolarizeSplits(pole = 1, + SplitFrequency(sameTips) + ))), + unname(sort(PolarizeSplits(pole = 1, structure(sameSplits[[!duplicated(sameSplits)]], - count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) |> - PolarizeSplits(1) |> sort() |> unname() + count = c(4, 4, 4, 3, 1, 1, 1, 1, 1)) + ))) ) From d9a52316709c13dd7fec353946b5425a0fa6108f Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 17 Feb 2026 08:02:03 +0000 Subject: [PATCH 10/11] Coverage --- R/Support.R | 12 +++++++++--- tests/testthat/test-Support.R | 7 ++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/R/Support.R b/R/Support.R index 848141ba..b105f416 100644 --- a/R/Support.R +++ b/R/Support.R @@ -34,6 +34,7 @@ SplitFrequency <- function(reference, forest = NULL) { if (is.null(reference) || is.null(forest)) { if (is.null(forest)) forest <- reference + if (inherits(forest, "phylo")) forest <- list(forest) if (length(unique(lapply(lapply(forest, TipLabels), sort))) > 1) { stop("All trees must bear identical labels") } @@ -41,6 +42,11 @@ SplitFrequency <- function(reference, forest = NULL) { return(structure(forest, count = integer())) } tipLabels <- TipLabels(forest[[1]]) + if (length(tipLabels) < 4) { + return(structure(matrix(raw()), nTip = length(tipLabels), + tip.label = tipLabels, count = integer(), + class = "Splits")) + } forest <- RenumberTips(forest, tipLabels) forest <- Preorder(forest) result <- split_frequencies(forest) @@ -48,9 +54,9 @@ SplitFrequency <- function(reference, forest = NULL) { counts <- result[["counts"]] nTip <- length(tipLabels) nbin <- ncol(splits) - if (nrow(splits) == 0) { - return(structure(splits, nTip = nTip, tip.label = tipLabels, - count = integer(), class = "Splits")) + if (nrow(splits) == 0) { # Not been able to hit these lines - included just in case + return(structure(splits, nTip = nTip, tip.label = tipLabels, # nocov + count = integer(), class = "Splits")) # nocov } # The ClusterTable outputs clusters (clades); normalize so bit 0 (tip 1) # is not in the set (matching as.Splits convention) diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index 5eeb5342..8d7812a2 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -27,13 +27,18 @@ test_that("Node supports calculated correctly", { monoSplit <- ape::read.tree(text = "((a, b, c, d), (e, f, g));") - expect_equal(SplitFrequency(list(monoSplit)), + expect_equal(SplitFrequency(monoSplit), structure(unname(as.Splits(monoSplit)), count = 1)) + expect_equal(SplitFrequency(SingleTaxonTree()), + structure(as.Splits(SingleTaxonTree()), count = integer())) expect_equal(SplitFrequency(as.Splits(monoSplit)[[FALSE]]), structure(as.Splits(monoSplit)[[FALSE]], count = integer())) + expect_equal(SplitFrequency(StarTree(67)), + structure(as.Splits(StarTree(67)), count = integer())) + # Internal nodes on each side of root balanced <- ape::read.tree(text="((D, (E, (F, out))), (C, (A, B)));") freq <- SplitFrequency(balanced, treeSample) From 2174acf6288edf92caad20f203e20fd0c7bd6e54 Mon Sep 17 00:00:00 2001 From: RevBayes analysis <1695515+ms609@users.noreply.github.com> Date: Tue, 17 Feb 2026 08:06:44 +0000 Subject: [PATCH 11/11] Test heap --- tests/testthat/test-Support.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-Support.R b/tests/testthat/test-Support.R index 8d7812a2..7b249527 100644 --- a/tests/testthat/test-Support.R +++ b/tests/testthat/test-Support.R @@ -45,6 +45,9 @@ test_that("Node supports calculated correctly", { expect_equal(freq, c("9" = 4, "10" = 4, "11" = 4, "12" = 4, "13" = 3)[names(freq)]) + skip_if(!isTRUE(options("runSlowTests"))) + expect_equal(SplitFrequency(c(PectinateTree(8200), PectinateTree(8200))), + structure(as.Splits(PectinateTree(8200)), count = rep(2, 8197))) }) test_that("Node support colours consistent", {