From b6e49744b9c27034dc54bb892be5a74742c08989 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 16 Jul 2016 16:38:53 -0700
Subject: [PATCH 01/16] Create internal function for weight/index switch

---
 R/epi_self_match.R  | 28 +++++++++++------------
 R/internal.R        | 49 ++++++++++++++++++++++++++++++++++++++++
 man/getIndexList.Rd | 54 +++++++++++++++++++++++++++++++++++++++++++++
 man/matchEpiData.Rd | 25 +++++++++++----------
 4 files changed, 130 insertions(+), 26 deletions(-)
 create mode 100644 man/getIndexList.Rd

diff --git a/R/epi_self_match.R b/R/epi_self_match.R
index f3b57c4..6fb57be 100644
--- a/R/epi_self_match.R
+++ b/R/epi_self_match.R
@@ -25,22 +25,23 @@
 #' case <- x[["CaseInformationForm.csv"]]
 #' lab <- x[["LaboratoryResultsForm7.csv"]]
 #'
+#' funlist <- list(
+#'              list(d1vars = "ID",
+#'                   d2vars = "ID",
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 1),
+#'              list(d1vars = c("Surname", "OtherNames"),
+#'                   d2vars = c("SurnameLab", "OtherNameLab"),
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 0.5)
+#'            )
 #' # This will get all of the indices that match the ID and Names with a
 #' # threshold of 0.25
 #' res <- matchEpiData(dat1 = case,
 #'                     dat2 = lab,
-#'                     funlist = list(
-#'                     list(d1vars = "ID",
-#'                          d2vars = "ID",
-#'                          fun = "nameDists",
-#'                          extraparams = NULL,
-#'                          weight = 1),
-#'                     list(d1vars = c("Surname", "OtherNames"),
-#'                          d2vars = c("SurnameLab", "OtherNameLab"),
-#'                          fun = "nameDists",
-#'                          extraparams = NULL,
-#'                          weight = 0.5)
-#'                     ),
+#'                     funlist = funlist,
 #'                     thresh = 0.25)
 #' # List of indices
 #' res
@@ -59,8 +60,7 @@ matchEpiData <- function(dat1, dat2 = NULL, funlist = list(), thresh = 0.05, giv
   # For processing in the shiny app, it's useful to display either the weights
   # as a named index vector or the indices themselves
   if (!giveWeight){
-    names(out) <- NULL
-    out <- lapply(out, lapply, function(i) as.integer(names(i)))
+    out <- getIndexList(out)
   }
   return(out)
 }
diff --git a/R/internal.R b/R/internal.R
index 89fbcbc..f62af2f 100644
--- a/R/internal.R
+++ b/R/internal.R
@@ -66,3 +66,52 @@ splitComma <- function(dat)
   }
   return(new_dat)
 }
+
+
+#' return a list of indices from matchEpiData
+#'
+#' The output of matchEpiData can either be a list of indices or a named list of
+#' weights. If it's a named list of weights, this function will return the list
+#' of indices.
+#'
+#' @param matchList a list of lists of weights.
+#'
+#' @return an unnamed list of lists of indices
+#' @keywords internal
+#' @examples
+#' ## Loading Data
+#' indata <- system.file("files", package = "epimatch")
+#' indata <- dir(indata, full.names = TRUE)
+#' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+#' names(x) <- basename(indata)
+#'
+#' # We will use one data set from the case information and lab results
+#' case <- x[["CaseInformationForm.csv"]]
+#' lab <- x[["LaboratoryResultsForm7.csv"]]
+#'
+#' funlist <- list(
+#'              list(d1vars = "ID",
+#'                   d2vars = "ID",
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 1),
+#'              list(d1vars = c("Surname", "OtherNames"),
+#'                   d2vars = c("SurnameLab", "OtherNameLab"),
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 0.5)
+#'            )
+#' # This will get all of the indices that match the ID and Names with a
+#' # threshold of 0.25
+#' res <- matchEpiData(dat1 = case,
+#'                     dat2 = lab,
+#'                     funlist = funlist,
+#'                     thresh = 0.25,
+#'                     giveWeight = TRUE)
+#' res               # List of weights
+#' getIndexList(res) # List of indices
+getIndexList <- function(matchList){
+  out <- lapply(matchList, lapply, function(i) as.integer(names(i)))
+  names(out) <- NULL
+  return(out)
+}
diff --git a/man/getIndexList.Rd b/man/getIndexList.Rd
new file mode 100644
index 0000000..a473a45
--- /dev/null
+++ b/man/getIndexList.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/internal.R
+\name{getIndexList}
+\alias{getIndexList}
+\title{return a list of indices from matchEpiData}
+\usage{
+getIndexList(matchList)
+}
+\arguments{
+\item{matchList}{a list of lists of weights.}
+}
+\value{
+an unnamed list of lists of indices
+}
+\description{
+The output of matchEpiData can either be a list of indices or a named list of
+weights. If it's a named list of weights, this function will return the list
+of indices.
+}
+\examples{
+## Loading Data
+indata <- system.file("files", package = "epimatch")
+indata <- dir(indata, full.names = TRUE)
+x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+names(x) <- basename(indata)
+
+# We will use one data set from the case information and lab results
+case <- x[["CaseInformationForm.csv"]]
+lab <- x[["LaboratoryResultsForm7.csv"]]
+
+funlist <- list(
+             list(d1vars = "ID",
+                  d2vars = "ID",
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 1),
+             list(d1vars = c("Surname", "OtherNames"),
+                  d2vars = c("SurnameLab", "OtherNameLab"),
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 0.5)
+           )
+# This will get all of the indices that match the ID and Names with a
+# threshold of 0.25
+res <- matchEpiData(dat1 = case,
+                    dat2 = lab,
+                    funlist = funlist,
+                    thresh = 0.25,
+                    giveWeight = TRUE)
+res               # List of weights
+getIndexList(res) # List of indices
+}
+\keyword{internal}
+
diff --git a/man/matchEpiData.Rd b/man/matchEpiData.Rd
index ef94468..92bde0d 100644
--- a/man/matchEpiData.Rd
+++ b/man/matchEpiData.Rd
@@ -51,22 +51,23 @@ names(x) <- basename(indata)
 case <- x[["CaseInformationForm.csv"]]
 lab <- x[["LaboratoryResultsForm7.csv"]]
 
+funlist <- list(
+             list(d1vars = "ID",
+                  d2vars = "ID",
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 1),
+             list(d1vars = c("Surname", "OtherNames"),
+                  d2vars = c("SurnameLab", "OtherNameLab"),
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 0.5)
+           )
 # This will get all of the indices that match the ID and Names with a
 # threshold of 0.25
 res <- matchEpiData(dat1 = case,
                     dat2 = lab,
-                    funlist = list(
-                    list(d1vars = "ID",
-                         d2vars = "ID",
-                         fun = "nameDists",
-                         extraparams = NULL,
-                         weight = 1),
-                    list(d1vars = c("Surname", "OtherNames"),
-                         d2vars = c("SurnameLab", "OtherNameLab"),
-                         fun = "nameDists",
-                         extraparams = NULL,
-                         weight = 0.5)
-                    ),
+                    funlist = funlist,
                     thresh = 0.25)
 # List of indices
 res

From 43f4aeed4a0bb7ab5a878a626a9e3ee8d1358a9a Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 16 Jul 2016 17:47:02 -0700
Subject: [PATCH 02/16] Add collapseValues internal function

---
 R/internal.R        | 25 +++++++++++++++++++++
 man/getIndexList.Rd | 54 ---------------------------------------------
 2 files changed, 25 insertions(+), 54 deletions(-)
 delete mode 100644 man/getIndexList.Rd

diff --git a/R/internal.R b/R/internal.R
index f62af2f..0a5a15e 100644
--- a/R/internal.R
+++ b/R/internal.R
@@ -78,6 +78,7 @@ splitComma <- function(dat)
 #'
 #' @return an unnamed list of lists of indices
 #' @keywords internal
+#' @noRd
 #' @examples
 #' ## Loading Data
 #' indata <- system.file("files", package = "epimatch")
@@ -115,3 +116,27 @@ getIndexList <- function(matchList){
   names(out) <- NULL
   return(out)
 }
+
+#' Collapse columns of a data frame into one
+#'
+#' @param df a data frame with one or more columns
+#' @param sep a separator to collapse the columns with
+#'
+#' @return a single column data frame
+#' @keywords internal
+#' @noRd
+#'
+#' @examples
+#' dat <- data.frame(a = letters, b = sample(100, 26), c = LETTERS)
+#' collapseValues(dat)
+collapseValues <- function(df, sep = "_"){
+  if (!is.data.frame(df)){
+    stop("a data frame is needed")
+  }
+  if (length(df) > 1){
+    dfnames <- paste(names(df), collapse = sep)
+    df <- data.frame(apply(df, 1, paste, collapse = sep), stringsAsFactors = FALSE)
+    names(df) <- dfnames
+  }
+  return(df)
+}
diff --git a/man/getIndexList.Rd b/man/getIndexList.Rd
deleted file mode 100644
index a473a45..0000000
--- a/man/getIndexList.Rd
+++ /dev/null
@@ -1,54 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/internal.R
-\name{getIndexList}
-\alias{getIndexList}
-\title{return a list of indices from matchEpiData}
-\usage{
-getIndexList(matchList)
-}
-\arguments{
-\item{matchList}{a list of lists of weights.}
-}
-\value{
-an unnamed list of lists of indices
-}
-\description{
-The output of matchEpiData can either be a list of indices or a named list of
-weights. If it's a named list of weights, this function will return the list
-of indices.
-}
-\examples{
-## Loading Data
-indata <- system.file("files", package = "epimatch")
-indata <- dir(indata, full.names = TRUE)
-x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
-names(x) <- basename(indata)
-
-# We will use one data set from the case information and lab results
-case <- x[["CaseInformationForm.csv"]]
-lab <- x[["LaboratoryResultsForm7.csv"]]
-
-funlist <- list(
-             list(d1vars = "ID",
-                  d2vars = "ID",
-                  fun = "nameDists",
-                  extraparams = NULL,
-                  weight = 1),
-             list(d1vars = c("Surname", "OtherNames"),
-                  d2vars = c("SurnameLab", "OtherNameLab"),
-                  fun = "nameDists",
-                  extraparams = NULL,
-                  weight = 0.5)
-           )
-# This will get all of the indices that match the ID and Names with a
-# threshold of 0.25
-res <- matchEpiData(dat1 = case,
-                    dat2 = lab,
-                    funlist = funlist,
-                    thresh = 0.25,
-                    giveWeight = TRUE)
-res               # List of weights
-getIndexList(res) # List of indices
-}
-\keyword{internal}
-

From 74cfc087a5c77f9a7de32872af71c88d85da1d83 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 16 Jul 2016 17:48:55 -0700
Subject: [PATCH 03/16] Add tableFromMatch function

I've added a function to return tables from the
match query. It can work in addition or in lieu
of `matchEpiData()`. I have added documentation
for this and also updated the documentation of
`matchEpiData()`.
---
 NAMESPACE              |  1 +
 R/epi_self_match.R     | 10 ++---
 R/tableFromMatch.R     | 88 ++++++++++++++++++++++++++++++++++++++++++
 man/matchEpiData.Rd    | 10 ++---
 man/tablesFromMatch.Rd | 66 +++++++++++++++++++++++++++++++
 5 files changed, 161 insertions(+), 14 deletions(-)
 create mode 100644 R/tableFromMatch.R
 create mode 100644 man/tablesFromMatch.Rd

diff --git a/NAMESPACE b/NAMESPACE
index c03d96a..3e81f22 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ export(matchEpiData)
 export(nameDists)
 export(processFunctionList)
 export(returnMatches)
+export(tablesFromMatch)
 importFrom(lubridate,parse_date_time)
 importFrom(shiny,runApp)
 importFrom(stats,dist)
diff --git a/R/epi_self_match.R b/R/epi_self_match.R
index 6fb57be..99c36a7 100644
--- a/R/epi_self_match.R
+++ b/R/epi_self_match.R
@@ -42,16 +42,12 @@
 #' res <- matchEpiData(dat1 = case,
 #'                     dat2 = lab,
 #'                     funlist = funlist,
-#'                     thresh = 0.25)
+#'                     thresh = 0.25,
+#'                     giveWeight = FALSE)
 #' # List of indices
 #' res
 #'
-#' # Printing out the matching names in decreasing order of matching
-#' invisible(lapply(res, function(i) {
-#'    print(case[i$d1, c("Surname", "OtherNames")])
-#'    print(lab[i$d2, c("SurnameLab", "OtherNameLab")])
-#'    cat("\n\t--------\n")
-#'  }))
+#' tablesFromMatch(case, lab, funlist, matchList = res)
 matchEpiData <- function(dat1, dat2 = NULL, funlist = list(), thresh = 0.05, giveWeight = FALSE){
   the_matrices <- processFunctionList(dat1, dat2, funlist)
   the_weights  <- unlist(lapply(funlist, function(i) i$weight))
diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
new file mode 100644
index 0000000..a6adcc9
--- /dev/null
+++ b/R/tableFromMatch.R
@@ -0,0 +1,88 @@
+#' Generate a list of tables aggregating the matched values
+#'
+#' @inheritParams processFunctionList
+#' @param matchList a list derived from \code{\link{matchEpiData}}. If this is
+#'   \code{NULL}, a threshold can be supplied instead to calculate the list on
+#'   the fly.
+#'
+#' @return a list of data frames sorted in decreasing order of matching-ness.
+#' @export
+#'
+#' @examples
+#' ## Loading Data
+#' indata <- system.file("files", package = "epimatch")
+#' indata <- dir(indata, full.names = TRUE)
+#' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+#' names(x) <- basename(indata)
+#'
+#' # We will use one data set from the case information and lab results
+#' case <- x[["CaseInformationForm.csv"]]
+#' lab <- x[["LaboratoryResultsForm7.csv"]]
+#'
+#' funlist <- list(
+#'              list(d1vars = "ID",
+#'                   d2vars = "ID",
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 1),
+#'              list(d1vars = c("Surname", "OtherNames"),
+#'                   d2vars = c("SurnameLab", "OtherNameLab"),
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 0.5)
+#'            )
+#' # This will get all of the indices that match the ID and Names with a
+#' # threshold of 0.25
+#' res <- matchEpiData(dat1 = case,
+#'                     dat2 = lab,
+#'                     funlist = funlist,
+#'                     thresh = 0.25,
+#'                     giveWeight = TRUE)
+#' tablesFromMatch(case, lab, funlist, matchList = res)
+#' tablesFromMatch(case, lab, funlist, matchList = 0.25)
+tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(), matchList = NULL){
+
+  # Check if incoming data are cromulent
+  dat2exists <- !is.null(dat2) && is.data.frame(dat2)
+  if (!is.data.frame(dat1) | !dat2exists){
+    stop("data must be in data frame format")
+  }
+  # Check if there are any matches at all
+  if (is.null(matchList) || length(matchList) == 0){
+    stop("no matches found!")
+  }
+  # If the matchList is a threshold parameter, pass it to the matchEpiData function.
+  if (is.numeric(matchList) && length(matchList) == 1){
+    matchList <- matchEpiData(dat1, dat2, funlist = funlist, thresh = matchList)
+  } else if (!is.integer(unlist(matchList))){
+    matchList <- getIndexList(matchList)
+  }
+
+  # Grab the tested variables from dat1
+  d1vars <- lapply(funlist, "[[", "d1vars")
+  d1 <- lapply(d1vars, function(i) dat1[i])
+  # Since multiple columns can be evaluated by a single function, we are going
+  # to collapse these columns into one.
+  d1 <- data.frame(lapply(d1, collapseValues))
+
+  if (dat2exists){
+    # Grab the tested variables from dat2 and rename them based off of dat1.
+    # This allows us to bind the data together a lot easier.
+    d2vars <- lapply(funlist, "[[", "d2vars")
+    d2 <- lapply(d2vars, function(i) dat2[i])
+    d2 <- data.frame(lapply(d2, collapseValues))
+    names(d2) <- names(d1)
+  }
+
+  # The output is a list of data frames subset in decreasing order of matching.
+  outlist <- vector(length = length(matchList), mode = "list")
+  for (i in seq(matchList)){
+    indices <- matchList[[i]]
+    if (dat2exists){
+      outlist[[i]] <- rbind(d1[indices$d1, ], d2[indices$d2, ])
+    } else {
+      outlist[[i]] <- d1[indices$d1, ]
+    }
+  }
+  return(outlist)
+}
diff --git a/man/matchEpiData.Rd b/man/matchEpiData.Rd
index 92bde0d..9f6d911 100644
--- a/man/matchEpiData.Rd
+++ b/man/matchEpiData.Rd
@@ -68,15 +68,11 @@ funlist <- list(
 res <- matchEpiData(dat1 = case,
                     dat2 = lab,
                     funlist = funlist,
-                    thresh = 0.25)
+                    thresh = 0.25,
+                    giveWeight = FALSE)
 # List of indices
 res
 
-# Printing out the matching names in decreasing order of matching
-invisible(lapply(res, function(i) {
-   print(case[i$d1, c("Surname", "OtherNames")])
-   print(lab[i$d2, c("SurnameLab", "OtherNameLab")])
-   cat("\\n\\t--------\\n")
- }))
+tablesFromMatch(case, lab, funlist, matchList = res)
 }
 
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
new file mode 100644
index 0000000..b076e24
--- /dev/null
+++ b/man/tablesFromMatch.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tableFromMatch.R
+\name{tablesFromMatch}
+\alias{tablesFromMatch}
+\title{Generate a list of tables aggregating the matched values}
+\usage{
+tablesFromMatch(dat1, dat2 = NULL, funlist = list(), matchList = NULL)
+}
+\arguments{
+\item{dat1}{An input linelist}
+
+\item{dat2}{An optional extra linelist}
+
+\item{funlist}{A list containing lists containing:
+\itemize{
+ \item d1vars - variable names for dataset 1
+ \item d2vars - variable names for dataset 2
+ \item fun - function name to process on these variables
+ \item extraparams - extra parameters that need to be applied with the function.
+ \item weights - a weight vector to scale each matrix (not used in processFunctionList).
+}}
+
+\item{matchList}{a list derived from \code{\link{matchEpiData}}. If this is
+\code{NULL}, a threshold can be supplied instead to calculate the list on
+the fly.}
+}
+\value{
+a list of data frames sorted in decreasing order of matching-ness.
+}
+\description{
+Generate a list of tables aggregating the matched values
+}
+\examples{
+## Loading Data
+indata <- system.file("files", package = "epimatch")
+indata <- dir(indata, full.names = TRUE)
+x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+names(x) <- basename(indata)
+
+# We will use one data set from the case information and lab results
+case <- x[["CaseInformationForm.csv"]]
+lab <- x[["LaboratoryResultsForm7.csv"]]
+
+funlist <- list(
+             list(d1vars = "ID",
+                  d2vars = "ID",
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 1),
+             list(d1vars = c("Surname", "OtherNames"),
+                  d2vars = c("SurnameLab", "OtherNameLab"),
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 0.5)
+           )
+# This will get all of the indices that match the ID and Names with a
+# threshold of 0.25
+res <- matchEpiData(dat1 = case,
+                    dat2 = lab,
+                    funlist = funlist,
+                    thresh = 0.25,
+                    giveWeight = TRUE)
+tablesFromMatch(case, lab, funlist, matchList = res)
+tablesFromMatch(case, lab, funlist, matchList = 0.25)
+}
+

From 89f900e6ceb52b8f030dfdc0ce23daef8bcbb623 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 14:09:54 -0400
Subject: [PATCH 04/16] update docs to include library call

---
 R/epi_self_match.R         | 1 +
 R/matchAge.R               | 1 +
 R/matchDate.R              | 1 +
 R/matchGender.R            | 1 +
 R/matchGeneric.R           | 1 +
 R/matchNames.R             | 1 +
 R/process_matching.R       | 1 +
 R/tableFromMatch.R         | 1 +
 man/ageDists.Rd            | 1 +
 man/dateDists.Rd           | 1 +
 man/genderDists.Rd         | 1 +
 man/genericDists.Rd        | 1 +
 man/matchEpiData.Rd        | 1 +
 man/nameDists.Rd           | 1 +
 man/processFunctionList.Rd | 1 +
 man/tablesFromMatch.Rd     | 1 +
 16 files changed, 16 insertions(+)

diff --git a/R/epi_self_match.R b/R/epi_self_match.R
index 99c36a7..f0cffa0 100644
--- a/R/epi_self_match.R
+++ b/R/epi_self_match.R
@@ -16,6 +16,7 @@
 #'
 #' @examples
 #' ## Loading Data
+#' library('epimatch')
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
 #' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/R/matchAge.R b/R/matchAge.R
index c037893..f15f357 100644
--- a/R/matchAge.R
+++ b/R/matchAge.R
@@ -14,6 +14,7 @@
 #' @export
 #'
 #' @examples
+#' library('epimatch')
 #' set.seed(999)
 #' x <- data.frame(age = rpois(5, 30), age_class = "YEAR")
 #' y <- data.frame(age = rpois(5, 18), age_class = "MONTH")
diff --git a/R/matchDate.R b/R/matchDate.R
index a3527c6..860dbd8 100644
--- a/R/matchDate.R
+++ b/R/matchDate.R
@@ -14,6 +14,7 @@
 #' @importFrom lubridate parse_date_time
 #'
 #' @examples
+#' library('epimatch')
 #'
 #' # Two data sets in Month-Day-Year and Year-Month-Day format, respectively.
 #' # Note that the way the dates can vary within data sets, but it's assumed
diff --git a/R/matchGender.R b/R/matchGender.R
index 2414b5d..fa6d17c 100644
--- a/R/matchGender.R
+++ b/R/matchGender.R
@@ -11,6 +11,7 @@
 #' @export
 #'
 #' @examples
+#' library('epimatch')
 #' test <- data.frame(c("male", "f", "m"), stringsAsFactors = FALSE)
 #' genderDists(test)
 genderDists <- function(dat1, dat2=NULL,
diff --git a/R/matchGeneric.R b/R/matchGeneric.R
index 5cff81b..025e045 100644
--- a/R/matchGeneric.R
+++ b/R/matchGeneric.R
@@ -7,6 +7,7 @@
 #' @export
 #'
 #' @examples
+#' library('epimatch')
 #' set.seed(9)
 #' x <- data.frame(dat = sample(10, replace = TRUE))
 #' x$let <- letters[x$dat]
diff --git a/R/matchNames.R b/R/matchNames.R
index ed1a64d..a7f1314 100644
--- a/R/matchNames.R
+++ b/R/matchNames.R
@@ -8,6 +8,7 @@
 #' @export
 #'
 #' @examples
+#' library('epimatch')
 #' set.seed(9)
 #' x <- data.frame(x = letters, y = LETTERS, z = 1:26)
 #' x <- x[sample(nrow(x), 10, replace = TRUE), ]
diff --git a/R/process_matching.R b/R/process_matching.R
index 5b3c34c..6cb4806 100644
--- a/R/process_matching.R
+++ b/R/process_matching.R
@@ -20,6 +20,7 @@
 #' @export
 #'
 #' @examples
+#' library('epimatch')
 #' ## Loading Data
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index a6adcc9..607b714 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -10,6 +10,7 @@
 #'
 #' @examples
 #' ## Loading Data
+#' library('epimatch')
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
 #' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/man/ageDists.Rd b/man/ageDists.Rd
index 039da4a..6c9418f 100644
--- a/man/ageDists.Rd
+++ b/man/ageDists.Rd
@@ -27,6 +27,7 @@ This function will take one or two data frames containing ages
   data frame with a specifier for months or year in the second column.
 }
 \examples{
+library('epimatch')
 set.seed(999)
 x <- data.frame(age = rpois(5, 30), age_class = "YEAR")
 y <- data.frame(age = rpois(5, 18), age_class = "MONTH")
diff --git a/man/dateDists.Rd b/man/dateDists.Rd
index f080758..6857dbc 100644
--- a/man/dateDists.Rd
+++ b/man/dateDists.Rd
@@ -27,6 +27,7 @@ The formats for dates are passed to \pkg{lubridate}'s function
   "ymd", etc.
 }
 \examples{
+library('epimatch')
 
 # Two data sets in Month-Day-Year and Year-Month-Day format, respectively.
 # Note that the way the dates can vary within data sets, but it's assumed
diff --git a/man/genderDists.Rd b/man/genderDists.Rd
index 0405e95..60e5c38 100644
--- a/man/genderDists.Rd
+++ b/man/genderDists.Rd
@@ -27,6 +27,7 @@ A thin wrapper around generic, which converts some known male and female
 codes to binary
 }
 \examples{
+library('epimatch')
 test <- data.frame(c("male", "f", "m"), stringsAsFactors = FALSE)
 genderDists(test)
 }
diff --git a/man/genericDists.Rd b/man/genericDists.Rd
index aa34061..a41b95a 100644
--- a/man/genericDists.Rd
+++ b/man/genericDists.Rd
@@ -18,6 +18,7 @@ a pairwise matrix of scores from 0 (exact match) to 1 (no match)
 generic function for exact matching single columns
 }
 \examples{
+library('epimatch')
 set.seed(9)
 x <- data.frame(dat = sample(10, replace = TRUE))
 x$let <- letters[x$dat]
diff --git a/man/matchEpiData.Rd b/man/matchEpiData.Rd
index 9f6d911..f3e2a4f 100644
--- a/man/matchEpiData.Rd
+++ b/man/matchEpiData.Rd
@@ -42,6 +42,7 @@ this function will take in one or two data sets, a list of functions
 }
 \examples{
 ## Loading Data
+library('epimatch')
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
 x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/man/nameDists.Rd b/man/nameDists.Rd
index 703b01d..c79dd12 100644
--- a/man/nameDists.Rd
+++ b/man/nameDists.Rd
@@ -20,6 +20,7 @@ Clean and match names column by removing punctuation and replacing
 punctuation with an underscore
 }
 \examples{
+library('epimatch')
 set.seed(9)
 x <- data.frame(x = letters, y = LETTERS, z = 1:26)
 x <- x[sample(nrow(x), 10, replace = TRUE), ]
diff --git a/man/processFunctionList.Rd b/man/processFunctionList.Rd
index 66011bb..f213384 100644
--- a/man/processFunctionList.Rd
+++ b/man/processFunctionList.Rd
@@ -29,6 +29,7 @@ column names, and function parameters, and return a list of matrices by
 function.
 }
 \examples{
+library('epimatch')
 ## Loading Data
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
index b076e24..f8f7605 100644
--- a/man/tablesFromMatch.Rd
+++ b/man/tablesFromMatch.Rd
@@ -32,6 +32,7 @@ Generate a list of tables aggregating the matched values
 }
 \examples{
 ## Loading Data
+library('epimatch')
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
 x <- lapply(indata, read.csv, stringsAsFactors = FALSE)

From fea14c432e51dbe2e44f3f44867de2877ed23a16 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 15:41:12 -0400
Subject: [PATCH 05/16] add collapse argument to output single data frame

---
 R/tableFromMatch.R | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index 607b714..dbeb062 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -4,6 +4,13 @@
 #' @param matchList a list derived from \code{\link{matchEpiData}}. If this is
 #'   \code{NULL}, a threshold can be supplied instead to calculate the list on
 #'   the fly.
+#' @param collapse When \code{TRUE}, the list of data frames will be collapsed
+#'   into one data frame with three extra columns specifying:
+#'   \itemize{
+#'       \item match group
+#'       \item data set
+#'       \item index in data set
+#'   }
 #'
 #' @return a list of data frames sorted in decreasing order of matching-ness.
 #' @export
@@ -41,7 +48,8 @@
 #'                     giveWeight = TRUE)
 #' tablesFromMatch(case, lab, funlist, matchList = res)
 #' tablesFromMatch(case, lab, funlist, matchList = 0.25)
-tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(), matchList = NULL){
+tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
+                            matchList = NULL, collapse = TRUE){
 
   # Check if incoming data are cromulent
   dat2exists <- !is.null(dat2) && is.data.frame(dat2)
@@ -85,5 +93,19 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(), matchList = NUL
       outlist[[i]] <- d1[indices$d1, ]
     }
   }
+
+  if (collapse){
+    groups <- lapply(lapply(matchList, unlist), length)
+    groups <- rep(seq_along(matchList), groups)
+    indices <- unlist(matchList)
+    data_length <- unlist(lapply(matchList, lapply, length))
+    data_source <- rep(c("d1", "d2"), length(matchList))
+    data_source <- rep(data_source, data_length)
+    outlist <- do.call("rbind", outlist)
+    outlist$groups  <- groups
+    outlist$dataset <- data_source
+    outlist$index   <- indices
+  }
+
   return(outlist)
 }

From 451a6c3f543263573fb43e712d0fe540df1a60c0 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 15:43:14 -0400
Subject: [PATCH 06/16] update documentation

---
 R/epimatch-package.r   |  1 +
 man/epimatch.Rd        |  1 +
 man/tablesFromMatch.Rd | 11 ++++++++++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/R/epimatch-package.r b/R/epimatch-package.r
index 2bc77cb..ec75319 100644
--- a/R/epimatch-package.r
+++ b/R/epimatch-package.r
@@ -25,6 +25,7 @@
 #'   \itemize{
 #'      \item \code{\link{processFunctionList}}
 #'      \item \code{\link{matchEpiData}}
+#'      \item \code{\link{tablesFromMatch}}
 #'   }
 #'
 #' @section Dissimilarity Functions: Each dissimilarity function returns a
diff --git a/man/epimatch.Rd b/man/epimatch.Rd
index 612bbe5..96d3767 100644
--- a/man/epimatch.Rd
+++ b/man/epimatch.Rd
@@ -37,6 +37,7 @@ Find matching patient records across tabular datasets
   \itemize{
      \item \code{\link{processFunctionList}}
      \item \code{\link{matchEpiData}}
+     \item \code{\link{tablesFromMatch}}
   }
 }
 
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
index f8f7605..2634c90 100644
--- a/man/tablesFromMatch.Rd
+++ b/man/tablesFromMatch.Rd
@@ -4,7 +4,8 @@
 \alias{tablesFromMatch}
 \title{Generate a list of tables aggregating the matched values}
 \usage{
-tablesFromMatch(dat1, dat2 = NULL, funlist = list(), matchList = NULL)
+tablesFromMatch(dat1, dat2 = NULL, funlist = list(), matchList = NULL,
+  collapse = TRUE)
 }
 \arguments{
 \item{dat1}{An input linelist}
@@ -23,6 +24,14 @@ tablesFromMatch(dat1, dat2 = NULL, funlist = list(), matchList = NULL)
 \item{matchList}{a list derived from \code{\link{matchEpiData}}. If this is
 \code{NULL}, a threshold can be supplied instead to calculate the list on
 the fly.}
+
+\item{collapse}{When \code{TRUE}, the list of data frames will be collapsed
+into one data frame with three extra columns specifying:
+\itemize{
+    \item match group
+    \item data set
+    \item index in data set
+}}
 }
 \value{
 a list of data frames sorted in decreasing order of matching-ness.

From e06555b591f684236d6c97c86b586d564dc113ab Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 15:48:56 -0400
Subject: [PATCH 07/16] add notes to future self and others

---
 R/tableFromMatch.R | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index dbeb062..41d255a 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -95,12 +95,23 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
   }
 
   if (collapse){
+    # This is where we make the data tidy by placing everything in a single data
+    # frame and adding three extra columns that contain the information that was
+    # in the result from `matchEpiData()`.
+
+    ## Give the indices of the highest list -------------------------
     groups <- lapply(lapply(matchList, unlist), length)
     groups <- rep(seq_along(matchList), groups)
-    indices <- unlist(matchList)
+
+    ## Give the source of the data ----------------------------------
     data_length <- unlist(lapply(matchList, lapply, length))
     data_source <- rep(c("d1", "d2"), length(matchList))
     data_source <- rep(data_source, data_length)
+
+    ## Give the indices within each data set ------------------------
+    indices <- unlist(matchList)
+
+    ## Create the resulting tidy data frame and add information -----
     outlist <- do.call("rbind", outlist)
     outlist$groups  <- groups
     outlist$dataset <- data_source

From fff6688c00f9341b487ef272d8a582ec8cfdc904 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 17:03:43 -0400
Subject: [PATCH 08/16] Add argument to return thresholds in tidy data

---
 R/tableFromMatch.R     | 35 +++++++++++++++++++++++++----------
 man/tablesFromMatch.Rd | 18 ++++++++++++------
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index 41d255a..2f95679 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -5,13 +5,17 @@
 #'   \code{NULL}, a threshold can be supplied instead to calculate the list on
 #'   the fly.
 #' @param collapse When \code{TRUE}, the list of data frames will be collapsed
-#'   into one data frame with three extra columns specifying:
-#'   \itemize{
-#'       \item match group
-#'       \item data set
-#'       \item index in data set
+#'   into one data frame with three or four extra columns specifying:
+#'   \describe{
+#'       \item{groups}{ match group in decreasing order of score}
+#'       \item{dataset}{ data set of origin}
+#'       \item{index}{ index in data set}
+#'       \item{score}{ the score of the matches to the first item in the group.}
 #'   }
 #'
+#' @details This will collect all of the data from \code{\link{matchEpiData}}
+#'   and present it in table format. It will collapse them into tidy tables and,
+#'   if provided, the score of the matches will be provided.
 #' @return a list of data frames sorted in decreasing order of matching-ness.
 #' @export
 #'
@@ -46,7 +50,7 @@
 #'                     funlist = funlist,
 #'                     thresh = 0.25,
 #'                     giveWeight = TRUE)
-#' tablesFromMatch(case, lab, funlist, matchList = res)
+#' tablesFromMatch(case, lab, funlist, matchList = res, collapse = FALSE)
 #' tablesFromMatch(case, lab, funlist, matchList = 0.25)
 tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
                             matchList = NULL, collapse = TRUE){
@@ -60,11 +64,17 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
   if (is.null(matchList) || length(matchList) == 0){
     stop("no matches found!")
   }
-  # If the matchList is a threshold parameter, pass it to the matchEpiData function.
+  # If the matchList is a threshold parameter, pass it to the matchEpiData
+  # function.
   if (is.numeric(matchList) && length(matchList) == 1){
-    matchList <- matchEpiData(dat1, dat2, funlist = funlist, thresh = matchList)
-  } else if (!is.integer(unlist(matchList))){
-    matchList <- getIndexList(matchList)
+    matchList <- matchEpiData(dat1, dat2, funlist = funlist, thresh = matchList,
+                              giveWeight = TRUE)
+  }
+  # If the incoming list contains numeric values, collect the thresholds and
+  # convert the list to indices.
+  if (!is.integer(unlist(matchList))){
+    theThresholds <- unlist(matchList)
+    matchList     <- getIndexList(matchList)
   }
 
   # Grab the tested variables from dat1
@@ -116,6 +126,11 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
     outlist$groups  <- groups
     outlist$dataset <- data_source
     outlist$index   <- indices
+
+    ## If the incoming list are thresholds, add them as a column ----
+    if (exists("theThresholds", inherits = FALSE)){
+      outlist$score <- theThresholds
+    }
   }
 
   return(outlist)
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
index 2634c90..f35eca4 100644
--- a/man/tablesFromMatch.Rd
+++ b/man/tablesFromMatch.Rd
@@ -26,11 +26,12 @@ tablesFromMatch(dat1, dat2 = NULL, funlist = list(), matchList = NULL,
 the fly.}
 
 \item{collapse}{When \code{TRUE}, the list of data frames will be collapsed
-into one data frame with three extra columns specifying:
-\itemize{
-    \item match group
-    \item data set
-    \item index in data set
+into one data frame with three or four extra columns specifying:
+\describe{
+    \item{groups}{ match group in decreasing order of score}
+    \item{dataset}{ data set of origin}
+    \item{index}{ index in data set}
+    \item{score}{ the score of the matches to the first item in the group.}
 }}
 }
 \value{
@@ -39,6 +40,11 @@ a list of data frames sorted in decreasing order of matching-ness.
 \description{
 Generate a list of tables aggregating the matched values
 }
+\details{
+This will collect all of the data from \code{\link{matchEpiData}}
+  and present it in table format. It will collapse them into tidy tables and,
+  if provided, the score of the matches will be provided.
+}
 \examples{
 ## Loading Data
 library('epimatch')
@@ -70,7 +76,7 @@ res <- matchEpiData(dat1 = case,
                     funlist = funlist,
                     thresh = 0.25,
                     giveWeight = TRUE)
-tablesFromMatch(case, lab, funlist, matchList = res)
+tablesFromMatch(case, lab, funlist, matchList = res, collapse = FALSE)
 tablesFromMatch(case, lab, funlist, matchList = 0.25)
 }
 

From 9061ea37bb80729e670298f0b3e530dac1561156 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 17:08:01 -0400
Subject: [PATCH 09/16] make getIndexList a user-facing function

---
 R/epi_self_match.R  | 49 +++++++++++++++++++++++++++++++++++++++
 R/internal.R        | 50 ----------------------------------------
 man/getIndexList.Rd | 56 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 50 deletions(-)
 create mode 100644 man/getIndexList.Rd

diff --git a/R/epi_self_match.R b/R/epi_self_match.R
index f0cffa0..008cebb 100644
--- a/R/epi_self_match.R
+++ b/R/epi_self_match.R
@@ -61,3 +61,52 @@ matchEpiData <- function(dat1, dat2 = NULL, funlist = list(), thresh = 0.05, giv
   }
   return(out)
 }
+
+#' return a list of indices from matchEpiData
+#'
+#' The output of matchEpiData can either be a list of indices or a named list of
+#' weights. If it's a named list of weights, this function will return the list
+#' of indices.
+#'
+#' @param matchList a list of lists of weights
+#'
+#' @return an unnamed list of lists of indices
+#' @keywords internal utilities
+#' @examples
+#' ## Loading Data
+#' library('epimatch')
+#' indata <- system.file("files", package = "epimatch")
+#' indata <- dir(indata, full.names = TRUE)
+#' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+#' names(x) <- basename(indata)
+#'
+#' # We will use one data set from the case information and lab results
+#' case <- x[["CaseInformationForm.csv"]]
+#' lab <- x[["LaboratoryResultsForm7.csv"]]
+#'
+#' funlist <- list(
+#'              list(d1vars = "ID",
+#'                   d2vars = "ID",
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 1),
+#'              list(d1vars = c("Surname", "OtherNames"),
+#'                   d2vars = c("SurnameLab", "OtherNameLab"),
+#'                   fun = "nameDists",
+#'                   extraparams = NULL,
+#'                   weight = 0.5)
+#'            )
+#' # This will get all of the indices that match the ID and Names with a
+#' # threshold of 0.25
+#' res <- matchEpiData(dat1 = case,
+#'                     dat2 = lab,
+#'                     funlist = funlist,
+#'                     thresh = 0.25,
+#'                     giveWeight = TRUE)
+#' res               # List of weights
+#' getIndexList(res) # List of indices
+getIndexList <- function(matchList){
+  out <- lapply(matchList, lapply, function(i) as.integer(names(i)))
+  names(out) <- NULL
+  return(out)
+}
diff --git a/R/internal.R b/R/internal.R
index 0a5a15e..3e7b62b 100644
--- a/R/internal.R
+++ b/R/internal.R
@@ -67,56 +67,6 @@ splitComma <- function(dat)
   return(new_dat)
 }
 
-
-#' return a list of indices from matchEpiData
-#'
-#' The output of matchEpiData can either be a list of indices or a named list of
-#' weights. If it's a named list of weights, this function will return the list
-#' of indices.
-#'
-#' @param matchList a list of lists of weights.
-#'
-#' @return an unnamed list of lists of indices
-#' @keywords internal
-#' @noRd
-#' @examples
-#' ## Loading Data
-#' indata <- system.file("files", package = "epimatch")
-#' indata <- dir(indata, full.names = TRUE)
-#' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
-#' names(x) <- basename(indata)
-#'
-#' # We will use one data set from the case information and lab results
-#' case <- x[["CaseInformationForm.csv"]]
-#' lab <- x[["LaboratoryResultsForm7.csv"]]
-#'
-#' funlist <- list(
-#'              list(d1vars = "ID",
-#'                   d2vars = "ID",
-#'                   fun = "nameDists",
-#'                   extraparams = NULL,
-#'                   weight = 1),
-#'              list(d1vars = c("Surname", "OtherNames"),
-#'                   d2vars = c("SurnameLab", "OtherNameLab"),
-#'                   fun = "nameDists",
-#'                   extraparams = NULL,
-#'                   weight = 0.5)
-#'            )
-#' # This will get all of the indices that match the ID and Names with a
-#' # threshold of 0.25
-#' res <- matchEpiData(dat1 = case,
-#'                     dat2 = lab,
-#'                     funlist = funlist,
-#'                     thresh = 0.25,
-#'                     giveWeight = TRUE)
-#' res               # List of weights
-#' getIndexList(res) # List of indices
-getIndexList <- function(matchList){
-  out <- lapply(matchList, lapply, function(i) as.integer(names(i)))
-  names(out) <- NULL
-  return(out)
-}
-
 #' Collapse columns of a data frame into one
 #'
 #' @param df a data frame with one or more columns
diff --git a/man/getIndexList.Rd b/man/getIndexList.Rd
new file mode 100644
index 0000000..f3a9dfe
--- /dev/null
+++ b/man/getIndexList.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epi_self_match.R
+\name{getIndexList}
+\alias{getIndexList}
+\title{return a list of indices from matchEpiData}
+\usage{
+getIndexList(matchList)
+}
+\arguments{
+\item{matchList}{a list of lists of weights}
+}
+\value{
+an unnamed list of lists of indices
+}
+\description{
+The output of matchEpiData can either be a list of indices or a named list of
+weights. If it's a named list of weights, this function will return the list
+of indices.
+}
+\examples{
+## Loading Data
+library('epimatch')
+indata <- system.file("files", package = "epimatch")
+indata <- dir(indata, full.names = TRUE)
+x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+names(x) <- basename(indata)
+
+# We will use one data set from the case information and lab results
+case <- x[["CaseInformationForm.csv"]]
+lab <- x[["LaboratoryResultsForm7.csv"]]
+
+funlist <- list(
+             list(d1vars = "ID",
+                  d2vars = "ID",
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 1),
+             list(d1vars = c("Surname", "OtherNames"),
+                  d2vars = c("SurnameLab", "OtherNameLab"),
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 0.5)
+           )
+# This will get all of the indices that match the ID and Names with a
+# threshold of 0.25
+res <- matchEpiData(dat1 = case,
+                    dat2 = lab,
+                    funlist = funlist,
+                    thresh = 0.25,
+                    giveWeight = TRUE)
+res               # List of weights
+getIndexList(res) # List of indices
+}
+\keyword{internal,}
+\keyword{utilities}
+

From 311647fbfe0da4f1565604a7e55092bd9219b547 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 17:13:04 -0400
Subject: [PATCH 10/16] export getIndexList

---
 NAMESPACE           | 1 +
 R/epi_self_match.R  | 1 +
 man/getIndexList.Rd | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/NAMESPACE b/NAMESPACE
index 3e81f22..3a4c84a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ export(dateDists)
 export(distFuns)
 export(genderDists)
 export(genericDists)
+export(getIndexList)
 export(launch)
 export(locationDists)
 export(matchEpiData)
diff --git a/R/epi_self_match.R b/R/epi_self_match.R
index 008cebb..c86b17f 100644
--- a/R/epi_self_match.R
+++ b/R/epi_self_match.R
@@ -72,6 +72,7 @@ matchEpiData <- function(dat1, dat2 = NULL, funlist = list(), thresh = 0.05, giv
 #'
 #' @return an unnamed list of lists of indices
 #' @keywords internal utilities
+#' @export
 #' @examples
 #' ## Loading Data
 #' library('epimatch')
diff --git a/man/getIndexList.Rd b/man/getIndexList.Rd
index f3a9dfe..20c708f 100644
--- a/man/getIndexList.Rd
+++ b/man/getIndexList.Rd
@@ -51,6 +51,6 @@ res <- matchEpiData(dat1 = case,
 res               # List of weights
 getIndexList(res) # List of indices
 }
-\keyword{internal,}
+\keyword{internal}
 \keyword{utilities}
 

From d07d8ca347f06c5fc8e25582123a016becf2ea86 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Thu, 4 Aug 2016 17:17:19 -0400
Subject: [PATCH 11/16] update shiny server to conserve the weights

This will allow future development so that we can
give the user the scores/weights in the table that
is output.
---
 inst/shiny/server.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/inst/shiny/server.R b/inst/shiny/server.R
index d5fedd6..16a06f8 100644
--- a/inst/shiny/server.R
+++ b/inst/shiny/server.R
@@ -251,11 +251,13 @@ function(input, output, session) {
       hide("findMatchesLoading")
     })
     tryCatch({
-      values$results <- epimatch::matchEpiData(
+      theMatches <- epimatch::matchEpiData(
         dat1 = values$data1, dat2 = values$data2,
         funlist = funlist,
-        thresh = input$threshold
+        thresh = input$threshold,
+        giveWeight = TRUE
       )
+      values$results <- epimatch::getIndexList(theMatches)
       show(selector = ".findMatchesDone")
       delay(1000, hide(selector = ".findMatchesDone", anim = TRUE, animType = "fade",
                        time = 0.5))

From 6f2da4b5d4b92c13fd7ee45867384a09e24bb9c4 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 6 Aug 2016 09:28:45 -0700
Subject: [PATCH 12/16] Remove library calls from functions

---
 R/epi_self_match.R         | 4 ++--
 R/matchAge.R               | 2 +-
 R/matchDate.R              | 2 +-
 R/matchGender.R            | 2 +-
 R/matchGeneric.R           | 2 +-
 R/matchNames.R             | 2 +-
 R/process_matching.R       | 2 +-
 R/tableFromMatch.R         | 2 +-
 man/ageDists.Rd            | 2 +-
 man/dateDists.Rd           | 2 +-
 man/genderDists.Rd         | 2 +-
 man/genericDists.Rd        | 2 +-
 man/getIndexList.Rd        | 2 +-
 man/matchEpiData.Rd        | 2 +-
 man/nameDists.Rd           | 2 +-
 man/processFunctionList.Rd | 2 +-
 man/tablesFromMatch.Rd     | 2 +-
 17 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/R/epi_self_match.R b/R/epi_self_match.R
index c86b17f..75707d1 100644
--- a/R/epi_self_match.R
+++ b/R/epi_self_match.R
@@ -16,7 +16,7 @@
 #'
 #' @examples
 #' ## Loading Data
-#' library('epimatch')
+#' 
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
 #' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
@@ -75,7 +75,7 @@ matchEpiData <- function(dat1, dat2 = NULL, funlist = list(), thresh = 0.05, giv
 #' @export
 #' @examples
 #' ## Loading Data
-#' library('epimatch')
+#' 
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
 #' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/R/matchAge.R b/R/matchAge.R
index f15f357..b559cf2 100644
--- a/R/matchAge.R
+++ b/R/matchAge.R
@@ -14,7 +14,7 @@
 #' @export
 #'
 #' @examples
-#' library('epimatch')
+#' 
 #' set.seed(999)
 #' x <- data.frame(age = rpois(5, 30), age_class = "YEAR")
 #' y <- data.frame(age = rpois(5, 18), age_class = "MONTH")
diff --git a/R/matchDate.R b/R/matchDate.R
index 860dbd8..1d15c41 100644
--- a/R/matchDate.R
+++ b/R/matchDate.R
@@ -14,7 +14,7 @@
 #' @importFrom lubridate parse_date_time
 #'
 #' @examples
-#' library('epimatch')
+#' 
 #'
 #' # Two data sets in Month-Day-Year and Year-Month-Day format, respectively.
 #' # Note that the way the dates can vary within data sets, but it's assumed
diff --git a/R/matchGender.R b/R/matchGender.R
index fa6d17c..29a1a8d 100644
--- a/R/matchGender.R
+++ b/R/matchGender.R
@@ -11,7 +11,7 @@
 #' @export
 #'
 #' @examples
-#' library('epimatch')
+#' 
 #' test <- data.frame(c("male", "f", "m"), stringsAsFactors = FALSE)
 #' genderDists(test)
 genderDists <- function(dat1, dat2=NULL,
diff --git a/R/matchGeneric.R b/R/matchGeneric.R
index 025e045..4d70b10 100644
--- a/R/matchGeneric.R
+++ b/R/matchGeneric.R
@@ -7,7 +7,7 @@
 #' @export
 #'
 #' @examples
-#' library('epimatch')
+#' 
 #' set.seed(9)
 #' x <- data.frame(dat = sample(10, replace = TRUE))
 #' x$let <- letters[x$dat]
diff --git a/R/matchNames.R b/R/matchNames.R
index a7f1314..a57c339 100644
--- a/R/matchNames.R
+++ b/R/matchNames.R
@@ -8,7 +8,7 @@
 #' @export
 #'
 #' @examples
-#' library('epimatch')
+#' 
 #' set.seed(9)
 #' x <- data.frame(x = letters, y = LETTERS, z = 1:26)
 #' x <- x[sample(nrow(x), 10, replace = TRUE), ]
diff --git a/R/process_matching.R b/R/process_matching.R
index 6cb4806..520acea 100644
--- a/R/process_matching.R
+++ b/R/process_matching.R
@@ -20,7 +20,7 @@
 #' @export
 #'
 #' @examples
-#' library('epimatch')
+#' 
 #' ## Loading Data
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index 2f95679..5d9da97 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -21,7 +21,7 @@
 #'
 #' @examples
 #' ## Loading Data
-#' library('epimatch')
+#' 
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
 #' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/man/ageDists.Rd b/man/ageDists.Rd
index 6c9418f..a4d4e05 100644
--- a/man/ageDists.Rd
+++ b/man/ageDists.Rd
@@ -27,7 +27,7 @@ This function will take one or two data frames containing ages
   data frame with a specifier for months or year in the second column.
 }
 \examples{
-library('epimatch')
+
 set.seed(999)
 x <- data.frame(age = rpois(5, 30), age_class = "YEAR")
 y <- data.frame(age = rpois(5, 18), age_class = "MONTH")
diff --git a/man/dateDists.Rd b/man/dateDists.Rd
index 6857dbc..2056cbc 100644
--- a/man/dateDists.Rd
+++ b/man/dateDists.Rd
@@ -27,7 +27,7 @@ The formats for dates are passed to \pkg{lubridate}'s function
   "ymd", etc.
 }
 \examples{
-library('epimatch')
+
 
 # Two data sets in Month-Day-Year and Year-Month-Day format, respectively.
 # Note that the way the dates can vary within data sets, but it's assumed
diff --git a/man/genderDists.Rd b/man/genderDists.Rd
index 60e5c38..dfe1204 100644
--- a/man/genderDists.Rd
+++ b/man/genderDists.Rd
@@ -27,7 +27,7 @@ A thin wrapper around generic, which converts some known male and female
 codes to binary
 }
 \examples{
-library('epimatch')
+
 test <- data.frame(c("male", "f", "m"), stringsAsFactors = FALSE)
 genderDists(test)
 }
diff --git a/man/genericDists.Rd b/man/genericDists.Rd
index a41b95a..46a4d27 100644
--- a/man/genericDists.Rd
+++ b/man/genericDists.Rd
@@ -18,7 +18,7 @@ a pairwise matrix of scores from 0 (exact match) to 1 (no match)
 generic function for exact matching single columns
 }
 \examples{
-library('epimatch')
+
 set.seed(9)
 x <- data.frame(dat = sample(10, replace = TRUE))
 x$let <- letters[x$dat]
diff --git a/man/getIndexList.Rd b/man/getIndexList.Rd
index 20c708f..a7ea751 100644
--- a/man/getIndexList.Rd
+++ b/man/getIndexList.Rd
@@ -19,7 +19,7 @@ of indices.
 }
 \examples{
 ## Loading Data
-library('epimatch')
+
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
 x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/man/matchEpiData.Rd b/man/matchEpiData.Rd
index f3e2a4f..49dabfc 100644
--- a/man/matchEpiData.Rd
+++ b/man/matchEpiData.Rd
@@ -42,7 +42,7 @@ this function will take in one or two data sets, a list of functions
 }
 \examples{
 ## Loading Data
-library('epimatch')
+
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
 x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
diff --git a/man/nameDists.Rd b/man/nameDists.Rd
index c79dd12..08aa219 100644
--- a/man/nameDists.Rd
+++ b/man/nameDists.Rd
@@ -20,7 +20,7 @@ Clean and match names column by removing punctuation and replacing
 punctuation with an underscore
 }
 \examples{
-library('epimatch')
+
 set.seed(9)
 x <- data.frame(x = letters, y = LETTERS, z = 1:26)
 x <- x[sample(nrow(x), 10, replace = TRUE), ]
diff --git a/man/processFunctionList.Rd b/man/processFunctionList.Rd
index f213384..3c8b097 100644
--- a/man/processFunctionList.Rd
+++ b/man/processFunctionList.Rd
@@ -29,7 +29,7 @@ column names, and function parameters, and return a list of matrices by
 function.
 }
 \examples{
-library('epimatch')
+
 ## Loading Data
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
index f35eca4..448b816 100644
--- a/man/tablesFromMatch.Rd
+++ b/man/tablesFromMatch.Rd
@@ -47,7 +47,7 @@ This will collect all of the data from \code{\link{matchEpiData}}
 }
 \examples{
 ## Loading Data
-library('epimatch')
+
 indata <- system.file("files", package = "epimatch")
 indata <- dir(indata, full.names = TRUE)
 x <- lapply(indata, read.csv, stringsAsFactors = FALSE)

From 453d98caf96a9736f3fe15e2557aa853db89d49f Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 6 Aug 2016 10:14:07 -0700
Subject: [PATCH 13/16] update Rproj specifications

---
 epimatch.Rproj | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/epimatch.Rproj b/epimatch.Rproj
index cba1b6b..7c89849 100644
--- a/epimatch.Rproj
+++ b/epimatch.Rproj
@@ -17,5 +17,6 @@ StripTrailingWhitespace: Yes
 
 BuildType: Package
 PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
+PackageInstallArgs: --no-multiarch --with-keep.source --install-tests
+PackageCheckArgs: --as-cran
 PackageRoxygenize: rd,collate,namespace

From 080b7d729464f002fae087ba62b01fbe22ad02a3 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 6 Aug 2016 10:15:10 -0700
Subject: [PATCH 14/16] Add information to list of tables

This addresses Dean's comment:
https://github.com/Hackout3/epimatch/pull/12#issuecomment-238000614
---
 R/tableFromMatch.R     | 52 ++++++++++++++++++++----------------------
 man/tablesFromMatch.Rd | 17 +++++++-------
 2 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index 5d9da97..010f0a2 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -5,23 +5,25 @@
 #'   \code{NULL}, a threshold can be supplied instead to calculate the list on
 #'   the fly.
 #' @param collapse When \code{TRUE}, the list of data frames will be collapsed
-#'   into one data frame with three or four extra columns specifying:
+#'   into one data frame an extra column specifying the group appended.
+#'
+#' @details This will collect all of the data from \code{\link{matchEpiData}}
+#'   and present it in table format. It will collapse them into tidy tables and,
+#'   if provided, the score of the matches will be provided.
+#' @return a list of data frames sorted in decreasing order of matching-ness
+#'   with three extra columns:
 #'   \describe{
-#'       \item{groups}{ match group in decreasing order of score}
-#'       \item{dataset}{ data set of origin}
+#'       \item{source}{ data set of origin}
 #'       \item{index}{ index in data set}
 #'       \item{score}{ the score of the matches to the first item in the group.}
 #'   }
+#'   When \code{collapse = TRUE}, a fourth column, \code{groups} is appended.
 #'
-#' @details This will collect all of the data from \code{\link{matchEpiData}}
-#'   and present it in table format. It will collapse them into tidy tables and,
-#'   if provided, the score of the matches will be provided.
-#' @return a list of data frames sorted in decreasing order of matching-ness.
 #' @export
 #'
 #' @examples
 #' ## Loading Data
-#' 
+#'
 #' indata <- system.file("files", package = "epimatch")
 #' indata <- dir(indata, full.names = TRUE)
 #' x <- lapply(indata, read.csv, stringsAsFactors = FALSE)
@@ -73,7 +75,7 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
   # If the incoming list contains numeric values, collect the thresholds and
   # convert the list to indices.
   if (!is.integer(unlist(matchList))){
-    theThresholds <- unlist(matchList)
+    theThresholds <- matchList
     matchList     <- getIndexList(matchList)
   }
 
@@ -93,15 +95,26 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
     names(d2) <- names(d1)
   }
 
-  # The output is a list of data frames subset in decreasing order of matching.
+  ## The output is a list of data frames subset in decreasing order of matching.
   outlist <- vector(length = length(matchList), mode = "list")
   for (i in seq(matchList)){
     indices <- matchList[[i]]
     if (dat2exists){
-      outlist[[i]] <- rbind(d1[indices$d1, ], d2[indices$d2, ])
+      outdat <- rbind(d1[indices$d1, ], d2[indices$d2, ])
     } else {
-      outlist[[i]] <- d1[indices$d1, ]
+      outdat <- d1[indices$d1, ]
     }
+    ## Give the indices within each data set ------------------------
+    outdat$index <- unlist(indices, use.names = FALSE)
+
+    ## Give the source of the data ----------------------------------
+    outdat$source <- rep(c("d1", "d2"), lapply(indices, length))
+
+    ## If the incoming list are thresholds, add them as a column ----
+    if (exists("theThresholds", inherits = FALSE)){
+      outdat$score <- unlist(theThresholds[[i]])
+    }
+    outlist[[i]] <- outdat
   }
 
   if (collapse){
@@ -113,24 +126,9 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
     groups <- lapply(lapply(matchList, unlist), length)
     groups <- rep(seq_along(matchList), groups)
 
-    ## Give the source of the data ----------------------------------
-    data_length <- unlist(lapply(matchList, lapply, length))
-    data_source <- rep(c("d1", "d2"), length(matchList))
-    data_source <- rep(data_source, data_length)
-
-    ## Give the indices within each data set ------------------------
-    indices <- unlist(matchList)
-
     ## Create the resulting tidy data frame and add information -----
     outlist <- do.call("rbind", outlist)
     outlist$groups  <- groups
-    outlist$dataset <- data_source
-    outlist$index   <- indices
-
-    ## If the incoming list are thresholds, add them as a column ----
-    if (exists("theThresholds", inherits = FALSE)){
-      outlist$score <- theThresholds
-    }
   }
 
   return(outlist)
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
index 448b816..4a6ef8f 100644
--- a/man/tablesFromMatch.Rd
+++ b/man/tablesFromMatch.Rd
@@ -26,16 +26,17 @@ tablesFromMatch(dat1, dat2 = NULL, funlist = list(), matchList = NULL,
 the fly.}
 
 \item{collapse}{When \code{TRUE}, the list of data frames will be collapsed
-into one data frame with three or four extra columns specifying:
-\describe{
-    \item{groups}{ match group in decreasing order of score}
-    \item{dataset}{ data set of origin}
-    \item{index}{ index in data set}
-    \item{score}{ the score of the matches to the first item in the group.}
-}}
+into one data frame an extra column specifying the group appended.}
 }
 \value{
-a list of data frames sorted in decreasing order of matching-ness.
+a list of data frames sorted in decreasing order of matching-ness
+  with three extra columns:
+  \describe{
+      \item{source}{ data set of origin}
+      \item{index}{ index in data set}
+      \item{score}{ the score of the matches to the first item in the group.}
+  }
+  When \code{collapse = TRUE}, a fourth column, \code{groups} is appended.
 }
 \description{
 Generate a list of tables aggregating the matched values

From 98528a50e7d407670b9f6d49ddc1a43562f19896 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 6 Aug 2016 10:18:11 -0700
Subject: [PATCH 15/16] Remove rownames and rename groups to group

This addresses Dean's comment:
https://github.com/Hackout3/epimatch/pull/12#issuecomment-237997712
---
 R/tableFromMatch.R     | 6 ++++--
 man/tablesFromMatch.Rd | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/tableFromMatch.R b/R/tableFromMatch.R
index 010f0a2..243e7ba 100644
--- a/R/tableFromMatch.R
+++ b/R/tableFromMatch.R
@@ -17,7 +17,7 @@
 #'       \item{index}{ index in data set}
 #'       \item{score}{ the score of the matches to the first item in the group.}
 #'   }
-#'   When \code{collapse = TRUE}, a fourth column, \code{groups} is appended.
+#'   When \code{collapse = TRUE}, a fourth column, \code{group} is appended.
 #'
 #' @export
 #'
@@ -114,6 +114,7 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
     if (exists("theThresholds", inherits = FALSE)){
       outdat$score <- unlist(theThresholds[[i]])
     }
+    rownames(outdat) <- NULL
     outlist[[i]] <- outdat
   }
 
@@ -128,7 +129,8 @@ tablesFromMatch <- function(dat1, dat2 = NULL, funlist = list(),
 
     ## Create the resulting tidy data frame and add information -----
     outlist <- do.call("rbind", outlist)
-    outlist$groups  <- groups
+    outlist$group  <- groups
+    rownames(outlist) <- NULL
   }
 
   return(outlist)
diff --git a/man/tablesFromMatch.Rd b/man/tablesFromMatch.Rd
index 4a6ef8f..eb5b582 100644
--- a/man/tablesFromMatch.Rd
+++ b/man/tablesFromMatch.Rd
@@ -36,7 +36,7 @@ a list of data frames sorted in decreasing order of matching-ness
       \item{index}{ index in data set}
       \item{score}{ the score of the matches to the first item in the group.}
   }
-  When \code{collapse = TRUE}, a fourth column, \code{groups} is appended.
+  When \code{collapse = TRUE}, a fourth column, \code{group} is appended.
 }
 \description{
 Generate a list of tables aggregating the matched values

From 74aa866d15813fc44524f8d3810382fa7295c890 Mon Sep 17 00:00:00 2001
From: Zhian Kamvar <kamvarz@science.oregonstate.edu>
Date: Sat, 6 Aug 2016 10:56:57 -0700
Subject: [PATCH 16/16] Add tests for tablesFromMatch

---
 tests/testthat/test-export.R | 55 ++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 tests/testthat/test-export.R

diff --git a/tests/testthat/test-export.R b/tests/testthat/test-export.R
new file mode 100644
index 0000000..37b1b36
--- /dev/null
+++ b/tests/testthat/test-export.R
@@ -0,0 +1,55 @@
+context("tablesFromMatch tests")
+
+indata   <- system.file("files", package = "epimatch")
+indata   <- dir(indata, full.names = TRUE)
+x        <- lapply(indata, read.csv, stringsAsFactors = FALSE)
+names(x) <- basename(indata)
+
+# We will use one data set from the case information and lab results
+case <- x[["CaseInformationForm.csv"]]
+lab <- x[["LaboratoryResultsForm7.csv"]]
+
+funlist <- list(
+             list(d1vars = "ID",
+                  d2vars = "ID",
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 1),
+             list(d1vars = c("Surname", "OtherNames"),
+                  d2vars = c("SurnameLab", "OtherNameLab"),
+                  fun = "nameDists",
+                  extraparams = NULL,
+                  weight = 0.5)
+           )
+res <- matchEpiData(dat1 = case,
+                    dat2 = lab,
+                    funlist = funlist,
+                    thresh = 0.25,
+                    giveWeight = TRUE)
+l_res <- tablesFromMatch(case, lab, funlist, matchList = res, collapse = FALSE)
+t_res <- tablesFromMatch(case, lab, funlist, matchList = 0.25)
+
+## This not only tests the conversion, but since the input for the tests above
+## are the same, this also tests to ensure that the matchList argument can take
+## the result of matchEpiData AND a threshold.
+test_that("list output and collapsed output are the same", {
+  expect_equal(sum(vapply(l_res, nrow, 1L)), nrow(t_res))
+  expect_equal(do.call("rbind", l_res), t_res[names(l_res[[1]])])
+})
+
+test_that("tablesFromMatch returns a list when collapse = FALSE", {
+  expect_is(l_res, "list")
+  for (i in seq_along(l_res)){
+    dat <- l_res[[i]]
+    expect_is(dat, "data.frame", info = paste("group", i))
+    expect_equal(ncol(dat), ncol(l_res[[i]]))
+  }
+})
+
+test_that("the data frame is tidy", {
+  expect_is(t_res, "data.frame")
+  expect_identical(names(t_res)[length(t_res)], "group")
+  expect_equal(length(unique(t_res$group)), length(l_res))
+  expect_equal(nrow(t_res), 20L)
+  expect_equal(ncol(t_res), 6L)
+})