diff --git a/DESCRIPTION b/DESCRIPTION index 1a39c47..b3f8298 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,7 @@ Suggests: gt, omock, OmopSketch, + omopViewer, testthat (>= 3.0.0), knitr, RPostgres, @@ -50,5 +51,5 @@ Imports: URL: https://oxford-pharmacoepi.github.io/phenotypeR/ VignetteBuilder: knitr Remotes: - darwin-eu-dev/CohortCharacteristics - + darwin-eu-dev/CohortCharacteristics, + oxford-pharmacoepi/omopViewer diff --git a/NAMESPACE b/NAMESPACE index 69108db..a9d8ec8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,10 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(codelistDiagnostics) -export(cohortDiagnostics) -export(cohortToPopulationDiagnostics) -export(databaseDiagnostics) +export(phenotype) export(reportDiagnostics) export(shinyDiagnostics) importFrom(magrittr,"%>%") diff --git a/R/codelistDiagnostics.R b/R/codelistDiagnostics.R index 97b480e..97c5863 100644 --- a/R/codelistDiagnostics.R +++ b/R/codelistDiagnostics.R @@ -5,18 +5,7 @@ #' tables as these will be used for deriving concept counts. #' #' @return A summarised result -#' @export -#' -#' @examples -#' \donttest{ -#' library(CohortConstructor) -#' -#' cdm <- mockCohortConstructor(conditionOccurrence = TRUE) -#' -#' cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort") -#' -#' cohort |> attrition() -#' } +#' @noRd codelistDiagnostics <- function(cohort){ cdm <- omopgenerics::cdmReference(cohort) diff --git a/R/cohortDiagnostics.R b/R/cohortDiagnostics.R index dfcf8b4..c87e408 100644 --- a/R/cohortDiagnostics.R +++ b/R/cohortDiagnostics.R @@ -9,9 +9,7 @@ #' large scale characteristics #' #' @return A summarised result -#' @export -#' -#' @examples +#' @noRd cohortDiagnostics <- function(cohort, strata = list(), matchCohort = TRUE, diff --git a/R/cohortToPopulationDiagnostics.R b/R/cohortToPopulationDiagnostics.R index 44ca543..b518b1c 100644 --- a/R/cohortToPopulationDiagnostics.R +++ b/R/cohortToPopulationDiagnostics.R @@ -5,9 +5,7 @@ #' NULL, no sampling will be performed. #' #' @return A summarised result -#' @export -#' -#' @examples +#' @noRd cohortToPopulationDiagnostics <- function(cohort, nSample = 1000){ diff --git a/R/databaseDiagnostics.R b/R/databaseDiagnostics.R index 6cb9b70..9967716 100644 --- a/R/databaseDiagnostics.R +++ b/R/databaseDiagnostics.R @@ -5,9 +5,7 @@ #' @param cdm CDM reference #' #' @return -#' @export -#' -#' @examples +#' @noRd databaseDiagnostics <- function(cdm){ OmopSketch::summariseOmopSnapshot(cdm) diff --git a/R/phenotype.R b/R/phenotype.R new file mode 100644 index 0000000..35eaa8f --- /dev/null +++ b/R/phenotype.R @@ -0,0 +1,32 @@ + +#' Phenotype a cohort +#' +#' @param cohort Cohort +#' @param nSample The number of people to take a random sample for matching to +#' the database population. If NULL, no sampling will be performed and the +#' entire cohorts will be used. +#' +#' @return A summarised result +#' @export +#' +#' @examples +phenotype <- function(cohort, + nSample = 1000){ + + cdm <- omopgenerics::cdmReference(cohort) + + results <- list() + + results[["db_diag"]] <- databaseDiagnostics(cdm) + results[["code_diag"]] <- codelistDiagnostics(cohort) + results[["cohort_diag"]] <- cohortDiagnostics(cohort) + results[["cohort_to_pop_diag"]] <- cohortToPopulationDiagnostics(cohort, + nSample = nSample) + + results <- results |> + vctrs::list_drop_empty() |> + omopgenerics::bind() + + results + +} diff --git a/README.Rmd b/README.Rmd index 39c4278..60155db 100644 --- a/README.Rmd +++ b/README.Rmd @@ -60,26 +60,9 @@ cdm$gibleed <- conceptCohort(cdm = cdm, conceptSet = list(gibleed = 192671L), name = "gibleed") -gibleed_code_diag <- cdm$gibleed |> - codelistDiagnostics() -gibleed_code_diag |> +result <- cdm$gibleed |> + phenotype() +result |> glimpse() ``` -## Cohort diagnostics - -```{r, message=FALSE} -gibleed_cohort_diag <- cdm$gibleed |> - cohortDiagnostics() -gibleed_cohort_diag |> - glimpse() -``` - - -## Combining results -```{r, message=FALSE} -diagnostics <- bind(gibleed_cohort_diag) |> - suppress(minCellCount = 5) -diagnostics |> - glimpse() -``` diff --git a/README.md b/README.md index bceda60..a95508f 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ library(CDMConnector) library(phenotypeR) library(CohortConstructor) library(dplyr) +#> Warning: package 'dplyr' was built under R version 4.2.3 con <- DBI::dbConnect(duckdb::duckdb(dbdir = CDMConnector::eunomia_dir())) cdm <- CDMConnector::cdm_from_con(con = con, @@ -56,71 +57,25 @@ cdm$gibleed <- conceptCohort(cdm = cdm, conceptSet = list(gibleed = 192671L), name = "gibleed") -gibleed_code_diag <- cdm$gibleed |> - codelistDiagnostics() -gibleed_code_diag |> +result <- cdm$gibleed |> + phenotype() +#> Warning: The CDM reference containing the cohort must also contain achilles tables. +#> Returning only index event breakdown. +result |> glimpse() -#> Rows: 4 +#> Rows: 3,017 #> Columns: 13 -#> $ result_id 1, 1, 1, 1 +#> $ result_id 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3,… #> $ cdm_name "Synthea synthetic health database", "Synthea synthet… -#> $ group_name "cohort_name &&& codelist_name", "cohort_name &&& cod… -#> $ group_level "gibleed &&& gibleed", "gibleed &&& gibleed", "giblee… -#> $ strata_name "overall", "overall", "overall", "overall" -#> $ strata_level "overall", "overall", "overall", "overall" -#> $ variable_name "overall", "Gastrointestinal hemorrhage", "overall", … -#> $ variable_level NA, "192671", NA, "192671" -#> $ estimate_name "record_count", "record_count", "person_count", "pers… -#> $ estimate_type "integer", "integer", "integer", "integer" -#> $ estimate_value "479", "479", "479", "479" -#> $ additional_name "overall", "source_concept_name &&& source_concept_id… -#> $ additional_level "overall", "Gastrointestinal hemorrhage, unspecified … -``` - -## Cohort diagnostics - -``` r -gibleed_cohort_diag <- cdm$gibleed |> - cohortDiagnostics() -gibleed_cohort_diag |> - glimpse() -#> Rows: 1,814 -#> Columns: 13 -#> $ result_id 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,… -#> $ cdm_name "Synthea synthetic health database", "Synthea synthet… -#> $ group_name "cohort_name", "cohort_name", "cohort_name", "cohort_… -#> $ group_level "gibleed", "gibleed", "gibleed", "gibleed", "gibleed"… -#> $ strata_name "reason", "reason", "reason", "reason", "overall", "o… -#> $ strata_level "Initial qualifying events", "Initial qualifying even… -#> $ variable_name "number_records", "number_subjects", "excluded_record… -#> $ variable_level NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… -#> $ estimate_name "count", "count", "count", "count", "count", "count",… -#> $ estimate_type "integer", "integer", "integer", "integer", "integer"… -#> $ estimate_value "479", "479", "0", "0", "479", "479", "1944-01-20", "… -#> $ additional_name "reason_id", "reason_id", "reason_id", "reason_id", "… -#> $ additional_level "1", "1", "1", "1", "overall", "overall", "overall", … -``` - -## Combining results - -``` r -diagnostics <- bind(gibleed_cohort_diag) |> - suppress(minCellCount = 5) -diagnostics |> - glimpse() -#> Rows: 1,814 -#> Columns: 13 -#> $ result_id 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,… -#> $ cdm_name "Synthea synthetic health database", "Synthea synthet… -#> $ group_name "cohort_name", "cohort_name", "cohort_name", "cohort_… -#> $ group_level "gibleed", "gibleed", "gibleed", "gibleed", "gibleed"… -#> $ strata_name "reason", "reason", "reason", "reason", "overall", "o… -#> $ strata_level "Initial qualifying events", "Initial qualifying even… -#> $ variable_name "number_records", "number_subjects", "excluded_record… +#> $ group_name "overall", "overall", "overall", "overall", "overall"… +#> $ group_level "overall", "overall", "overall", "overall", "overall"… +#> $ strata_name "overall", "overall", "overall", "overall", "overall"… +#> $ strata_level "overall", "overall", "overall", "overall", "overall"… +#> $ variable_name "general", "general", "observation_period", "cdm", "g… #> $ variable_level NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N… -#> $ estimate_name "count", "count", "count", "count", "count", "count",… -#> $ estimate_type "integer", "integer", "integer", "integer", "integer"… -#> $ estimate_value "479", "479", "0", "0", "479", "479", "1944-01-20", "… -#> $ additional_name "reason_id", "reason_id", "reason_id", "reason_id", "… -#> $ additional_level "1", "1", "1", "1", "overall", "overall", "overall", … +#> $ estimate_name "snapshot_date", "person_count", "count", "source_nam… +#> $ estimate_type "date", "integer", "integer", "character", "character… +#> $ estimate_value "2024-09-29", "2694", "5343", "Synthea synthetic heal… +#> $ additional_name "overall", "overall", "overall", "overall", "overall"… +#> $ additional_level "overall", "overall", "overall", "overall", "overall"… ``` diff --git a/man/codelistDiagnostics.Rd b/man/codelistDiagnostics.Rd deleted file mode 100644 index 5e2a515..0000000 --- a/man/codelistDiagnostics.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/codelistDiagnostics.R -\name{codelistDiagnostics} -\alias{codelistDiagnostics} -\title{Run codelist-level diagnostics} -\usage{ -codelistDiagnostics(cohort) -} -\arguments{ -\item{cohort}{A cohort table in a cdm reference. The cohort_codelist -attribute must be populated. The cdm reference must contain achilles -tables as these will be used for deriving concept counts.} -} -\value{ -A summarised result -} -\description{ -Run codelist-level diagnostics -} -\examples{ -\donttest{ -library(CohortConstructor) - -cdm <- mockCohortConstructor(conditionOccurrence = TRUE) - -cohort <- conceptCohort(cdm = cdm, conceptSet = list(a = 1), name = "cohort") - -cohort |> attrition() -} -} diff --git a/man/cohortDiagnostics.Rd b/man/cohortDiagnostics.Rd deleted file mode 100644 index 2454055..0000000 --- a/man/cohortDiagnostics.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cohortDiagnostics.R -\name{cohortDiagnostics} -\alias{cohortDiagnostics} -\title{Run cohort-level diagnostics} -\usage{ -cohortDiagnostics(cohort, strata = list(), matchCohort = TRUE, nSample = NULL) -} -\arguments{ -\item{cohort}{Cohort table in a cdm reference} - -\item{strata}{A list of variables to stratify results. These variables must -have been added as additional columns in the cohort table.} - -\item{matchCohort}{TRUE or FALSE. If TRUE age and sex matched cohorts will -be added for each cohort in the cohort table} - -\item{nSample}{The number of people to take a random sample for running -large scale characteristics} -} -\value{ -A summarised result -} -\description{ -Run cohort-level diagnostics -} diff --git a/man/cohortToPopulationDiagnostics.Rd b/man/cohortToPopulationDiagnostics.Rd deleted file mode 100644 index d58dd55..0000000 --- a/man/cohortToPopulationDiagnostics.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cohortToPopulationDiagnostics.R -\name{cohortToPopulationDiagnostics} -\alias{cohortToPopulationDiagnostics} -\title{Compare characteristics of cohort matched to database population} -\usage{ -cohortToPopulationDiagnostics(cohort, nSample = 1000) -} -\arguments{ -\item{cohort}{Cohort table in a cdm reference} - -\item{nSample}{The number of people to take a random sample for matching. If -NULL, no sampling will be performed.} -} -\value{ -A summarised result -} -\description{ -Compare characteristics of cohort matched to database population -} diff --git a/man/databaseDiagnostics.Rd b/man/databaseDiagnostics.Rd deleted file mode 100644 index 5d16986..0000000 --- a/man/databaseDiagnostics.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/databaseDiagnostics.R -\name{databaseDiagnostics} -\alias{databaseDiagnostics} -\title{Database diagnostics} -\usage{ -databaseDiagnostics(cdm) -} -\arguments{ -\item{cdm}{CDM reference} -} -\description{ -Database diagnostics -} diff --git a/man/phenotype.Rd b/man/phenotype.Rd new file mode 100644 index 0000000..126de58 --- /dev/null +++ b/man/phenotype.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/phenotype.R +\name{phenotype} +\alias{phenotype} +\title{Phenotype a cohort} +\usage{ +phenotype(cohort, nSample = 1000) +} +\arguments{ +\item{cohort}{Cohort} + +\item{nSample}{The number of people to take a random sample for matching to +the database population. If NULL, no sampling will be performed and the +entire cohorts will be used.} +} +\value{ +A summarised result +} +\description{ +Phenotype a cohort +} diff --git a/tests/testthat/test-dbms.R b/tests/testthat/test-dbms.R index b4cc04b..317c143 100644 --- a/tests/testthat/test-dbms.R +++ b/tests/testthat/test-dbms.R @@ -17,16 +17,9 @@ test_that("eunomia", { cdm$meds <- CohortConstructor::conceptCohort(cdm = cdm, conceptSet = meds_cs, name = "meds") - result_code_diag <- codelistDiagnostics(cdm$meds) # only partial results without achilles - result_cohort_diag <- cohortDiagnostics(cdm$meds) - result_cohort_to_pop_diag <- cohortToPopulationDiagnostics(cdm$meds) - results <- omopgenerics::bind(result_code_diag, - result_cohort_diag, - result_cohort_to_pop_diag) - + results <- phenotype(cdm$meds) expect_no_error(shinyDiagnostics(result = results)) - # omopViewer::exportStaticApp(results) - + omopViewer::exportStaticApp(results) }) test_that("postgres test", { @@ -56,24 +49,20 @@ test_that("postgres test", { name = "drugs") cdm <- omopgenerics::bind(cdm$asthma, cdm$drugs, name = "my_cohort") - result_code_diag <- codelistDiagnostics(cdm$my_cohort) - # shiny with only codelist results - expect_no_error(shinyDiagnostics(result_code_diag)) - expect_no_error(CodelistGenerator::tableCohortCodeUse(result_code_diag)) - expect_no_error(CodelistGenerator::tableAchillesCodeUse(result_code_diag)) - expect_no_error(CodelistGenerator::tableOrphanCodes(result_code_diag)) - - result_cohort_diag <- cohortDiagnostics(cdm$my_cohort) - expect_no_error(reportDiagnostics(result = result_code_diag)) - # shiny with only cohort diagnostics results - expect_no_error(shinyDiagnostics(result = result_cohort_diag)) - - result_cohort_pop_diag <- cohortToPopulationDiagnostics(cdm$my_cohort) - expect_no_error(shinyDiagnostics(result = result_cohort_pop_diag)) + results <- phenotype(cdm$my_cohort) + expect_no_error(shinyDiagnostics(result = results)) + expect_no_error(CodelistGenerator::tableCohortCodeUse(results)) + expect_no_error(CodelistGenerator::tableAchillesCodeUse(results)) + expect_no_error(CodelistGenerator::tableOrphanCodes(results)) - # shiny with all results - result_all <- omopgenerics::bind(result_code_diag, result_cohort_diag) - expect_no_error(shinyDiagnostics(result = result_all)) + expect_no_error(CohortCharacteristics::tableCharacteristics(results)) + expect_no_error(CohortCharacteristics::tableCohortAttrition(results)) + expect_no_error(CohortCharacteristics::tableCohortOverlap(results)) + expect_no_error(CohortCharacteristics::tableCohortTiming(results)) + expect_no_error(CohortCharacteristics::tableLargeScaleCharacteristics(results)) + results$variable_name <- CodelistGenerator:::tidyWords(results$variable_name) + # omopViewer::exportStaticApp(results) + expect_no_error(shinyDiagnostics(result = results)) CDMConnector::cdm_disconnect(cdm = cdm) diff --git a/tests/testthat/test-phenotype.R b/tests/testthat/test-phenotype.R new file mode 100644 index 0000000..d2ee2be --- /dev/null +++ b/tests/testthat/test-phenotype.R @@ -0,0 +1,34 @@ +test_that("multiplication works", { + cdm_local <- omock::mockCdmReference() |> + omock::mockPerson(nPerson = 100) |> + omock::mockObservationPeriod() |> + omock::mockConditionOccurrence() |> + omock::mockDrugExposure() |> + omock::mockObservation() |> + omock::mockMeasurement() |> + omock::mockCohort(name = "my_cohort", + numberCohorts = 2) + cdm_local$visit_occurrence <- dplyr::tibble( + person_id = 1L, + visit_occurrence_id = 1L, + visit_concept_id = 1L, + visit_start_date = as.Date("2000-01-01"), + visit_end_date = as.Date("2000-01-01"), + visit_type_concept_id = 1L + ) + cdm_local$procedure_occurrence <- dplyr::tibble( + person_id = 1L, + procedure_occurrence_id = 1L, + procedure_concept_id = 1L, + procedure_date = as.Date("2000-01-01"), + procedure_type_concept_id = 1L + ) + + db <- DBI::dbConnect(duckdb::duckdb()) + cdm <- CDMConnector::copyCdmTo(con = db, cdm = cdm_local, + schema ="main", overwrite = TRUE) + + my_result <- phenotype(cdm$my_cohort) + omopViewer::exportStaticApp(my_result) + + }) diff --git a/vignettes/a02_CodelistDiagnostics.Rmd b/vignettes/a02_CodelistDiagnostics.Rmd index c6412da..4137b98 100644 --- a/vignettes/a02_CodelistDiagnostics.Rmd +++ b/vignettes/a02_CodelistDiagnostics.Rmd @@ -58,6 +58,6 @@ cdm$injuries |> ## Summarising code use ```{r} -code_diag <- codelistDiagnostics(cohort = cdm$injuries) +code_diag <- phenotype(cdm$injuries) tableCohortCodeUse(code_diag) ```