Merge branch 'description' of https://github.com/OHDSI/phenotypeR int…

…o description
OHDSI · Oct 18, 2024 · 068b3c8 · 068b3c8
2 parents 3d74343 + f71771c
commit 068b3c8
Show file tree

Hide file tree

Showing 6 changed files with 85 additions and 84 deletions.
diff --git a/R/codelistDiagnostics.R b/R/codelistDiagnostics.R
@@ -49,7 +49,8 @@ codelistDiagnostics <- function(cohort){
   }
 
   cli::cli_bullets(c("*" = "Getting codelists from cohorts"))
-  # get all cohort codelists
+
+   # get all cohort codelists
   all_codelists <- omopgenerics::emptyCodelist()
   for(i in seq_along(cohortIds)){
     all_codelists <- purrr::flatten(list(

diff --git a/R/cohortDiagnostics.R b/R/cohortDiagnostics.R
@@ -1,8 +1,6 @@
 #' Run cohort-level diagnostics
 #'
 #' @inheritParams cohortDoc
-#' @param strata A list of variables to stratify results. These variables must
-#' have been added as additional columns in the cohort table.
 #'
 #' @return A summarised result
 #' @export
@@ -41,21 +39,28 @@
 #'  cdm$my_cohort |> cohortDiagnostics()
 #'  CDMConnector::cdmDisconnect(cdm = cdm)
 #' }
-cohortDiagnostics <- function(cohort,
-                              strata = list()){
+
+cohortDiagnostics <- function(cohort){
 
   cdm <- omopgenerics::cdmReference(cohort)
   cohortName <- omopgenerics::tableName(cohort)
   cohortIds <- omopgenerics::settings(cohort) |>
     dplyr::select("cohort_definition_id") |>
     dplyr::pull()
 
+  prefix <- omopgenerics::tmpPrefix()
+  tempCohortName  <- paste0(prefix, cohortName)
   results <- list()
 
+  cdm[[tempCohortName]]  <- cdm[[cohortName]] |>
+    PatientProfiles::addAge(ageGroup = list(c(0, 17), c(18, 64), c(65, 150))) |>
+    PatientProfiles::addSex() |>
+    CDMConnector::compute(name = tempCohortName, temporary = FALSE)
+
   cli::cli_bullets(c("*" = "Getting cohort summary"))
-  results[["cohort_summary"]] <- cdm[[cohortName]] |>
+  results[["cohort_summary"]] <- cdm[[tempCohortName]] |>
     CohortCharacteristics::summariseCharacteristics(
-      strata = strata,
+      strata = list("age_group", "sex"),
       tableIntersectCount = list(
         "Number visits prior year" = list(
           tableName = "visit_occurrence",
@@ -64,19 +69,29 @@ cohortDiagnostics <- function(cohort,
       )
     )
 
+  cli::cli_bullets(c("*" = "Getting age density"))
+  results[["cohort_density"]] <- cdm[[tempCohortName]] |>
+    PatientProfiles::summariseResult(
+      strata    = "sex",
+      includeOverallStrata = FALSE,
+      variables = "age",
+      estimates = "density") |>
+    suppressMessages()
+
+  omopgenerics::dropTable(cdm, dplyr::starts_with(prefix))
+
   cli::cli_bullets(c("*" = "Getting cohort attrition"))
   results[["cohort_attrition"]] <- cdm[[cohortName]] |>
     CohortCharacteristics::summariseCohortAttrition()
 
   if(length(cohortIds) > 1){
     cli::cli_bullets(c("*" = "Getting cohort overlap"))
     results[["cohort_overlap"]] <-  cdm[[cohortName]] |>
-      CohortCharacteristics::summariseCohortOverlap(strata = strata)
+      CohortCharacteristics::summariseCohortOverlap()
 
     cli::cli_bullets(c("*" = "Getting cohort timing"))
     results[["cohort_timing"]] <- cdm[[cohortName]] |>
-      CohortCharacteristics::summariseCohortTiming(strata = strata,
-                                                   density = TRUE)
+      CohortCharacteristics::summariseCohortTiming(estimates = "density")
     }
 
   results <- results |>

diff --git a/man/cohortDiagnostics.Rd b/man/cohortDiagnostics.Rd
diff --git a/tests/testthat/test-cohortDiagnostics.R b/tests/testthat/test-cohortDiagnostics.R
@@ -26,11 +26,18 @@ test_that("run with a single cohort", {
   db <- DBI::dbConnect(duckdb::duckdb())
   cdm <- CDMConnector::copyCdmTo(con = db, cdm = cdm_local,
                           schema ="main", overwrite = TRUE)
+
   expect_no_error(result <- cdm$my_cohort |>
     cohortDiagnostics())
 
+  # check density is being calculated
+  expect_true(any(stringr::str_detect(
+    omopgenerics::settings(result) |>
+      dplyr::pull("result_type"),
+    "table")))
+
   # cohort and timing and overlap should have been skipped
-  expect_false(any("cohort_overlap" ==
+  expect_false(any("summarise_cohort_overlap" ==
    omopgenerics::settings(result) |>
     dplyr::pull("result_type")))
 
@@ -66,6 +73,12 @@ test_that("run with multiple cohorts", {
   expect_no_error(result <- cdm$my_cohort |>
                     cohortDiagnostics())
 
+  # check density is being calculated
+  expect_true(any(stringr::str_detect(
+    omopgenerics::settings(result) |>
+      dplyr::pull("result_type"),
+    "table")))
+
   # cohort and timing and overlap should have been estimated now we have more than one cohort
   expect_true(any(stringr::str_detect(
     omopgenerics::settings(result) |>

diff --git a/tests/testthat/test-phenotypeDiagnostics.R b/tests/testthat/test-phenotypeDiagnostics.R
@@ -29,65 +29,66 @@ test_that("overall diagnostics function", {
                                  schema ="main", overwrite = TRUE)
 
   # running diagnostics should leave the original cohort unchanged
- cohort_pre <- cdm$my_cohort |>
+  cohort_pre <- cdm$my_cohort |>
     dplyr::collect()
- expect_no_error(my_result <- phenotypeDiagnostics(cdm$my_cohort))
- cohort_post <- cdm$my_cohort |>
-   dplyr::collect()
- expect_identical(cohort_pre,
-                  cohort_post)
+  expect_no_error(my_result <- phenotypeDiagnostics(cdm$my_cohort))
+  cohort_post <- cdm$my_cohort |>
+    dplyr::collect()
+  expect_identical(cohort_pre,
+                   cohort_post)
 
   expect_identical(phenotypeDiagnostics(cdm$my_cohort,
-            databaseDiagnostics = FALSE,
-            codelistDiagnostics = FALSE,
-            cohortDiagnostics = FALSE,
-            matchedDiagnostics = FALSE,
-            populationDiagnostics = FALSE),
-  omopgenerics::emptySummarisedResult())
+                                        databaseDiagnostics = FALSE,
+                                        codelistDiagnostics = FALSE,
+                                        cohortDiagnostics = FALSE,
+                                        matchedDiagnostics = FALSE,
+                                        populationDiagnostics = FALSE),
+                   omopgenerics::emptySummarisedResult())
 
   dd_only <- phenotypeDiagnostics(cdm$my_cohort,
-            databaseDiagnostics = TRUE,
-            codelistDiagnostics = FALSE,
-            cohortDiagnostics = FALSE,
-            matchedDiagnostics = FALSE,
-            populationDiagnostics = FALSE)
+                                  databaseDiagnostics = TRUE,
+                                  codelistDiagnostics = FALSE,
+                                  cohortDiagnostics = FALSE,
+                                  matchedDiagnostics = FALSE,
+                                  populationDiagnostics = FALSE)
   expect_true("summarise_omop_snapshot" %in%
                 (settings(dd_only) |> dplyr::pull("result_type")))
   expect_true("summarise_observation_period" %in%
                 (settings(dd_only) |> dplyr::pull("result_type")))
 
   # codelist diag will be empty currently
   code_diag_only <- phenotypeDiagnostics(cdm$my_cohort,
-            databaseDiagnostics = FALSE,
-            codelistDiagnostics = TRUE,
-            cohortDiagnostics = FALSE,
-            matchedDiagnostics = FALSE,
-            populationDiagnostics = FALSE)
+                                         databaseDiagnostics = FALSE,
+                                         codelistDiagnostics = TRUE,
+                                         cohortDiagnostics = FALSE,
+                                         matchedDiagnostics = FALSE,
+                                         populationDiagnostics = FALSE)
 
   cohort_diag_only <-  phenotypeDiagnostics(cdm$my_cohort,
-            databaseDiagnostics = FALSE,
-            codelistDiagnostics = FALSE,
-            cohortDiagnostics = TRUE,
-            matchedDiagnostics = FALSE,
-            populationDiagnostics = FALSE)
+                                            databaseDiagnostics = FALSE,
+                                            codelistDiagnostics = FALSE,
+                                            cohortDiagnostics = TRUE,
+                                            matchedDiagnostics = FALSE,
+                                            populationDiagnostics = FALSE)
   expect_true(
-   all(c("summarise_characteristics", "summarise_cohort_attrition",
-      "summarise_cohort_attrition",
-      "summarise_cohort_overlap", "summarise_cohort_timing") %in%
-    (settings(cohort_diag_only) |>
-                dplyr::pull("result_type"))))
+    all(c("summarise_characteristics", "summarise_table",
+          "summarise_cohort_attrition",
+          "summarise_cohort_attrition",
+          "summarise_cohort_overlap", "summarise_cohort_timing") %in%
+          (settings(cohort_diag_only) |>
+             dplyr::pull("result_type"))))
 
   cohort_pop_diag_only <-  phenotypeDiagnostics(cdm$my_cohort,
-            databaseDiagnostics = FALSE,
-            codelistDiagnostics = FALSE,
-            cohortDiagnostics = FALSE,
-            matchedDiagnostics = TRUE,
-            populationDiagnostics = FALSE)
+                                                databaseDiagnostics = FALSE,
+                                                codelistDiagnostics = FALSE,
+                                                cohortDiagnostics = FALSE,
+                                                matchedDiagnostics = TRUE,
+                                                populationDiagnostics = FALSE)
   expect_true(
     all(c("summarise_characteristics",
           "summarise_large_scale_characteristics") %in%
           unique(settings(cohort_pop_diag_only) |>
-             dplyr::pull("result_type"))))
+                   dplyr::pull("result_type"))))
 
 
-  })
+})
diff --git a/vignettes/a03_CohortDiagnostics.Rmd b/vignettes/a03_CohortDiagnostics.Rmd
@@ -73,38 +73,12 @@ Our results will include a summary of the overlap between our cohorts. We could
 plotCohortOverlap(cohort_diag, uniqueCombinations = TRUE)
 ```
 
-Moreover, our results will also include a summary of the characteristics of each cohort
+Moreover, our results will also include a summary of the characteristics of each cohort, stratified by age group and sex.
 ```{r}
-tableCharacteristics(cohort_diag)
-```
-
-
-## Cohort diagnostics - stratified by age and sex
-
-Say we wanted the same analyses to be performed, but stratified by age and sex. We can do this by first adding these characteristics to our cohort table.
-
-```{r}
-cdm$injuries <- cdm$injuries |> 
-  addDemographics(ageGroup = list(c(0, 17),
-                                  c(18, 65),
-                                  c(66, Inf))) |> 
-  compute(name = "injuries") |> 
-  omopgenerics::newCohortTable()
-
-cdm$injuries |> 
-  dplyr::glimpse()
-```
-
-Now we can include these additional variables for stratification when running our analyses.
-```{r}
-cohort_diag <- cohortDiagnostics(cdm$injuries, strata = c("sex", "age_group"))
-```
-
-Again we can visualise our results, but this time we have both stratified and overall results.
-```{r}
-plotCohortOverlap(cohort_diag, uniqueCombinations = TRUE, facet = c("age_group", "sex"))
+tableCharacteristics(cohort_diag, groupColumn = c("age_group", "sex"))
 ```
 
+You can also visualise the age distribution:
 ```{r}
-tableCharacteristics(cohort_diag)
+tableCharacteristics(cohort_diag, groupColumn = c("age_group", "sex"))
 ```