Skip to content

Commit

Permalink
Merge branch 'description' of https://github.com/OHDSI/phenotypeR int…
Browse files Browse the repository at this point in the history
…o description
  • Loading branch information
xihang-chen committed Oct 18, 2024
2 parents 3d74343 + f71771c commit 068b3c8
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 84 deletions.
3 changes: 2 additions & 1 deletion R/codelistDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ codelistDiagnostics <- function(cohort){
}

cli::cli_bullets(c("*" = "Getting codelists from cohorts"))
# get all cohort codelists

# get all cohort codelists
all_codelists <- omopgenerics::emptyCodelist()
for(i in seq_along(cohortIds)){
all_codelists <- purrr::flatten(list(
Expand Down
33 changes: 24 additions & 9 deletions R/cohortDiagnostics.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' Run cohort-level diagnostics
#'
#' @inheritParams cohortDoc
#' @param strata A list of variables to stratify results. These variables must
#' have been added as additional columns in the cohort table.
#'
#' @return A summarised result
#' @export
Expand Down Expand Up @@ -41,21 +39,28 @@
#' cdm$my_cohort |> cohortDiagnostics()
#' CDMConnector::cdmDisconnect(cdm = cdm)
#' }
cohortDiagnostics <- function(cohort,
strata = list()){

cohortDiagnostics <- function(cohort){

cdm <- omopgenerics::cdmReference(cohort)
cohortName <- omopgenerics::tableName(cohort)
cohortIds <- omopgenerics::settings(cohort) |>
dplyr::select("cohort_definition_id") |>
dplyr::pull()

prefix <- omopgenerics::tmpPrefix()
tempCohortName <- paste0(prefix, cohortName)
results <- list()

cdm[[tempCohortName]] <- cdm[[cohortName]] |>
PatientProfiles::addAge(ageGroup = list(c(0, 17), c(18, 64), c(65, 150))) |>
PatientProfiles::addSex() |>
CDMConnector::compute(name = tempCohortName, temporary = FALSE)

cli::cli_bullets(c("*" = "Getting cohort summary"))
results[["cohort_summary"]] <- cdm[[cohortName]] |>
results[["cohort_summary"]] <- cdm[[tempCohortName]] |>
CohortCharacteristics::summariseCharacteristics(
strata = strata,
strata = list("age_group", "sex"),
tableIntersectCount = list(
"Number visits prior year" = list(
tableName = "visit_occurrence",
Expand All @@ -64,19 +69,29 @@ cohortDiagnostics <- function(cohort,
)
)

cli::cli_bullets(c("*" = "Getting age density"))
results[["cohort_density"]] <- cdm[[tempCohortName]] |>
PatientProfiles::summariseResult(
strata = "sex",
includeOverallStrata = FALSE,
variables = "age",
estimates = "density") |>
suppressMessages()

omopgenerics::dropTable(cdm, dplyr::starts_with(prefix))

cli::cli_bullets(c("*" = "Getting cohort attrition"))
results[["cohort_attrition"]] <- cdm[[cohortName]] |>
CohortCharacteristics::summariseCohortAttrition()

if(length(cohortIds) > 1){
cli::cli_bullets(c("*" = "Getting cohort overlap"))
results[["cohort_overlap"]] <- cdm[[cohortName]] |>
CohortCharacteristics::summariseCohortOverlap(strata = strata)
CohortCharacteristics::summariseCohortOverlap()

cli::cli_bullets(c("*" = "Getting cohort timing"))
results[["cohort_timing"]] <- cdm[[cohortName]] |>
CohortCharacteristics::summariseCohortTiming(strata = strata,
density = TRUE)
CohortCharacteristics::summariseCohortTiming(estimates = "density")
}

results <- results |>
Expand Down
5 changes: 1 addition & 4 deletions man/cohortDiagnostics.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 14 additions & 1 deletion tests/testthat/test-cohortDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,18 @@ test_that("run with a single cohort", {
db <- DBI::dbConnect(duckdb::duckdb())
cdm <- CDMConnector::copyCdmTo(con = db, cdm = cdm_local,
schema ="main", overwrite = TRUE)

expect_no_error(result <- cdm$my_cohort |>
cohortDiagnostics())

# check density is being calculated
expect_true(any(stringr::str_detect(
omopgenerics::settings(result) |>
dplyr::pull("result_type"),
"table")))

# cohort and timing and overlap should have been skipped
expect_false(any("cohort_overlap" ==
expect_false(any("summarise_cohort_overlap" ==
omopgenerics::settings(result) |>
dplyr::pull("result_type")))

Expand Down Expand Up @@ -66,6 +73,12 @@ test_that("run with multiple cohorts", {
expect_no_error(result <- cdm$my_cohort |>
cohortDiagnostics())

# check density is being calculated
expect_true(any(stringr::str_detect(
omopgenerics::settings(result) |>
dplyr::pull("result_type"),
"table")))

# cohort and timing and overlap should have been estimated now we have more than one cohort
expect_true(any(stringr::str_detect(
omopgenerics::settings(result) |>
Expand Down
79 changes: 40 additions & 39 deletions tests/testthat/test-phenotypeDiagnostics.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,65 +29,66 @@ test_that("overall diagnostics function", {
schema ="main", overwrite = TRUE)

# running diagnostics should leave the original cohort unchanged
cohort_pre <- cdm$my_cohort |>
cohort_pre <- cdm$my_cohort |>
dplyr::collect()
expect_no_error(my_result <- phenotypeDiagnostics(cdm$my_cohort))
cohort_post <- cdm$my_cohort |>
dplyr::collect()
expect_identical(cohort_pre,
cohort_post)
expect_no_error(my_result <- phenotypeDiagnostics(cdm$my_cohort))
cohort_post <- cdm$my_cohort |>
dplyr::collect()
expect_identical(cohort_pre,
cohort_post)

expect_identical(phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE),
omopgenerics::emptySummarisedResult())
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE),
omopgenerics::emptySummarisedResult())

dd_only <- phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = TRUE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
databaseDiagnostics = TRUE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
expect_true("summarise_omop_snapshot" %in%
(settings(dd_only) |> dplyr::pull("result_type")))
expect_true("summarise_observation_period" %in%
(settings(dd_only) |> dplyr::pull("result_type")))

# codelist diag will be empty currently
code_diag_only <- phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = FALSE,
codelistDiagnostics = TRUE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
databaseDiagnostics = FALSE,
codelistDiagnostics = TRUE,
cohortDiagnostics = FALSE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)

cohort_diag_only <- phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = TRUE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = TRUE,
matchedDiagnostics = FALSE,
populationDiagnostics = FALSE)
expect_true(
all(c("summarise_characteristics", "summarise_cohort_attrition",
"summarise_cohort_attrition",
"summarise_cohort_overlap", "summarise_cohort_timing") %in%
(settings(cohort_diag_only) |>
dplyr::pull("result_type"))))
all(c("summarise_characteristics", "summarise_table",
"summarise_cohort_attrition",
"summarise_cohort_attrition",
"summarise_cohort_overlap", "summarise_cohort_timing") %in%
(settings(cohort_diag_only) |>
dplyr::pull("result_type"))))

cohort_pop_diag_only <- phenotypeDiagnostics(cdm$my_cohort,
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = TRUE,
populationDiagnostics = FALSE)
databaseDiagnostics = FALSE,
codelistDiagnostics = FALSE,
cohortDiagnostics = FALSE,
matchedDiagnostics = TRUE,
populationDiagnostics = FALSE)
expect_true(
all(c("summarise_characteristics",
"summarise_large_scale_characteristics") %in%
unique(settings(cohort_pop_diag_only) |>
dplyr::pull("result_type"))))
dplyr::pull("result_type"))))


})
})
34 changes: 4 additions & 30 deletions vignettes/a03_CohortDiagnostics.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -73,38 +73,12 @@ Our results will include a summary of the overlap between our cohorts. We could
plotCohortOverlap(cohort_diag, uniqueCombinations = TRUE)
```

Moreover, our results will also include a summary of the characteristics of each cohort
Moreover, our results will also include a summary of the characteristics of each cohort, stratified by age group and sex.
```{r}
tableCharacteristics(cohort_diag)
```


## Cohort diagnostics - stratified by age and sex

Say we wanted the same analyses to be performed, but stratified by age and sex. We can do this by first adding these characteristics to our cohort table.

```{r}
cdm$injuries <- cdm$injuries |>
addDemographics(ageGroup = list(c(0, 17),
c(18, 65),
c(66, Inf))) |>
compute(name = "injuries") |>
omopgenerics::newCohortTable()
cdm$injuries |>
dplyr::glimpse()
```

Now we can include these additional variables for stratification when running our analyses.
```{r}
cohort_diag <- cohortDiagnostics(cdm$injuries, strata = c("sex", "age_group"))
```

Again we can visualise our results, but this time we have both stratified and overall results.
```{r}
plotCohortOverlap(cohort_diag, uniqueCombinations = TRUE, facet = c("age_group", "sex"))
tableCharacteristics(cohort_diag, groupColumn = c("age_group", "sex"))
```

You can also visualise the age distribution:
```{r}
tableCharacteristics(cohort_diag)
tableCharacteristics(cohort_diag, groupColumn = c("age_group", "sex"))
```

0 comments on commit 068b3c8

Please sign in to comment.