Skip to content

Commit

Permalink
Merge pull request #16 from KWB-R/clean
Browse files: browse the repository at this point in the history
Clean
  • Loading branch information
hsonne authored Sep 27, 2023
2 parents c0eb00a + d8539c3 commit 35ee852
Show file tree
Hide file tree
Showing 22 changed files with 307 additions and 216 deletions.
11 changes: 6 additions & 5 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,25 @@ jobs:
fail-fast: false
matrix:
config:
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: windows-latest, r: 'devel'}
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: windows-latest, r: 'devel'}
- {os: windows-latest, r: 'oldrel'}
- {os: windows-latest, r: 'release'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}

steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}

- uses: r-lib/actions/setup-pandoc@master
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2

- uses: r-lib/actions/setup-pandoc@master
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/pr-commands.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/pr-fetch@master
- uses: r-lib/actions/pr-fetch@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2
- name: Install dependencies
run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)'
- name: Document
Expand All @@ -23,7 +23,7 @@ jobs:
run: |
git add man/\* NAMESPACE
git commit -m 'Document'
- uses: r-lib/actions/pr-push@master
- uses: r-lib/actions/pr-push@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
style:
Expand All @@ -34,10 +34,10 @@ jobs:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2
- uses: r-lib/actions/pr-fetch@master
- uses: r-lib/actions/pr-fetch@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2
- name: Install dependencies
run: Rscript -e 'install.packages("styler")'
- name: Style
Expand All @@ -46,6 +46,6 @@ jobs:
run: |
git add \*.R
git commit -m 'Style'
- uses: r-lib/actions/pr-push@master
- uses: r-lib/actions/pr-push@v2
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
4 changes: 2 additions & 2 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@master
- uses: r-lib/actions/setup-r@v2

- uses: r-lib/actions/setup-pandoc@master
- uses: r-lib/actions/setup-pandoc@v2

- name: Query dependencies
run: |
Expand Down
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,25 @@ Imports:
ggplot2,
kwb.file,
kwb.read,
kwb.utils,
kwb.utils (>= 0.13.0),
magrittr,
openxlsx,
rlang,
stringr,
tibble,
tidyr,
wordcloud2,
xml2,
plotly
xml2
Suggests:
covr,
knitr,
plotly,
rmarkdown
Remotes:
github::kwb-r/kwb.file,
github::kwb-r/kwb.read,
github::kwb-r/kwb.utils
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.1
RoxygenNote: 7.2.3
VignetteBuilder: knitr
16 changes: 9 additions & 7 deletions R/check_for_differences.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
#' head(diffs_df)
#'
#' }
check_for_differences <- function(
df_x, df_y, dbg = TRUE) {
check_for_differences <- function(df_x, df_y, dbg = TRUE)
{
name_df_x <- deparse(substitute(df_x))
name_df_y <- deparse(substitute(df_y))

Expand All @@ -49,9 +49,9 @@ check_for_differences <- function(
tidy_name <- function(name) paste0(name, "_tidy")

get_text <- function(name_df, name_value) sprintf(
"Tidying data.frame '%s' and rename 'value' to '%s'. Saving to %s",
name_df, name_value, tidy_name(name_df)
)
"Tidying data.frame '%s' and rename 'value' to '%s'. Saving to %s",
name_df, name_value, tidy_name(name_df)
)

df_x_tidy <- kwb.utils::catAndRun(
messageText = get_text(name_df_x, name_value_x), dbg = dbg,
Expand Down Expand Up @@ -84,10 +84,12 @@ check_for_differences <- function(

diffs_idx <- kwb.utils::catAndRun(
messageText = messageText, dbg = dbg,
expr = which(!sapply(seq_len(nrow(df_xy_tidy)), function(row) identical(
expr = which(
!sapply(seq_len(nrow(df_xy_tidy)), function(row) identical(
df_xy_tidy[[name_value_x]][row],
df_xy_tidy[[name_value_y]][row]
)))
))
)
)

kwb.utils::catIf(dbg, sprintf(
Expand Down
22 changes: 10 additions & 12 deletions R/check_problematic_entries.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@
#' head(problematic_entries)
#' }
check_problematic_entries <- function(
endnote_list, give_hints = TRUE, dbg = TRUE) {
endnote_list,
give_hints = TRUE,
dbg = TRUE
)
{
name <- deparse(substitute(endnote_list))

entries_org <- kwb.utils::catAndRun(
sprintf(
"Creating data frame from '%s'",
deparse(substitute(endnote_list))
),
sprintf("Creating data frame from '%s'", name),
dbg = dbg,
expr = create_references_df(endnote_list)
)

entries_cleaned <- kwb.utils::catAndRun(
sprintf(
"Creating 'cleaned' data frame from '%s' for comparison",
deparse(substitute(endnote_list))
),
sprintf("Creating 'cleaned' data frame from '%s' for comparison", name),
dbg = dbg,
expr = clean_references_df(endnote_list, give_hints, dbg)
)
Expand All @@ -36,9 +36,7 @@ check_problematic_entries <- function(
identical(entries_org[[col_name]], entries_cleaned[[col_name]])
})

cols_with_problems <- names(which(has_problems))

check_list <- lapply(cols_with_problems, function(column) {
check_list <- lapply(names(which(has_problems)), function(column) {
indices <- which(!sapply(seq_len(nrow(entries_org)), function(i) {
identical(entries_org[[column]][i], entries_cleaned[[column]][i])
}))
Expand Down
18 changes: 10 additions & 8 deletions R/clean_references_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
#' @export
#' @importFrom stringr str_remove_all regex str_trim
#' @importFrom kwb.utils catAndRun
clean_dois <- function(dois, dbg = TRUE) {
clean_dois <- function(dois, dbg = TRUE)
{
remove <- stringr::str_remove_all

kwb.utils::catAndRun("Clean 'DOI'", dbg = dbg, expr = {
Expand All @@ -28,8 +29,8 @@ clean_dois <- function(dois, dbg = TRUE) {
#' @export
#' @importFrom stringr str_remove_all str_replace_all regex str_trim
#' @importFrom kwb.utils catAndRun
clean_project_names <- function(
project_names, give_hints = FALSE, dbg = TRUE) {
clean_project_names <- function(project_names, give_hints = FALSE, dbg = TRUE)
{
kwb.utils::catAndRun("Clean 'Project Names'", dbg = dbg, expr = {
project_names <- project_names %>%
stringr::str_replace_all("\\s+?/", ",") %>%
Expand Down Expand Up @@ -68,8 +69,8 @@ if (FALSE) {
#' @return vector with cleaned author names
#' @export
#' @importFrom kwb.utils catAndRun
clean_author_names <- function(
author_names, give_hints = FALSE, dbg = TRUE) {
clean_author_names <- function(author_names, give_hints = FALSE, dbg = TRUE)
{
kwb.utils::catAndRun(
"No cleaning of author_names implemented yet. Only hints are generated in
case that user defines 'give_hints = TRUE' (default: FALSE)",
Expand All @@ -94,8 +95,8 @@ clean_author_names <- function(
#' @return vector with cleaned accessibility information
#' @export
#' @importFrom stringr str_remove_all str_replace_all regex str_trim
clean_accessibility <- function(
access, give_hints = FALSE, dbg = TRUE) {
clean_accessibility <- function(access, give_hints = FALSE, dbg = TRUE)
{
replace_all <- function(string, pattern, replacement) {
stringr::str_replace_all(
string = string,
Expand Down Expand Up @@ -133,7 +134,8 @@ clean_accessibility <- function(
#' refs_clean_df <- clean_references_df(endnote_list)
#' head(refs_clean_df)
#' }
clean_references_df <- function(endnote_list, give_hints = FALSE, dbg = TRUE) {
clean_references_df <- function(endnote_list, give_hints = FALSE, dbg = TRUE)
{
refs_df <- create_references_df(endnote_list, collapse = TRUE)

refs_df <- kwb.utils::catAndRun(
Expand Down
18 changes: 12 additions & 6 deletions R/create_df_from_endnote_xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,29 @@
#' references_df <- create_df_from_endnote_xml()
#' head(references_df)
#'
create_df_from_endnote_xml <- function(endnote_xml = default_xml()) {
create_df_from_endnote_xml <- function(endnote_xml = default_xml())
{
references <- kwb.read::read_xml_as_path_value(endnote_xml)

xml_paths <- references$path %>%
stringr::str_remove_all(pattern = "^/xml/records/record")

references_df <- kwb.utils::asNoFactorDataFrame(cbind(
kwb.file::to_subdir_matrix(xml_paths),
references[, -1]
references[, -1L]
))

references_df[, 1] <- as.numeric(stringr::str_remove_all(
references_df[, 1], "\\[|\\]"
references_df[, 1L] <- as.numeric(stringr::str_remove_all(
references_df[, 1L], "\\[|\\]"
))

n_col <- ncol(references_df)

colnames(references_df) <- c("record_id", paste0("key", 1:(n_col - 2)), "value")
colnames(references_df) <- c(
"record_id",
paste0("key", seq_len(n_col - 2L)),
"value"
)

endnote_df <- dplyr::left_join(
references_df,
Expand All @@ -41,7 +46,8 @@ create_df_from_endnote_xml <- function(endnote_xml = default_xml()) {
add_file_info_attributes(endnote_df, endnote_xml)
}

if (FALSE) {
if (FALSE)
{
abstracts <- references_df %>%
dplyr::filter(.data$key1 == "abstract") %>%
dplyr::group_by(
Expand Down
4 changes: 2 additions & 2 deletions R/create_endnote_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#' str(endnote_list[1]$record)
#' attr(endnote_list, "xml_file_info")
#' attr(endnote_list, "xml_filename_without_extension")
create_endnote_list <- function(endnote_xml = default_xml()) {
create_endnote_list <- function(endnote_xml = default_xml())
{
endnote_list <- xml2::as_list(xml2::read_xml(endnote_xml))$xml$records


add_file_info_attributes(endnote_list, endnote_xml)
}
3 changes: 2 additions & 1 deletion R/create_list_by_pubtype_from_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
#' refs_list_by_pubtype <- create_list_by_pubtype_from_df(refs_df)
#' str(refs_list_by_pubtype, 1)
#' }
create_list_by_pubtype_from_df <- function(refs_df) {
create_list_by_pubtype_from_df <- function(refs_df)
{
refs_df <- refs_df %>%
dplyr::arrange(dplyr::desc(.data$rec_number))

Expand Down
3 changes: 2 additions & 1 deletion R/create_list_with_unique_entries.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
#' unique_entries_list <- create_list_with_unique_entries(refs_df)
#' str(unique_entries_list, 1)
#' }
create_list_with_unique_entries <- function(refs_df) {
create_list_with_unique_entries <- function(refs_df)
{
select_columns <- function(pattern) {
columns <- unique(stringr::str_extract(names(refs_df), pattern))
columns[!is.na(columns)]
Expand Down
Loading

0 comments on commit 35ee852

Please sign in to comment.