Perform transformations on several variables with dplyr::across() and dplyr::pick()

dplyr
across
pick
Author
Affiliations

Layal Christine Lettry

cynkra GmbH

University of Fribourg, Dept. of Informatics, ASAM Group

Published

October 13, 2023

Transforming many variables quickly and easily with just one line of code

Convert character variables into numeric

Suppose you have a data frame with several character variables that you want to transform into numeric ones. You can use the function across().

library(dplyr)
# Example data: 12 people, with every variable stored as character.
# Built column by column; position i in each vector describes person i.
chr_data <- tibble::tibble(
  year = c(
    "1990", "1990", "1990", "1990", "2000", "2000",
    "2000", "1990", "1990", "2000", "1990", "2000"
  ),
  country = c(
    "Italy", "France", "France", "Italy", "Switzerland", "France",
    "Italy", "Switzerland", "Italy", "Switzerland", "Switzerland", "France"
  ),
  sex = c(
    "Man", "Woman", "Woman", "Man", "Woman", "Woman",
    "Man", "Woman", "Man", "Woman", "Man", "Man"
  ),
  age = c(
    "28", "68", "22", "56", "42", "13",
    "43", "23", "36", "32", "23", "63"
  ),
  is_married = c(
    "No", "Yes", "No", "Yes", "Yes", "No",
    "Yes", "No", "Yes", "Yes", "No", "Yes"
  ),
  has_child = c(
    "Yes", "No", "No", "Yes", "Yes", "No",
    "No", "Yes", "Yes", "No", "No", "Yes"
  )
)

chr_data |>
  glimpse()
Rows: 12
Columns: 6
$ year       <chr> "1990", "1990", "1990", "1990", "2000", "2000", "2000", "19…
$ country    <chr> "Italy", "France", "France", "Italy", "Switzerland", "Franc…
$ sex        <chr> "Man", "Woman", "Woman", "Man", "Woman", "Woman", "Man", "W…
$ age        <chr> "28", "68", "22", "56", "42", "13", "43", "23", "36", "32",…
$ is_married <chr> "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No", "Yes", …
$ has_child  <chr> "Yes", "No", "No", "Yes", "Yes", "No", "No", "Yes", "Yes", …
# Coerce the character columns `year` and `age` to numeric (double);
# all other columns are left untouched.
num_data <- mutate(
  chr_data,
  across(c(year, age), \(col) as.numeric(col))
)

num_data |>
  glimpse()
Rows: 12
Columns: 6
$ year       <dbl> 1990, 1990, 1990, 1990, 2000, 2000, 2000, 1990, 1990, 2000,…
$ country    <chr> "Italy", "France", "France", "Italy", "Switzerland", "Franc…
$ sex        <chr> "Man", "Woman", "Woman", "Man", "Woman", "Woman", "Man", "W…
$ age        <dbl> 28, 68, 22, 56, 42, 13, 43, 23, 36, 32, 23, 63
$ is_married <chr> "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No", "Yes", …
$ has_child  <chr> "Yes", "No", "No", "Yes", "Yes", "No", "No", "Yes", "Yes", …

Convert character variables into logical

Let’s convert the sex, is_married and has_child character variables into logical ones.

# Turn the "Yes"/"No" columns into logicals and replace `sex` with the logical
# flag `is_woman`. A comparison already yields TRUE/FALSE (and NA for missing
# input), so wrapping it in if_else(..., TRUE, FALSE) is unnecessary.
data <- num_data |>
  mutate(
    across(c(is_married, has_child), \(x) x == "Yes"),
    is_woman = sex == "Woman"
  ) |>
  select(-sex) |>
  # Sort so that rows belonging to the same year/country sit together.
  arrange(year, country)

data
# A tibble: 12 × 6
    year country       age is_married has_child is_woman
   <dbl> <chr>       <dbl> <lgl>      <lgl>     <lgl>   
 1  1990 France         68 TRUE       FALSE     TRUE    
 2  1990 France         22 FALSE      FALSE     TRUE    
 3  1990 Italy          28 FALSE      TRUE      FALSE   
 4  1990 Italy          56 TRUE       TRUE      FALSE   
 5  1990 Italy          36 TRUE       TRUE      FALSE   
 6  1990 Switzerland    23 FALSE      TRUE      TRUE    
 7  1990 Switzerland    23 FALSE      FALSE     FALSE   
 8  2000 France         13 FALSE      FALSE     TRUE    
 9  2000 France         63 TRUE       TRUE      FALSE   
10  2000 Italy          43 TRUE       FALSE     FALSE   
11  2000 Switzerland    42 TRUE       TRUE      TRUE    
12  2000 Switzerland    32 TRUE       FALSE     TRUE    

Perform summary statistics on multiple variables

Let’s compute the mean of several variables grouped by year and country.

#' Compute group-wise means of several columns
#'
#' @param data A data frame (or tibble).
#' @param group_vars Tidy-select specification of the grouping columns,
#'   e.g. `c(year, country)` or `c("year", "country")`.
#' @param mean_vars Tidy-select specification of the columns to average.
#' @return An ungrouped data frame with one row per combination of
#'   `group_vars` and one `mean_<column>` column per selected variable.
#'   Missing values are ignored when computing the means.
my_mean <- function(data, group_vars, mean_vars) {
  data |>
    # pick() lets group_by() accept a tidy-select specification.
    group_by(pick({{ group_vars }})) |>
    # Each mean is a single value per group, so summarise() is the natural
    # verb; `.groups = "drop"` returns an ungrouped result.
    summarise(
      across(
        {{ mean_vars }},
        \(x) mean(x, na.rm = TRUE),
        .names = "mean_{.col}"
      ),
      .groups = "drop"
    )
}

# Average age and the three logical flags within each year/country group.
# The mean of a logical column is the share of TRUE values.
my_mean(
  data,
  group_vars = c(year, country),
  mean_vars = c(age, is_married, has_child, is_woman)
)
# A tibble: 6 × 6
   year country     mean_age mean_is_married mean_has_child mean_is_woman
  <dbl> <chr>          <dbl>           <dbl>          <dbl>         <dbl>
1  1990 France            45           0.5              0             1  
2  1990 Italy             40           0.667            1             0  
3  1990 Switzerland       23           0                0.5           0.5
4  2000 France            38           0.5              0.5           0.5
5  2000 Italy             43           1                0             0  
6  2000 Switzerland       37           1                0.5           1  

References

These examples are inspired by this article’s section.

Citation

BibTeX citation:
@online{lettry2023,
  author = {Lettry, Layal Christine},
  title = {Perform Transformations on Several Variables with
    `Dplyr::across()` and `Dplyr::pick()`},
  date = {2023-10-13},
  url = {https://rdiscovery.netlify.app/posts/2023-10-13_across-pick/},
  langid = {en}
}
For attribution, please cite this work as:
Lettry, Layal Christine. 2023. “Perform Transformations on Several Variables with `Dplyr::across()` and `Dplyr::pick()`.” October 13, 2023. https://rdiscovery.netlify.app/posts/2023-10-13_across-pick/.