Setup

To demonstrate the key features of the smart.data package, we’ll use the mtcars data set:

library(smart.data)
#> Loading required package: magrittr
#> Loading required package: data.table
smart.start()
smrt <- smart.data$new(x = mtcars, name = "smart_cars")

Naming API

# Build the naming map and apply it to the data.
# Each entry has the form new_name = "old_name":
#   - `make_model = "rn"` is needed because row names are retained (as column
#     "rn") when the input data is converted to a `data.table` object
#   - `rlang::set_names(x, toupper(x))` pairs every original `mtcars` column
#     name with its upper-case version
# `!!!` splices the combined named vector into `naming.rule()` as individual
# arguments; `enforce.rules(for_naming)` then applies the pending renames.
smrt$naming.rule(
    !!!c(make_model = "rn"
             , (\(x) rlang::set_names(x, toupper(x)))(names(mtcars))))$
    enforce.rules(for_naming)

print(head(smrt$data))
#>           make_model  MPG CYL DISP  HP DRAT    WT  QSEC VS AM GEAR CARB
#> 1:         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> 2:     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> 3:        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> 4:    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> 5: Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
#> 6:           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Taxonomy API

Defining Taxonomy Terms

Next, we’ll define a set of taxonomy terms and map them to subsets of the field names.

Note: if a pending naming map exists, it is automatically enforced first, so that taxonomy field selection is based on the current data names.

Taxonomy terms:

  • identifier: make_model
  • performance: MPG, QSEC, HP
  • metrics: CYL, DISP, DRAT, WT, GEAR, CARB
  • style (registered below with term = "characteristics"): VS, AM
identifier <- taxonomy(term = "identifier", desc = "Make and Model"
                                             , fields = c("make_model"))
performance <- taxonomy(term = "performance", desc = "Performance stats"
                                                , fields = c("MPG", "QSEC", "HP"))
metrics <- taxonomy(term = "metrics", desc = "Physical Metrics"
                                        , fields = c("CYL", "DISP", "DRAT", "WT", "GEAR", "CARB"))
style <- taxonomy(term = "characteristics", desc = "Categorical Descriptors"
                                    , fields = c("VS", "AM"))

smrt$
    taxonomy.rule(identifier, performance, metrics, style)$
    enforce.rules(for_usage)

Using Taxonomy Terms

To use the defined terms, class method $use() is invoked. The output can be tailored depending on the arguments supplied:

# Use everything
smrt$use() |> head()
#>           make_model CYL DISP DRAT    WT GEAR CARB  MPG  QSEC  HP VS AM
#> 1:         Mazda RX4   6  160 3.90 2.620    4    4 21.0 16.46 110  0  1
#> 2:     Mazda RX4 Wag   6  160 3.90 2.875    4    4 21.0 17.02 110  0  1
#> 3:        Datsun 710   4  108 3.85 2.320    4    1 22.8 18.61  93  1  1
#> 4:    Hornet 4 Drive   6  258 3.08 3.215    3    1 21.4 19.44 110  1  0
#> 5: Hornet Sportabout   8  360 3.15 3.440    3    2 18.7 17.02 175  0  0
#> 6:           Valiant   6  225 2.76 3.460    3    1 18.1 20.22 105  1  0

# Use 'identifier' fields
smrt$use(identifier) |> head()
#>           make_model
#> 1:         Mazda RX4
#> 2:     Mazda RX4 Wag
#> 3:        Datsun 710
#> 4:    Hornet 4 Drive
#> 5: Hornet Sportabout
#> 6:           Valiant

# Use 'performance' fields
smrt$use(performance) |> head()
#>     MPG  QSEC  HP
#> 1: 21.0 16.46 110
#> 2: 21.0 17.02 110
#> 3: 22.8 18.61  93
#> 4: 21.4 19.44 110
#> 5: 18.7 17.02 175
#> 6: 18.1 20.22 105

# Use 'metrics' fields omitting field "DISP"
smrt$use(metrics, omit=DISP) |> head()
#>    CYL DRAT    WT GEAR CARB
#> 1:   6 3.90 2.620    4    4
#> 2:   6 3.90 2.875    4    4
#> 3:   4 3.85 2.320    4    1
#> 4:   6 3.08 3.215    3    1
#> 5:   8 3.15 3.440    3    2
#> 6:   6 2.76 3.460    3    1

# Use 'style' and 'identifier' fields also retaining field "QSEC"
smrt$use(style, identifier, retain = QSEC) |> head()
#>    VS AM        make_model  QSEC
#> 1:  0  1         Mazda RX4 16.46
#> 2:  0  1     Mazda RX4 Wag 17.02
#> 3:  1  1        Datsun 710 18.61
#> 4:  1  0    Hornet 4 Drive 19.44
#> 5:  0  0 Hornet Sportabout 17.02
#> 6:  1  0           Valiant 20.22

# Use 'style' and 'identifier' fields also retaining fields "DISP" and "QSEC"
smrt$use(style, identifier, retain = c(DISP, QSEC)) |> head()
#>    VS AM        make_model DISP  QSEC
#> 1:  0  1         Mazda RX4  160 16.46
#> 2:  0  1     Mazda RX4 Wag  160 17.02
#> 3:  1  1        Datsun 710  108 18.61
#> 4:  1  0    Hornet 4 Drive  258 19.44
#> 5:  0  0 Hornet Sportabout  360 17.02
#> 6:  1  0           Valiant  225 20.22

# Use 'style' and 'identifier' fields also retaining fields "DISP" and "QSEC" 
#       filtering for rows where field 'make_model' contains the pattern "Merc"
smrt$use(style, identifier, retain = c(DISP, QSEC)
                 , subset = grepl("Merc", make_model)) |> head()
#>    VS AM make_model  DISP QSEC
#> 1:  1  0  Merc 240D 146.7 20.0
#> 2:  1  0   Merc 230 140.8 22.9
#> 3:  1  0   Merc 280 167.6 18.3
#> 4:  1  0  Merc 280C 167.6 18.9
#> 5:  0  0 Merc 450SE 275.8 17.4
#> 6:  0  0 Merc 450SL 275.8 17.6

Using the Smart Cache

Each smart.data object can be given a name during class invocation or by assigning one directly after the fact. When names are compliant for use with cachem, the class instance can be sent to the cache contained within the package environment:

# Register this instance in the package-level cache under its name ("smart_cars")
smrt$cache_mgr(action = register)

# View existing cached objects
get.smart(list.only = TRUE)
#> [1] "smart_cars"

# Verify equivalent calls
identical(get.smart(smart_cars)$data, smrt$data)
#> [1] TRUE

# Use cached objects with taxonomy
get.smart(smart_cars)$
    use(performance, identifier, subset = grepl("Merc", make_model))
#>     MPG QSEC  HP  make_model
#> 1: 24.4 20.0  62   Merc 240D
#> 2: 22.8 22.9  95    Merc 230
#> 3: 19.2 18.3 123    Merc 280
#> 4: 17.8 18.9 123   Merc 280C
#> 5: 16.4 17.4 180  Merc 450SE
#> 6: 17.3 17.6 180  Merc 450SL
#> 7: 15.2 18.0 180 Merc 450SLC

If the underlying data is changed, the cached instance can be updated by using $cache_mgr(action = replace).

The benefit of caching is that the cached instance can be invoked from anywhere in your workflow without worrying about environment scope (except for parallel computing contexts).