define() operates on data using one or more formula-based definitions. Data transformation and selection can be achieved with formulas or with standard data.table syntax, applied procedurally within a single function call. Each operation acts on the result of the previous one, making define() a compact pipeline of operations.
define(data = NULL, ..., keep.rownames = TRUE, blueprint = NULL)
data: The input data (see 'Details')
(dots_list): Formulas for which the left-hand side (LHS) is an expression containing the operation, and the right-hand side (RHS) contains column names that form a grouping set for the operation (i.e., <expression> ~ col_1 + col_2 + ...):
If the form <LHS> ~ . is given, the LHS executes using all columns as the grouping set
If the form <LHS> ~ 1 is given, the LHS executes without grouping
If no LHS is given, the operation defaults to selection based on the RHS
keep.rownames: See data.table
blueprint: (EXPERIMENTAL) See blueprint.
Returns the modified data.
If data is a smart.data object, the taxonomical references to field names can be accessed by using the use() syntax in the right-hand side of the formula (e.g., ~use(term1, term2) + otherTerm1 + ...).
When using formulas for ... on an empty data.table object, set the argument data to NULL; otherwise, you will get an error.
# Examples using a smart.data object; they run only when the smart.data
# package can be loaded.
if (require(smart.data)){
library(smart.data)
# NOTE(review): presumably initializes the smart.data session/cache — confirm
# against the smart.data documentation.
smart.start();
# Taxonomy definitions mapping each term to the field (column) names it covers:
# "identifier" -> rn; "category" -> cyl, gear, carb.
taxonomy_list <- list(
identifier = taxonomy(
term = "identifier"
, desc = "Identifier"
, fields = c("rn"))
, category = taxonomy(
term = "category"
, desc = "Category"
, fields = c("cyl", "gear", "carb")
)
);
# Build the smart.data object from the first ten rows of mtcars (row names are
# kept as a column) and register the taxonomy defined above.
# NOTE(review): enforce.rules(for_usage) and cache_mgr(action = upd) are
# smart.data methods taking bare symbols; their exact effects are not visible
# here — confirm against the smart.data documentation.
smart_mt <- smart.data$
new(as.data.table(mtcars[1:10, ], keep.rownames = TRUE))$
taxonomy.rule(!!!taxonomy_list)$
enforce.rules(for_usage)$
cache_mgr(action = upd);
# Two chained definitions: evaluate list(j = 1, mpg) with vs, am, and the
# taxonomy fields on the RHS, then select only j and mpg (see printed output).
print(test_obj <- define(
smart_mt
, list(j = 1, mpg) ~vs + am + use(identifier, category)
, ~j + mpg
, keep.rownames = FALSE
))
# Selection only (no LHS): keeps vs, am, and the fields behind the
# "identifier" and "category" terms.
print(test_obj <- define(smart_mt, ~vs + am + use(identifier, category)));
# `:=` on the LHS adds columns x and y to the printed result.
# NOTE(review): in the printed output x is 3 on every row, which looks like an
# ungrouped sum over all rows — confirm whether use() resolves any grouping
# fields when the input is test_obj rather than the smart.data object.
print(define(
test_obj
, `:=`(x = sum(am^2), y = 10) ~ use(identifier, category)
));
# Per the printed output, test_obj itself does not contain x or y.
print(test_obj)
rm(test_obj)
}
#> Loading required package: smart.data
#> Loading required package: magrittr
#> 'new_data' is not found in the smart-cache
#> j mpg
#> <num> <num>
#> 1: 1 21.0
#> 2: 1 21.0
#> 3: 1 22.8
#> 4: 1 21.4
#> 5: 1 18.7
#> 6: 1 18.1
#> 7: 1 14.3
#> 8: 1 24.4
#> 9: 1 22.8
#> 10: 1 19.2
#> vs am rn cyl gear carb
#> <num> <num> <char> <num> <num> <num>
#> 1: 0 1 Mazda RX4 6 4 4
#> 2: 0 1 Mazda RX4 Wag 6 4 4
#> 3: 1 1 Datsun 710 4 4 1
#> 4: 1 0 Hornet 4 Drive 6 3 1
#> 5: 0 0 Hornet Sportabout 8 3 2
#> 6: 1 0 Valiant 6 3 1
#> 7: 0 0 Duster 360 8 3 4
#> 8: 1 0 Merc 240D 4 4 2
#> 9: 1 0 Merc 230 4 4 2
#> 10: 1 0 Merc 280 6 4 4
#> vs am rn cyl gear carb x y
#> <num> <num> <char> <num> <num> <num> <num> <num>
#> 1: 0 1 Mazda RX4 6 4 4 3 10
#> 2: 0 1 Mazda RX4 Wag 6 4 4 3 10
#> 3: 1 1 Datsun 710 4 4 1 3 10
#> 4: 1 0 Hornet 4 Drive 6 3 1 3 10
#> 5: 0 0 Hornet Sportabout 8 3 2 3 10
#> 6: 1 0 Valiant 6 3 1 3 10
#> 7: 0 0 Duster 360 8 3 4 3 10
#> 8: 1 0 Merc 240D 4 4 2 3 10
#> 9: 1 0 Merc 230 4 4 2 3 10
#> 10: 1 0 Merc 280 6 4 4 3 10
#> vs am rn cyl gear carb
#> <num> <num> <char> <num> <num> <num>
#> 1: 0 1 Mazda RX4 6 4 4
#> 2: 0 1 Mazda RX4 Wag 6 4 4
#> 3: 1 1 Datsun 710 4 4 1
#> 4: 1 0 Hornet 4 Drive 6 3 1
#> 5: 0 0 Hornet Sportabout 8 3 2
#> 6: 1 0 Valiant 6 3 1
#> 7: 0 0 Duster 360 8 3 4
#> 8: 1 0 Merc 240D 4 4 2
#> 9: 1 0 Merc 230 4 4 2
#> 10: 1 0 Merc 280 6 4 4
# With no arguments, define() yields a null data.table (see output below).
define()[];
#> Null data.table (0 rows and 0 cols)
# Named arguments create columns; a later definition may reference an earlier
# one (y is computed from x).
define(x = 1:10, y = x * 3)[];
#> x y
#> <int> <num>
#> 1: 1 3
#> 2: 2 6
#> 3: 3 9
#> 4: 4 12
#> 5: 5 15
#> 6: 6 18
#> 7: 7 21
#> 8: 8 24
#> 9: 9 27
#> 10: 10 30
# Definitions are applied in sequence: z is built from x and the newly
# created y.
define(x = 1:10, y = x * 3, z = x*y)[];
#> x y z
#> <int> <num> <num>
#> 1: 1 3 3
#> 2: 2 6 12
#> 3: 3 9 27
#> 4: 4 12 48
#> 5: 5 15 75
#> 6: 6 18 108
#> 7: 7 21 147
#> 8: 8 24 192
#> 9: 9 27 243
#> 10: 10 30 300
# A trailing one-sided formula (no LHS) selects columns: only x and z are kept.
define(NULL, x = 1:10, y = x * 3, z = x*y, ~x + z)[];
#> x z
#> <int> <num>
#> 1: 1 3
#> 2: 2 12
#> 3: 3 27
#> 4: 4 48
#> 5: 5 75
#> 6: 6 108
#> 7: 7 147
#> 8: 8 192
#> 9: 9 243
#> 10: 10 300
# Start from an empty data.table, create x and y, then evaluate list(z = 10)
# with x on the RHS; the printed result holds x and z only (y is not retained).
define(data.table(), x = 1:10, y = x * 3, list(z = 10) ~ x)[];
#> x z
#> <int> <num>
#> 1: 1 10
#> 2: 2 10
#> 3: 3 10
#> 4: 4 10
#> 5: 5 10
#> 6: 6 10
#> 7: 7 10
#> 8: 8 10
#> 9: 9 10
#> 10: 10 10
# Predefined operations:
# Define x as sum(am^2) with the fields behind the "identifier" and "category"
# taxonomy terms on the RHS; the result is stored so it can be supplied as a
# blueprint in a later define() call.
# NOTE(review): smart_mt is created inside the earlier
# `if (require(smart.data))` block, so this line assumes that block ran.
predef_data <- define(smart_mt, x = sum(am^2) ~ use(identifier, category));
# Re-run define() on smart_mt, supplying the earlier result (predef_data) as
# the `blueprint` argument.
# NOTE(review): smart_mt is created inside the earlier
# `if (require(smart.data))` block, so this call assumes that block ran.
redef_data <- define(
smart_mt
# Normally, listing 'x' would throw an error as it would not exist in the data;
# however, since a blueprint is provided from a previous definition, the 'x' variable is
# created within scope before the additional operations execute:
, list(j = 1, mpg, x) ~vs + am + use(identifier, category)
, blueprint = predef_data
);
# Auto-print the result: it contains the RHS columns plus j, mpg, and the
# blueprint-derived x.
redef_data;
#> vs am rn cyl gear carb j mpg x
#> <num> <num> <char> <num> <num> <num> <num> <num> <num>
#> 1: 0 1 Mazda RX4 6 4 4 1 21.0 1
#> 2: 0 1 Mazda RX4 Wag 6 4 4 1 21.0 1
#> 3: 1 1 Datsun 710 4 4 1 1 22.8 1
#> 4: 1 0 Hornet 4 Drive 6 3 1 1 21.4 0
#> 5: 0 0 Hornet Sportabout 8 3 2 1 18.7 0
#> 6: 1 0 Valiant 6 3 1 1 18.1 0
#> 7: 0 0 Duster 360 8 3 4 1 14.3 0
#> 8: 1 0 Merc 240D 4 4 2 1 24.4 0
#> 9: 1 0 Merc 230 4 4 2 1 22.8 0
#> 10: 1 0 Merc 280 6 4 4 1 19.2 0
# Inspect the "blueprint" attribute attached to the result: its schema slot
# lists the recorded formulas, with the use() terms expanded to field names
# (rn, cyl, gear, carb).
attr(redef_data, "blueprint");
#> An object of class "blueprint"
#> Slot "schema":
#> $x
#> sum(am^2) ~ rn + cyl + gear + carb
#> <environment: 0x0000019ef2bdfd08>
#>
#> [[2]]
#> list(j = 1, mpg, x) ~ vs + am + rn + cyl + gear + carb
#> <environment: 0x0000019ef3816460>
#>
#>
# Supplying a prior result as `blueprint`, with no new definitions, reproduces
# that result exactly (identical() returns TRUE below).
identical(redef_data, define(smart_mt, blueprint = redef_data))
#> [1] TRUE
# Clean up after the examples.
# Guard on the search path rather than loadedNamespaces(): detach() raises an
# error when "package:smart.data" is not attached, which can happen when the
# namespace is merely loaded (e.g., imported by another package) but the
# package was never attached.
if ("package:smart.data" %in% search()) {
  detach("package:smart.data", unload = TRUE)
}
# Remove the objects created by the examples.
rm(redef_data, smart_mt, predef_data, taxonomy_list)