This generates an interaction_model object.
If you are comfortable thinking about matrices, you can think of this as a matrix-like object.
Usage
old_make_interaction_model(
fo,
tib,
duplicates = "add",
parse_text = FALSE,
dropNA = TRUE,
data_prefix = NULL,
...
)
Arguments
- fo
a formula, like outcome ~ (row_nums & context) * measurement_type.
- tib
a tibble that contains the variables in the formula. The only exception is that the left-hand-side can be 1 and this does not need to be in tib.
- parse_text
if set to TRUE, then the right side of * (i.e. the measurement_type) will be parsed as a sequence of tokens via tidytext::unnest_tokens. Additional arguments in ... will be passed to unnest_tokens. For example, adding to_lower = FALSE will ensure case is kept. Additionally, you could set token to be something other than words (e.g. ngrams or skip_ngrams, in which case you also specify n). See unnest_tokens for more arguments.
- dropNA
recommended. This drops the rows of tib that have an NA in any of the essential variables.
Value
a list with four elements. First, the interaction_tibble, akin to a sparse matrix in triplet form. Second, row_universe, which is akin to the row names of the matrix, but in a tidy form. Third, column_universe, which is like row_universe but for the columns. Fourth, some settings.
Examples
library(nycflights13)
im = old_make_interaction_model(~(month & day)*dest, flights)
names(im)
#> [1] "interaction_tibble" "row_universe" "column_universe"
#> [4] "settings"
im$row_universe
#> # A tibble: 365 × 4
#> month day n row_num
#> <int> <int> <int> <int>
#> 1 11 27 1014 1
#> 2 7 11 1006 2
#> 3 7 8 1004 3
#> 4 7 10 1004 4
#> 5 12 2 1004 5
#> 6 7 18 1003 6
#> 7 7 25 1003 7
#> 8 7 12 1002 8
#> 9 7 9 1001 9
#> 10 7 17 1001 10
#> # ℹ 355 more rows
im$column_universe
#> # A tibble: 105 × 3
#> dest n col_num
#> <chr> <int> <int>
#> 1 ORD 17283 1
#> 2 ATL 17215 2
#> 3 LAX 16174 3
#> 4 BOS 15508 4
#> 5 MCO 14082 5
#> 6 CLT 14064 6
#> 7 SFO 13331 7
#> 8 FLL 12055 8
#> 9 MIA 11728 9
#> 10 DCA 9705 10
#> # ℹ 95 more rows
im$interaction_tibble
#> # A tibble: 31,229 × 3
#> row_num col_num outcome
#> <int> <int> <dbl>
#> 1 1 1 52
#> 2 1 2 51
#> 3 1 3 49
#> 4 1 4 43
#> 5 1 5 40
#> 6 1 6 42
#> 7 1 7 43
#> 8 1 8 38
#> 9 1 9 37
#> 10 1 10 28
#> # ℹ 31,219 more rows
im$settings
#> $fo
#> 1 ~ (month & day) * dest
#> <environment: 0x7f9b95a23ed8>
#>
#> $data_prefix
#> NULL
#>
#> $outcome_aggregation
#> [1] "count"
#>
#> $outcome_variables
#> [1] "outcome_unweighted_1"
#>
#> $row_variables
#> [1] "month" "day"
#>
#> $column_variables
#> [1] "dest"
#>
# you can extract the sparse Matrix:
A = longpca:::get_Matrix(im, import_names = TRUE)
str(A)
#> Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
#> ..@ i : int [1:31229] 0 1 2 3 4 5 6 7 8 9 ...
#> ..@ p : int [1:106] 0 365 730 1095 1460 1825 2190 2555 2920 3285 ...
#> ..@ Dim : int [1:2] 365 105
#> ..@ Dimnames:List of 2
#> .. ..$ : chr [1:365] "27/11" "11/7" "8/7" "10/7" ...
#> .. ..$ : chr [1:105] "ORD" "ATL" "LAX" "BOS" ...
#> ..@ x : num [1:31229] 52 55 55 55 49 54 55 55 54 55 ...
#> ..@ factors : list()
im = old_make_interaction_model(~Package*Imports, all_packages, parse_text = TRUE)
names(im)
#> [1] "interaction_tibble" "row_universe" "column_universe"
#> [4] "settings"
im$row_universe
#> # A tibble: 20,319 × 3
#> Package n row_num
#> <chr> <int> <int>
#> 1 Seurat 64 1
#> 2 tidyverse 60 2
#> 3 radiant.data 58 3
#> 4 radiant.model 58 4
#> 5 SSDM 55 5
#> 6 BasketballAnalyzeR 53 6
#> 7 tRigon 49 7
#> 8 AFM 48 8
#> 9 dextergui 48 9
#> 10 proteus 48 10
#> # ℹ 20,309 more rows
im$column_universe
#> # A tibble: 6,230 × 4
#> from_text token n col_num
#> <chr> <chr> <int> <int>
#> 1 Imports stats 5442 1
#> 2 Imports utils 3423 2
#> 3 Imports dplyr 3299 3
#> 4 Imports methods 3210 4
#> 5 Imports ggplot2 3135 5
#> 6 Imports rcpp 2548 6
#> 7 Imports rlang 2172 7
#> 8 Imports graphics 2158 8
#> 9 Imports magrittr 1954 9
#> 10 Imports stringr 1698 10
#> # ℹ 6,220 more rows
im$interaction_tibble
#> # A tibble: 114,833 × 3
#> row_num col_num outcome
#> <int> <int> <dbl>
#> 1 1 1 1
#> 2 1 2 1
#> 3 1 5 1
#> 4 1 6 1
#> 5 1 7 1
#> 6 1 8 1
#> 7 1 12 1
#> 8 1 13 1
#> 9 1 14 1
#> 10 1 15 1
#> # ℹ 114,823 more rows
im$settings
#> $fo
#> 1 ~ Package * Imports
#> <environment: 0x7f9b95704038>
#>
#> $data_prefix
#> [1] "text"
#>
#> $outcome_aggregation
#> [1] "count"
#>
#> $outcome_variables
#> [1] "outcome_unweighted_1"
#>
#> $row_variables
#> [1] "Package"
#>
#> $column_variables
#> [1] "from_text" "token"
#>