Skip to contents

This generates an interaction_model object. If you are comfortable thinking about matrices, you can think of this as a matrix-like object.

Usage

old_make_interaction_model(
  fo,
  tib,
  duplicates = "add",
  parse_text = FALSE,
  dropNA = TRUE,
  data_prefix = NULL,
  ...
)

Arguments

fo

a formula, like outcome ~ (row_nums & context) * measurement_type.

tib

a tibble that contains the variables in the formula. The only exception is that the left-hand-side can be 1 and this does not need to be in tib.

parse_text

if set to TRUE, then the right side of * (i.e. the measurement_type) will be parsed as a sequence of tokens via tidytext::unnest_tokens. Additional arguments in ... will be passed to unnest_tokens. For example, adding to_lower = FALSE will ensure case is kept. Additionally, you could set token to be something other than words (e.g. ngrams or skip_ngrams, in which case you would also specify n). See unnest_tokens for more arguments.

dropNA

recommended. This drops rows of tib if there are any NA's among the essential variables.

Value

a list with four elements. First, the interaction_tibble, akin to a sparse matrix in triplet form. Second, row_universe, which is akin to the row names of the matrix, but in a tidy form. Third, column_universe, which is like row_universe but for the columns. Fourth, some settings.

Examples

library(nycflights13)
im = old_make_interaction_model(~(month & day)*dest, flights)
names(im)
#> [1] "interaction_tibble" "row_universe"       "column_universe"   
#> [4] "settings"          
im$row_universe
#> # A tibble: 365 × 4
#>    month   day     n row_num
#>    <int> <int> <int>   <int>
#>  1    11    27  1014       1
#>  2     7    11  1006       2
#>  3     7     8  1004       3
#>  4     7    10  1004       4
#>  5    12     2  1004       5
#>  6     7    18  1003       6
#>  7     7    25  1003       7
#>  8     7    12  1002       8
#>  9     7     9  1001       9
#> 10     7    17  1001      10
#> # ℹ 355 more rows
im$column_universe
#> # A tibble: 105 × 3
#>    dest      n col_num
#>    <chr> <int>   <int>
#>  1 ORD   17283       1
#>  2 ATL   17215       2
#>  3 LAX   16174       3
#>  4 BOS   15508       4
#>  5 MCO   14082       5
#>  6 CLT   14064       6
#>  7 SFO   13331       7
#>  8 FLL   12055       8
#>  9 MIA   11728       9
#> 10 DCA    9705      10
#> # ℹ 95 more rows
im$interaction_tibble
#> # A tibble: 31,229 × 3
#>    row_num col_num outcome
#>      <int>   <int>   <dbl>
#>  1       1       1      52
#>  2       1       2      51
#>  3       1       3      49
#>  4       1       4      43
#>  5       1       5      40
#>  6       1       6      42
#>  7       1       7      43
#>  8       1       8      38
#>  9       1       9      37
#> 10       1      10      28
#> # ℹ 31,219 more rows
im$settings
#> $fo
#> 1 ~ (month & day) * dest
#> <environment: 0x7f9b95a23ed8>
#> 
#> $data_prefix
#> NULL
#> 
#> $outcome_aggregation
#> [1] "count"
#> 
#> $outcome_variables
#> [1] "outcome_unweighted_1"
#> 
#> $row_variables
#> [1] "month" "day"  
#> 
#> $column_variables
#> [1] "dest"
#> 
# you can extract the sparse Matrix:
A = longpca:::get_Matrix(im,  import_names = TRUE)
str(A)
#> Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
#>   ..@ i       : int [1:31229] 0 1 2 3 4 5 6 7 8 9 ...
#>   ..@ p       : int [1:106] 0 365 730 1095 1460 1825 2190 2555 2920 3285 ...
#>   ..@ Dim     : int [1:2] 365 105
#>   ..@ Dimnames:List of 2
#>   .. ..$ : chr [1:365] "27/11" "11/7" "8/7" "10/7" ...
#>   .. ..$ : chr [1:105] "ORD" "ATL" "LAX" "BOS" ...
#>   ..@ x       : num [1:31229] 52 55 55 55 49 54 55 55 54 55 ...
#>   ..@ factors : list()
im = old_make_interaction_model(~Package*Imports, all_packages, parse_text = TRUE)
names(im)
#> [1] "interaction_tibble" "row_universe"       "column_universe"   
#> [4] "settings"          
im$row_universe
#> # A tibble: 20,319 × 3
#>    Package                n row_num
#>    <chr>              <int>   <int>
#>  1 Seurat                64       1
#>  2 tidyverse             60       2
#>  3 radiant.data          58       3
#>  4 radiant.model         58       4
#>  5 SSDM                  55       5
#>  6 BasketballAnalyzeR    53       6
#>  7 tRigon                49       7
#>  8 AFM                   48       8
#>  9 dextergui             48       9
#> 10 proteus               48      10
#> # ℹ 20,309 more rows
im$column_universe
#> # A tibble: 6,230 × 4
#>    from_text token        n col_num
#>    <chr>     <chr>    <int>   <int>
#>  1 Imports   stats     5442       1
#>  2 Imports   utils     3423       2
#>  3 Imports   dplyr     3299       3
#>  4 Imports   methods   3210       4
#>  5 Imports   ggplot2   3135       5
#>  6 Imports   rcpp      2548       6
#>  7 Imports   rlang     2172       7
#>  8 Imports   graphics  2158       8
#>  9 Imports   magrittr  1954       9
#> 10 Imports   stringr   1698      10
#> # ℹ 6,220 more rows
im$interaction_tibble
#> # A tibble: 114,833 × 3
#>    row_num col_num outcome
#>      <int>   <int>   <dbl>
#>  1       1       1       1
#>  2       1       2       1
#>  3       1       5       1
#>  4       1       6       1
#>  5       1       7       1
#>  6       1       8       1
#>  7       1      12       1
#>  8       1      13       1
#>  9       1      14       1
#> 10       1      15       1
#> # ℹ 114,823 more rows
im$settings
#> $fo
#> 1 ~ Package * Imports
#> <environment: 0x7f9b95704038>
#> 
#> $data_prefix
#> [1] "text"
#> 
#> $outcome_aggregation
#> [1] "count"
#> 
#> $outcome_variables
#> [1] "outcome_unweighted_1"
#> 
#> $row_variables
#> [1] "Package"
#> 
#> $column_variables
#> [1] "from_text" "token"    
#>