TileDBArray 1.15.0
TileDB implements a framework for local and remote storage of dense and sparse arrays.
We can use this as a DelayedArray backend to provide an array-level abstraction,
thus allowing the data to be used in many places where an ordinary array or matrix might be used.
The TileDBArray package implements the necessary wrappers around TileDB-R
to support read/write operations on TileDB arrays within the DelayedArray framework.
TileDBArray
Creating a TileDBArray is as easy as:
X <- matrix(rnorm(1000), ncol=10)
library(TileDBArray)
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.7697613 -0.6533541 -0.9749702 . -0.20305626 1.02172231
## [2,] -2.0385427 -0.4473694 0.2846678 . 0.45728003 -0.46420467
## [3,] -0.4403208 -1.0874565 -0.1739806 . -0.45205551 -1.58060497
## [4,] 0.8899474 1.2739806 0.0768501 . -0.21112761 0.04020712
## [5,] 0.5495859 -0.6691931 0.5409021 . 0.20239993 -1.43012762
## ... . . . . . .
## [96,] 1.63041233 0.49434221 -0.34329351 . 0.06518883 2.68233546
## [97,] 0.21653620 0.25782952 1.29594228 . 0.06921963 -0.12889134
## [98,] 0.92419606 1.38005591 1.43591575 . -1.15167242 0.95182176
## [99,] 0.41072991 0.47027242 -0.14451871 . 0.02216708 0.08721180
## [100,] -1.46576951 0.04117124 -0.77646899 . 1.44356769 -0.75893717
Alternatively, we can use coercion methods:
as(X, "TileDBArray")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 0.7697613 -0.6533541 -0.9749702 . -0.20305626 1.02172231
## [2,] -2.0385427 -0.4473694 0.2846678 . 0.45728003 -0.46420467
## [3,] -0.4403208 -1.0874565 -0.1739806 . -0.45205551 -1.58060497
## [4,] 0.8899474 1.2739806 0.0768501 . -0.21112761 0.04020712
## [5,] 0.5495859 -0.6691931 0.5409021 . 0.20239993 -1.43012762
## ... . . . . . .
## [96,] 1.63041233 0.49434221 -0.34329351 . 0.06518883 2.68233546
## [97,] 0.21653620 0.25782952 1.29594228 . 0.06921963 -0.12889134
## [98,] 0.92419606 1.38005591 1.43591575 . -1.15167242 0.95182176
## [99,] 0.41072991 0.47027242 -0.14451871 . 0.02216708 0.08721180
## [100,] -1.46576951 0.04117124 -0.77646899 . 1.44356769 -0.75893717
This process also works for sparse matrices:
Y <- Matrix::rsparsematrix(1000, 1000, density=0.01)
writeTileDBArray(Y)
## <1000 x 1000> sparse TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] 0 0 0 . 0 0
## [2,] 0 0 0 . 0 0
## [3,] 0 0 0 . 0 0
## [4,] 0 0 0 . 0 0
## [5,] 0 0 0 . 0 0
## ... . . . . . .
## [996,] 0 0 0 . 0 0
## [997,] 0 0 0 . 0 0
## [998,] 0 0 0 . 0 0
## [999,] 0 0 0 . 0 0
## [1000,] 0 0 0 . 0 0
Logical and integer matrices are supported:
writeTileDBArray(Y > 0)
## <1000 x 1000> sparse TileDBMatrix object of type "logical":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] FALSE FALSE FALSE . FALSE FALSE
## [2,] FALSE FALSE FALSE . FALSE FALSE
## [3,] FALSE FALSE FALSE . FALSE FALSE
## [4,] FALSE FALSE FALSE . FALSE FALSE
## [5,] FALSE FALSE FALSE . FALSE FALSE
## ... . . . . . .
## [996,] FALSE FALSE FALSE . FALSE FALSE
## [997,] FALSE FALSE FALSE . FALSE FALSE
## [998,] FALSE FALSE FALSE . FALSE FALSE
## [999,] FALSE FALSE FALSE . FALSE FALSE
## [1000,] FALSE FALSE FALSE . FALSE FALSE
As are matrices with dimension names:
rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X)))
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 0.7697613 -0.6533541 -0.9749702 . -0.20305626 1.02172231
## GENE_2 -2.0385427 -0.4473694 0.2846678 . 0.45728003 -0.46420467
## GENE_3 -0.4403208 -1.0874565 -0.1739806 . -0.45205551 -1.58060497
## GENE_4 0.8899474 1.2739806 0.0768501 . -0.21112761 0.04020712
## GENE_5 0.5495859 -0.6691931 0.5409021 . 0.20239993 -1.43012762
## ... . . . . . .
## GENE_96 1.63041233 0.49434221 -0.34329351 . 0.06518883 2.68233546
## GENE_97 0.21653620 0.25782952 1.29594228 . 0.06921963 -0.12889134
## GENE_98 0.92419606 1.38005591 1.43591575 . -1.15167242 0.95182176
## GENE_99 0.41072991 0.47027242 -0.14451871 . 0.02216708 0.08721180
## GENE_100 -1.46576951 0.04117124 -0.77646899 . 1.44356769 -0.75893717
TileDBArrays
TileDBArrays are simply DelayedArray objects and can be manipulated as such.
The usual conventions for extracting data from matrix-like objects work as expected:
out <- as(X, "TileDBArray")
dim(out)
## [1] 100 10
head(rownames(out))
## [1] "GENE_1" "GENE_2" "GENE_3" "GENE_4" "GENE_5" "GENE_6"
head(out[,1])
## GENE_1 GENE_2 GENE_3 GENE_4 GENE_5 GENE_6
## 0.7697613 -2.0385427 -0.4403208 0.8899474 0.5495859 0.8250544
We can also perform manipulations like subsetting and arithmetic.
Note that these operations do not affect the data in the TileDB backend;
rather, they are delayed until the values are explicitly required,
hence the creation of the DelayedMatrix object.
out[1:5,1:5]
## <5 x 5> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5
## GENE_1 0.76976130 -0.65335409 -0.97497016 -0.28076480 1.18680291
## GENE_2 -2.03854268 -0.44736944 0.28466781 1.18000559 0.49621513
## GENE_3 -0.44032085 -1.08745649 -0.17398057 0.29643029 0.02625013
## GENE_4 0.88994740 1.27398056 0.07685010 -0.36498908 0.39388953
## GENE_5 0.54958587 -0.66919314 0.54090214 -0.34950722 -0.91107235
out * 2
## <100 x 10> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 1.5395226 -1.3067082 -1.9499403 . -0.40611252 2.04344461
## GENE_2 -4.0770854 -0.8947389 0.5693356 . 0.91456006 -0.92840933
## GENE_3 -0.8806417 -2.1749130 -0.3479611 . -0.90411102 -3.16120994
## GENE_4 1.7798948 2.5479611 0.1537002 . -0.42225523 0.08041424
## GENE_5 1.0991717 -1.3383863 1.0818043 . 0.40479986 -2.86025525
## ... . . . . . .
## GENE_96 3.26082466 0.98868442 -0.68658702 . 0.13037766 5.36467092
## GENE_97 0.43307239 0.51565904 2.59188457 . 0.13843926 -0.25778268
## GENE_98 1.84839213 2.76011183 2.87183150 . -2.30334484 1.90364353
## GENE_99 0.82145982 0.94054484 -0.28903741 . 0.04433417 0.17442360
## GENE_100 -2.93153902 0.08234248 -1.55293797 . 2.88713539 -1.51787434
We can also do more complex matrix operations that are supported by DelayedArray:
colSums(out)
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5 SAMP_6
## -0.4648894 2.8104554 -6.5163846 9.6332049 1.9764228 -6.6409280
## SAMP_7 SAMP_8 SAMP_9 SAMP_10
## 6.4064487 15.6052467 10.9269770 -13.5174681
out %*% runif(ncol(out))
## [,1]
## GENE_1 3.500110241
## GENE_2 -2.426578951
## GENE_3 -1.694308883
## GENE_4 2.783476373
## GENE_5 -1.291171206
## GENE_6 2.746869294
## GENE_7 0.300408405
## GENE_8 1.680164389
## GENE_9 -2.853611761
## GENE_10 0.708522478
## GENE_11 -0.588716477
## GENE_12 -0.107448410
## GENE_13 -3.030307768
## GENE_14 -0.690224165
## GENE_15 1.853605992
## GENE_16 1.019376677
## GENE_17 3.223859378
## GENE_18 2.330919875
## GENE_19 1.256009870
## GENE_20 4.361709562
## GENE_21 -3.504034356
## GENE_22 1.329689630
## GENE_23 0.461975590
## GENE_24 3.768750713
## GENE_25 -2.531536890
## GENE_26 1.063663975
## GENE_27 0.988343963
## GENE_28 1.200104480
## GENE_29 -2.541362680
## GENE_30 -1.600062219
## GENE_31 3.909901638
## GENE_32 1.180230807
## GENE_33 -3.541528110
## GENE_34 0.147100053
## GENE_35 2.644638875
## GENE_36 2.836525446
## GENE_37 -5.331702418
## GENE_38 -0.468205546
## GENE_39 0.706423162
## GENE_40 -0.827083607
## GENE_41 1.116873181
## GENE_42 -2.128556353
## GENE_43 -0.440363545
## GENE_44 -0.307101711
## GENE_45 -0.296124599
## GENE_46 0.824610799
## GENE_47 0.138838048
## GENE_48 -2.001895681
## GENE_49 -1.348208159
## GENE_50 -1.055204518
## GENE_51 2.270511302
## GENE_52 1.806725915
## GENE_53 -0.505097947
## GENE_54 -1.533842079
## GENE_55 4.586600214
## GENE_56 2.128408950
## GENE_57 -3.055868947
## GENE_58 1.485354157
## GENE_59 -0.947118615
## GENE_60 0.315643414
## GENE_61 -4.921599500
## GENE_62 -1.098119188
## GENE_63 -0.712680781
## GENE_64 -2.510075684
## GENE_65 -4.736958304
## GENE_66 0.483001017
## GENE_67 -1.101928292
## GENE_68 -0.064805285
## GENE_69 -0.742623576
## GENE_70 -0.003893721
## GENE_71 0.720069762
## GENE_72 -1.265460811
## GENE_73 0.649068791
## GENE_74 -1.801561864
## GENE_75 2.458440813
## GENE_76 -0.375092785
## GENE_77 1.080196942
## GENE_78 -0.374171789
## GENE_79 2.285058372
## GENE_80 2.589708455
## GENE_81 -2.374524494
## GENE_82 -3.269769987
## GENE_83 3.389328223
## GENE_84 2.385211173
## GENE_85 1.773852496
## GENE_86 -3.710126487
## GENE_87 2.009836209
## GENE_88 -2.232799364
## GENE_89 0.752013230
## GENE_90 -0.255815372
## GENE_91 -4.347298878
## GENE_92 2.094496390
## GENE_93 0.719800715
## GENE_94 0.968306666
## GENE_95 -1.945310298
## GENE_96 4.281002153
## GENE_97 0.063182473
## GENE_98 3.324696560
## GENE_99 -0.505728797
## GENE_100 -3.067453013
We can adjust some parameters for creating the backend with appropriate arguments to writeTileDBArray().
For example, the code below allows us to control the path to the backend
as well as the name of the attribute containing the data.
X <- matrix(rnorm(1000), ncol=10)
path <- tempfile()
writeTileDBArray(X, path=path, attr="WHEE")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -1.1528422 -0.4036269 1.5852299 . 1.1387243 1.2684216
## [2,] 0.4772829 0.6948020 -0.5037492 . -1.0676545 1.3326806
## [3,] -2.0394615 1.8266183 -0.5541000 . -1.6696460 1.1602071
## [4,] 0.2322201 -1.6360498 -0.5645364 . 1.1875410 -1.4028893
## [5,] 0.3524365 -1.0817854 1.2664471 . 1.0655809 -0.3651902
## ... . . . . . .
## [96,] -0.8692392 0.4017520 -0.5147087 . -1.6258419 1.2042155
## [97,] 0.6347756 -1.0188223 0.5074096 . -0.2345163 0.8902214
## [98,] 0.2895505 -0.1847088 1.1995636 . -0.7374223 1.0866747
## [99,] 1.2344672 -0.7855078 -0.3024826 . -0.2388499 -0.5260541
## [100,] -0.5823710 -1.2901770 1.2485754 . 0.3146209 0.4010926
As these arguments cannot be passed during coercion, we instead provide global variables that can be set or unset to affect the outcome.
path2 <- tempfile()
setTileDBPath(path2)
as(X, "TileDBArray") # uses path2 to store the backend.
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -1.1528422 -0.4036269 1.5852299 . 1.1387243 1.2684216
## [2,] 0.4772829 0.6948020 -0.5037492 . -1.0676545 1.3326806
## [3,] -2.0394615 1.8266183 -0.5541000 . -1.6696460 1.1602071
## [4,] 0.2322201 -1.6360498 -0.5645364 . 1.1875410 -1.4028893
## [5,] 0.3524365 -1.0817854 1.2664471 . 1.0655809 -0.3651902
## ... . . . . . .
## [96,] -0.8692392 0.4017520 -0.5147087 . -1.6258419 1.2042155
## [97,] 0.6347756 -1.0188223 0.5074096 . -0.2345163 0.8902214
## [98,] 0.2895505 -0.1847088 1.1995636 . -0.7374223 1.0866747
## [99,] 1.2344672 -0.7855078 -0.3024826 . -0.2388499 -0.5260541
## [100,] -0.5823710 -1.2901770 1.2485754 . 0.3146209 0.4010926
sessionInfo()
## R version 4.4.0 Patched (2024-04-24 r86482)
## Platform: x86_64-apple-darwin20
## Running under: macOS Monterey 12.7.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RcppSpdlog_0.0.17 TileDBArray_1.15.0 DelayedArray_0.31.0
## [4] SparseArray_1.5.0 S4Arrays_1.5.0 abind_1.4-5
## [7] IRanges_2.39.0 S4Vectors_0.43.0 MatrixGenerics_1.17.0
## [10] matrixStats_1.3.0 BiocGenerics_0.51.0 Matrix_1.7-0
## [13] BiocStyle_2.33.0
##
## loaded via a namespace (and not attached):
## [1] bit_4.0.5 jsonlite_1.8.8 compiler_4.4.0
## [4] BiocManager_1.30.22 crayon_1.5.2 Rcpp_1.0.12
## [7] nanoarrow_0.4.0.1 jquerylib_0.1.4 yaml_2.3.8
## [10] fastmap_1.1.1 lattice_0.22-6 R6_2.5.1
## [13] RcppCCTZ_0.2.12 XVector_0.45.0 tiledb_0.26.0
## [16] knitr_1.46 bookdown_0.39 bslib_0.7.0
## [19] rlang_1.1.3 cachem_1.0.8 xfun_0.43
## [22] sass_0.4.9 bit64_4.0.5 cli_3.6.2
## [25] zlibbioc_1.51.0 spdl_0.0.5 digest_0.6.35
## [28] grid_4.4.0 lifecycle_1.0.4 data.table_1.15.4
## [31] evaluate_0.23 nanotime_0.3.7 zoo_1.8-12
## [34] rmarkdown_2.26 tools_4.4.0 htmltools_0.5.8.1