This vignette demonstrates how to use the {samplezoo} package to generate datasets of varying sizes (small, medium, and large) with random variables from multiple probability distributions.
Each dataset offers:
- Variables (columns) drawn from common distributions such as Normal, Binomial, Poisson, and others.
- An adjustable sample size, chosen with the `"small"`, `"medium"`, or `"large"` argument, to suit your needs.
data_small <- samplezoo("small")
head(data_small)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 65.29742 67.20541 39.73562 0 5 2 10.3332425 0.7231855 0.27041914
#> 2 30.61407 48.31246 30.86564 0 3 2 1.7652295 0.1450918 0.02491969
#> 3 44.94947 67.29217 38.69375 0 0 4 10.6108062 0.4505904 0.05951487
#> 4 29.59907 44.24359 67.84280 0 0 2 0.5679928 0.8819497 0.60979494
#> 5 43.41407 56.94686 10.84323 1 1 2 2.2938456 0.1684621 0.49810648
#> 6 34.97345 69.90718 33.96964 0 0 0 27.0255575 0.7412955 0.68493214
#> gamma chisq t_dist
#> 1 0.9977802 2.1836605 -0.8853693
#> 2 3.3005588 0.9465372 0.7570046
#> 3 0.7583037 1.3795962 -0.5036858
#> 4 0.9311511 0.4779120 -1.0671415
#> 5 3.0245772 5.3672453 -1.1618699
#> 6 4.2327450 4.3024497 -0.4104507
data_medium <- samplezoo("medium")
head(data_medium)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 57.06740 64.11136 3.467672 0 2 1 3.686053 0.3375031 0.14378442
#> 2 47.31645 52.63125 41.940552 0 1 2 8.581828 0.2236899 0.11224683
#> 3 55.08562 66.59396 30.838302 0 0 1 4.933613 0.9181436 0.39128257
#> 4 41.87706 72.75178 65.439129 0 1 0 17.925673 0.6346326 0.29691502
#> 5 65.32016 70.22718 47.284516 1 2 3 47.631865 0.7300565 0.05317965
#> 6 46.27007 67.93487 56.690884 0 0 3 1.982736 0.1547931 0.25384270
#> gamma chisq t_dist
#> 1 3.331139 2.985590 -0.67784531
#> 2 4.227148 4.953648 0.05318012
#> 3 5.768409 12.689136 0.87468194
#> 4 1.144292 8.226658 0.64553503
#> 5 5.610336 7.530971 3.24241463
#> 6 5.249032 5.406572 0.34418083
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 24.60660 72.14700 19.24886 0 5 5 6.4277008 0.84537939 0.3853909
#> 2 43.87874 61.09530 41.63585 0 1 1 0.5646119 0.93715738 0.2039308
#> 3 37.14783 67.77147 37.70517 0 3 2 1.7067526 0.98601103 0.6175510
#> 4 50.42442 69.47083 49.75178 0 0 4 41.4743139 0.22641873 0.2663413
#> 5 59.50994 44.44783 36.45096 0 6 3 0.5456206 0.06758606 0.1825875
#> 6 57.72893 55.34474 21.66427 1 0 3 1.0757587 0.05526381 0.1545085
#> gamma chisq t_dist
#> 1 1.289751 14.224862 -0.7209851
#> 2 3.437733 6.544303 0.4948181
#> 3 1.173168 10.388673 0.2897624
#> 4 3.818564 8.051823 -0.3072181
#> 5 3.348005 15.916600 1.8211526
#> 6 2.625976 10.669652 -0.8691618
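To make your results reproducible, call set.seed() before generating random data. The same seed always yields the same dataset, while a different seed yields a different (but equally reproducible) dataset, as the two examples below show.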
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 0 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 0 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chisq t_dist
#> 1 6.9893762 10.286282 -0.3814568
#> 2 5.4087626 6.519658 -2.3409216
#> 3 1.2587867 8.011417 -0.4744159
#> 4 0.9871787 14.780626 0.4292511
#> 5 2.4021943 6.799788 -0.6692669
#> 6 4.2109032 17.858701 -0.3370763
set.seed(456)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 29.84718 68.13494 7.9885694 0 0 5 3.4417303 0.8866347 0.05413307
#> 2 59.32663 52.32066 21.2526086 0 3 3 0.8114356 0.7976466 0.07195440
#> 3 62.01312 62.47569 38.4789563 0 2 6 46.8038907 0.6469920 0.22555129
#> 4 29.16661 53.51086 -0.8656269 0 1 5 11.6955326 0.2036753 0.71455809
#> 5 39.28465 47.19406 47.7819258 1 1 1 0.3535625 0.3653401 0.34619912
#> 6 45.13908 63.33566 53.3620528 1 1 2 4.5592136 0.7628573 0.25880522
#> gamma chisq t_dist
#> 1 6.7914120 4.464348 -1.0150596
#> 2 3.0132520 8.062120 0.3262369
#> 3 4.7360954 10.969593 1.5141157
#> 4 5.1235878 6.249247 0.6432708
#> 5 6.6851637 4.358815 0.2025742
#> 6 0.3903841 20.019575 1.6257109