CC BY-NC-ND 3.0
apply
Aplicar una función a todos los elementos de una matriz (matrix) o de un array.
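Argumentos: X (la matrix o el array), MARGIN (1 para filas, 2 para columnas), FUN (la función que se aplica) y ... (argumentos adicionales que se pasan a FUN, por ejemplo na.rm = TRUE).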
applybdd <- matrix(rnorm(200), ncol = 20)
apply(bdd, MARGIN = 2, FUN = mean)## [1] -0.172688675 0.230224263 0.162718357 0.025784095 -0.008130307
## [6] 0.467153737 -0.070431767 -0.204683442 0.153504785 -0.842461722
## [11] -0.622626753 0.058949880 -0.096573100 0.094711924 -0.005039150
## [16] -0.132252628 0.068651924 0.895776443 0.113150713 -0.346795796
applyapply(bdd, MARGIN = 1, FUN = median)## [1] -0.051987590 0.004223318 -0.141736878 -0.167981389 0.064074971
## [6] 0.335765724 -0.140736765 0.351855180 0.055748881 -0.127539494
applybdd <- matrix(sample(c(1:20, NA), size = 200, replace = TRUE),
ncol = 20)
apply(bdd, MARGIN = 2, FUN = mean)## [1] NA 9.6 NA 15.1 13.0 10.9 9.1 NA 12.9 10.0 NA NA 9.5 NA
## [15] NA 8.3 NA 11.9 NA 7.2
applyapply(bdd, MARGIN = 2, FUN = mean, na.rm = TRUE)## [1] 10.444444 9.600000 11.555556 15.100000 13.000000 10.900000 9.100000
## [8] 15.111111 12.900000 10.000000 11.555556 12.375000 9.500000 12.444444
## [15] 8.444444 8.300000 9.750000 11.900000 10.000000 7.200000
applyapply(bdd, MARGIN = 2, FUN = function(i){
mean(i, na.rm = TRUE)
})## [1] 10.444444 9.600000 11.555556 15.100000 13.000000 10.900000 9.100000
## [8] 15.111111 12.900000 10.000000 11.555556 12.375000 9.500000 12.444444
## [15] 8.444444 8.300000 9.750000 11.900000 10.000000 7.200000
lapply
Aplicar una función a todos los elementos de una list (un data.frame también es una list).
lapplymyList <- list(
a = sample(1:100, size = 10),
b = sample(1:100, size = 10),
c = sample(1:100, size = 10),
d = sample(1:100, size = 10),
e = sample(1:100, size = 10)
)
print(myList)## $a
## [1] 73 68 96 60 56 80 52 62 95 89
##
## $b
## [1] 71 40 74 90 32 14 68 1 34 67
##
## $c
## [1] 85 33 73 98 99 25 43 67 21 100
##
## $d
## [1] 78 84 63 3 77 46 93 11 2 98
##
## $e
## [1] 30 5 33 26 7 9 49 62 89 48
lapplylapply(myList, FUN = mean)## $a
## [1] 73.1
##
## $b
## [1] 49.1
##
## $c
## [1] 64.4
##
## $d
## [1] 55.5
##
## $e
## [1] 35.8
lapplymyList <- list(
a = sample(c(1:5, NA), size = 10, replace = TRUE),
b = sample(c(1:5, NA), size = 10, replace = TRUE),
c = sample(c(1:5, NA), size = 10, replace = TRUE),
d = sample(c(1:5, NA), size = 10, replace = TRUE),
e = sample(c(1:5, NA), size = 10, replace = TRUE)
)
print(myList)## $a
## [1] 4 NA 1 3 3 NA NA 5 2 4
##
## $b
## [1] 4 NA 3 4 3 4 1 3 2 3
##
## $c
## [1] 3 3 5 4 2 2 5 4 2 5
##
## $d
## [1] 3 1 3 NA 1 4 2 5 4 2
##
## $e
## [1] NA 3 5 3 NA 2 4 2 4 1
lapplylapply(myList, FUN = mean)## $a
## [1] NA
##
## $b
## [1] NA
##
## $c
## [1] 3.5
##
## $d
## [1] NA
##
## $e
## [1] NA
lapplylapply(myList, FUN = mean, na.rm = TRUE)## $a
## [1] 3.142857
##
## $b
## [1] 3
##
## $c
## [1] 3.5
##
## $d
## [1] 2.777778
##
## $e
## [1] 3
lapplylapply(myList, FUN = function(i){
mean(i, na.rm = TRUE)
})## $a
## [1] 3.142857
##
## $b
## [1] 3
##
## $c
## [1] 3.5
##
## $d
## [1] 2.777778
##
## $e
## [1] 3
lapplylapply(myList, FUN = function(i){
m <- mean(i, na.rm = TRUE)
if(m > 3){
return("grande")
}else{
return("pequeño")
}
})## $a
## [1] "grande"
##
## $b
## [1] "pequeño"
##
## $c
## [1] "grande"
##
## $d
## [1] "pequeño"
##
## $e
## [1] "pequeño"
Número de datos faltantes:
lapply(myList, FUN = function(i){
sum(is.na(i))
})## $a
## [1] 3
##
## $b
## [1] 1
##
## $c
## [1] 0
##
## $d
## [1] 1
##
## $e
## [1] 2
sapply
sapply es una variante de lapply que intenta simplificar el resultado cuando es posible (por ejemplo, devolver un vector en lugar de una list).
sapplylapply(myList, FUN = function(i){
sum(is.na(i))
})## $a
## [1] 3
##
## $b
## [1] 1
##
## $c
## [1] 0
##
## $d
## [1] 1
##
## $e
## [1] 2
sapplysapply(myList, FUN = function(i){
sum(is.na(i))
})## a b c d e
## 3 1 0 1 2
Sacar el elemento “n” de cada componente de una list:
sapply(myList, FUN = '[[', 2)## a b c d e
## NA NA 3 1 3
sapplymyDF <- data.frame(
a = sample(c(1:5, NA), size = 10, replace = TRUE),
b = sample(c(1:5, NA), size = 10, replace = TRUE),
c = sample(c(1:5, NA), size = 10, replace = TRUE),
d = sample(c(1:5, NA), size = 10, replace = TRUE),
e = sample(c(1:5, NA), size = 10, replace = TRUE)
)
print(myDF)## a b c d e
## 1 1 5 1 4 4
## 2 2 3 2 5 3
## 3 1 2 1 5 2
## 4 3 3 3 1 3
## 5 5 3 3 5 2
## 6 1 2 3 5 5
## 7 1 1 NA 3 4
## 8 5 3 NA 2 4
## 9 NA 2 4 3 2
## 10 5 4 3 4 2
sapplysapply(myDF, FUN = function(i){
sum(is.na(i))
})## a b c d e
## 1 0 2 0 0
sapplypruebaTiempo <- lapply(
seq(from = 1000, to = 30000, by = 1000),
function(sampleSize){
sapply(1:30, function(repet){
startTime <- Sys.time()
guessNumber(mySample = 1:sampleSize)
return(Sys.time() - startTime)
})
})
tapply
col0 <- sample(LETTERS[1:5], size = 1000, replace = TRUE)
col1 <- rnorm(n = 1000, mean = 10, sd = 0.5)
col2 <- rlnorm(n = 1000, meanlog = 10, sdlog = 0.5)
col3 <- rgamma(n = 1000, shape = 10, rate = 0.5)
dfCol <- data.frame(col0, col1, col2, col3)
print(head(dfCol, n = 10))## col0 col1 col2 col3
## 1 E 10.214841 47273.13 19.53360
## 2 B 10.161125 18761.21 21.31726
## 3 A 9.963536 23280.13 31.25191
## 4 E 10.136935 54161.22 13.70548
## 5 B 10.446320 6520.05 23.58053
## 6 A 9.582148 27207.82 17.60784
## 7 E 9.447795 18704.42 19.38604
## 8 B 10.419673 30796.01 28.34310
## 9 A 10.074750 26959.06 24.33891
## 10 E 10.813097 21645.99 20.52869
tapplytapply(dfCol$col1, INDEX = dfCol$col0, FUN = summary)## $A
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.522 9.700 9.983 10.003 10.270 11.247
##
## $B
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.841 9.664 9.994 10.004 10.356 11.746
##
## $C
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.284 9.644 9.962 9.960 10.332 11.487
##
## $D
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.813 9.662 10.040 10.048 10.379 11.446
##
## $E
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.671 9.613 9.986 9.971 10.317 11.260
tapplysapply(2:4, FUN = function(i){
tapply(dfCol[,i], INDEX = dfCol$col0, FUN = mean)
})## [,1] [,2] [,3]
## A 10.002878 25369.50 19.91896
## B 10.003580 24616.70 20.44072
## C 9.960449 25622.16 20.35900
## D 10.048424 24673.44 19.14632
## E 9.971356 25213.84 20.27397