CC BY-NC-ND 3.0
apply
apply
Aplicar una function
a cada fila o columna (margen) de una matrix
o array
.
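Argumentos: X (la matrix o el array), MARGIN (1 = filas, 2 = columnas), FUN (la function que se aplica) y ... (argumentos adicionales que se pasan a FUN).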
apply
# Build a 10 x 20 matrix of standard-normal draws, then compute the mean of
# every column (MARGIN = 2 iterates over columns).
bdd <- matrix(data = rnorm(n = 200), nrow = 10, ncol = 20)
apply(X = bdd, MARGIN = 2, FUN = mean)
## [1] -0.172688675 0.230224263 0.162718357 0.025784095 -0.008130307
## [6] 0.467153737 -0.070431767 -0.204683442 0.153504785 -0.842461722
## [11] -0.622626753 0.058949880 -0.096573100 0.094711924 -0.005039150
## [16] -0.132252628 0.068651924 0.895776443 0.113150713 -0.346795796
apply
# Median of every row of bdd (MARGIN = 1 iterates over rows).
apply(X = bdd, MARGIN = 1, FUN = stats::median)
## [1] -0.051987590 0.004223318 -0.141736878 -0.167981389 0.064074971
## [6] 0.335765724 -0.140736765 0.351855180 0.055748881 -0.127539494
apply
# Redraw bdd from the integers 1..20 plus NA (with replacement), so that some
# columns hold missing values; mean() returns NA for each such column.
bdd <- matrix(
  data = sample(x = c(1:20, NA), size = 200, replace = TRUE),
  ncol = 20
)
apply(X = bdd, MARGIN = 2, FUN = mean)
## [1] NA 9.6 NA 15.1 13.0 10.9 9.1 NA 12.9 10.0 NA NA 9.5 NA
## [15] NA 8.3 NA 11.9 NA 7.2
apply
# Column means ignoring missing values: arguments after FUN (here na.rm) are
# forwarded by apply() to mean().
apply(X = bdd, MARGIN = 2, FUN = mean, na.rm = TRUE)
## [1] 10.444444 9.600000 11.555556 15.100000 13.000000 10.900000 9.100000
## [8] 15.111111 12.900000 10.000000 11.555556 12.375000 9.500000 12.444444
## [15] 8.444444 8.300000 9.750000 11.900000 10.000000 7.200000
apply
# Column means ignoring NA, written with an explicit anonymous function rather
# than forwarding na.rm through apply()'s `...`.
apply(X = bdd, MARGIN = 2, FUN = function(columna) {
  mean(columna, na.rm = TRUE)
})
## [1] 10.444444 9.600000 11.555556 15.100000 13.000000 10.900000 9.100000
## [8] 15.111111 12.900000 10.000000 11.555556 12.375000 9.500000 12.444444
## [15] 8.444444 8.300000 9.750000 11.900000 10.000000 7.200000
lapply
lapply
Aplicar una function
a todos los elementos de una list
(data.frame
es una list
).
lapply
# Named list a..e; each element is 10 distinct integers drawn from 1..100.
# Elements are generated in order a, b, c, d, e.
myList <- lapply(
  setNames(nm = c("a", "b", "c", "d", "e")),
  function(nombre) sample(1:100, size = 10)
)
print(myList)
## $a
## [1] 73 68 96 60 56 80 52 62 95 89
##
## $b
## [1] 71 40 74 90 32 14 68 1 34 67
##
## $c
## [1] 85 33 73 98 99 25 43 67 21 100
##
## $d
## [1] 78 84 63 3 77 46 93 11 2 98
##
## $e
## [1] 30 5 33 26 7 9 49 62 89 48
lapply
# Mean of every element of myList; lapply() always returns a list.
lapply(X = myList, FUN = mean)
## $a
## [1] 73.1
##
## $b
## [1] 49.1
##
## $c
## [1] 64.4
##
## $d
## [1] 55.5
##
## $e
## [1] 35.8
lapply
# Named list a..e; each element is 10 draws (with replacement) from 1..5 plus
# NA, so elements may contain missing values.
myList <- lapply(
  setNames(nm = c("a", "b", "c", "d", "e")),
  function(nombre) sample(c(1:5, NA), size = 10, replace = TRUE)
)
print(myList)
## $a
## [1] 4 NA 1 3 3 NA NA 5 2 4
##
## $b
## [1] 4 NA 3 4 3 4 1 3 2 3
##
## $c
## [1] 3 3 5 4 2 2 5 4 2 5
##
## $d
## [1] 3 1 3 NA 1 4 2 5 4 2
##
## $e
## [1] NA 3 5 3 NA 2 4 2 4 1
lapply
# Mean of every element; any element that contains an NA yields NA.
lapply(X = myList, FUN = mean)
## $a
## [1] NA
##
## $b
## [1] NA
##
## $c
## [1] 3.5
##
## $d
## [1] NA
##
## $e
## [1] NA
lapply
# Mean of every element ignoring NA: na.rm is forwarded by lapply() to mean().
lapply(X = myList, FUN = mean, na.rm = TRUE)
## $a
## [1] 3.142857
##
## $b
## [1] 3
##
## $c
## [1] 3.5
##
## $d
## [1] 2.777778
##
## $e
## [1] 3
lapply
# Per-element mean ignoring NA, using an explicit anonymous function instead of
# forwarding na.rm through lapply()'s `...`.
lapply(X = myList, FUN = function(valores) {
  mean(valores, na.rm = TRUE)
})
## $a
## [1] 3.142857
##
## $b
## [1] 3
##
## $c
## [1] 3.5
##
## $d
## [1] 2.777778
##
## $e
## [1] 3
lapply
# Label each element of myList by whether its mean (ignoring NA) exceeds 3.
# Returns a list of length-one character vectors: "grande" or "pequeño".
# An element made only of NA has mean NaN; `if (NaN > 3)` would abort with
# "missing value where TRUE/FALSE needed", so that case yields NA instead.
lapply(myList, FUN = function(i) {
  m <- mean(i, na.rm = TRUE)
  if (is.na(m)) {
    return(NA_character_)
  }
  if (m > 3) "grande" else "pequeño"
})
## $a
## [1] "grande"
##
## $b
## [1] "pequeño"
##
## $c
## [1] "grande"
##
## $d
## [1] "pequeño"
##
## $e
## [1] "pequeño"
lapply
Número de datos faltantes:
# Count the missing values in every element: is.na() marks them and sum()
# counts the TRUE entries.
lapply(X = myList, FUN = function(valores) sum(is.na(valores)))
## $a
## [1] 3
##
## $b
## [1] 1
##
## $c
## [1] 0
##
## $d
## [1] 1
##
## $e
## [1] 2
sapply
sapply
sapply
es una variante de lapply
que intenta simplificar el resultado cuando es posible (por ejemplo, devolver un vector
en lugar de una list
cuando es posible).
sapply
# With lapply() the per-element NA counts come back as a list.
lapply(X = myList, FUN = function(valores) sum(is.na(valores)))
## $a
## [1] 3
##
## $b
## [1] 1
##
## $c
## [1] 0
##
## $d
## [1] 1
##
## $e
## [1] 2
sapply
# Same NA count with sapply(): every result has length one, so the output is
# simplified to a named vector.
sapply(X = myList, FUN = function(valores) sum(is.na(valores)))
## a b c d e
## 3 1 0 1 2
sapply
Sacar el elemento “n” de cada elemento de una list
:
# Extract the 2nd value of every element: the extraction operator `[[` is used
# as the function and 2 is forwarded to it as the index.
sapply(X = myList, FUN = "[[", 2)
## a b c d e
## NA NA 3 1 3
sapply
# Data frame with columns a..e; each column is 10 draws (with replacement)
# from 1..5 plus NA. Columns are generated in order a, b, c, d, e.
myDF <- do.call(
  data.frame,
  lapply(
    setNames(nm = c("a", "b", "c", "d", "e")),
    function(nombre) sample(c(1:5, NA), size = 10, replace = TRUE)
  )
)
print(myDF)
## a b c d e
## 1 1 5 1 4 4
## 2 2 3 2 5 3
## 3 1 2 1 5 2
## 4 3 3 3 1 3
## 5 5 3 3 5 2
## 6 1 2 3 5 5
## 7 1 1 NA 3 4
## 8 5 3 NA 2 4
## 9 NA 2 4 3 2
## 10 5 4 3 4 2
sapply
# A data.frame is a list of columns, so sapply() visits each column; the
# result is a named vector with the number of NA per column.
sapply(X = myDF, FUN = function(columna) sum(is.na(columna)))
## a b c d e
## 1 0 2 0 0
sapply
# Time guessNumber() on samples of growing size (1000, 2000, ..., 30000),
# repeating each size 30 times. The result is a list with one numeric vector
# of 30 elapsed times (in seconds) per sample size.
pruebaTiempo <- lapply(
  seq(from = 1000, to = 30000, by = 1000),
  function(sampleSize) {
    vapply(seq_len(30), function(repet) {
      startTime <- Sys.time()
      guessNumber(mySample = seq_len(sampleSize))
      # Fix the unit to seconds: a bare `Sys.time() - startTime` lets difftime
      # choose secs/mins/hours per call, and simplifying to a plain vector
      # drops the units attribute, so runs could end up on different scales.
      as.numeric(difftime(Sys.time(), startTime, units = "secs"))
    }, FUN.VALUE = numeric(1))
  }
)
sapply
tapply
tapply
# Simulate 1000 rows: a grouping letter A-E plus three numeric columns drawn
# from normal, log-normal and gamma distributions, then show the first 10 rows.
col0 <- sample(x = c("A", "B", "C", "D", "E"), size = 1000, replace = TRUE)
col1 <- rnorm(1000, mean = 10, sd = 0.5)
col2 <- rlnorm(1000, meanlog = 10, sdlog = 0.5)
col3 <- rgamma(1000, shape = 10, rate = 0.5)
dfCol <- data.frame(col0 = col0, col1 = col1, col2 = col2, col3 = col3)
print(dfCol[seq_len(10), ])
## col0 col1 col2 col3
## 1 E 10.214841 47273.13 19.53360
## 2 B 10.161125 18761.21 21.31726
## 3 A 9.963536 23280.13 31.25191
## 4 E 10.136935 54161.22 13.70548
## 5 B 10.446320 6520.05 23.58053
## 6 A 9.582148 27207.82 17.60784
## 7 E 9.447795 18704.42 19.38604
## 8 B 10.419673 30796.01 28.34310
## 9 A 10.074750 26959.06 24.33891
## 10 E 10.813097 21645.99 20.52869
tapply
# Split col1 by the grouping letter in col0 and summarise each group.
tapply(X = dfCol[["col1"]], INDEX = dfCol[["col0"]], FUN = summary)
## $A
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.522 9.700 9.983 10.003 10.270 11.247
##
## $B
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.841 9.664 9.994 10.004 10.356 11.746
##
## $C
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.284 9.644 9.962 9.960 10.332 11.487
##
## $D
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.813 9.662 10.040 10.048 10.379 11.446
##
## $E
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.671 9.613 9.986 9.971 10.317 11.260
tapply
# Group means of columns 2 to 4 by col0: each tapply() call returns one named
# vector (A..E) and sapply() binds them into a matrix, one column per variable.
sapply(X = 2:4, FUN = function(indiceCol) {
  tapply(X = dfCol[[indiceCol]], INDEX = dfCol$col0, FUN = mean)
})
## [,1] [,2] [,3]
## A 10.002878 25369.50 19.91896
## B 10.003580 24616.70 20.44072
## C 9.960449 25622.16 20.35900
## D 10.048424 24673.44 19.14632
## E 9.971356 25213.84 20.27397