X13 - Ejercicios

François Rebaudo, IRD francois.rebaudo@ird.fr

Marzo 2019 ; PUCE-Quito-Ecuador http://myrbooksp.netlify.com/
CC BY-NC-ND 3.0

Asociación gráfica

`plot` + `boxplot`

graphBarplotCol <- function(n){
  myX <- rnorm(n)
  myY <- rnorm(n)
  myCol <- c(2, sample(c(4, 2), size = (n - 1), replace = TRUE))
  myColors <- colorRampPalette(c("blue", "red"))(100)
  myYCut <- cut(myY, breaks = -4:4)
  myXCut <- cut(myX, breaks = -4:4)
  myYCutCol <- table(myCol, myYCut)
  myXCutCol <- table(myCol, myXCut)
  xCol <- round(
    myXCutCol[1,] / (myXCutCol[1,] + myXCutCol[2,]) * 100
  )
  xCol[is.na(xCol) | xCol == 0] <- 1
  yCol <- round(
    myYCutCol[1,] / (myYCutCol[1,] + myYCutCol[2,]) * 100
  )
  yCol[is.na(yCol) | yCol == 0] <- 1
  op <- par(no.readonly = TRUE)
  par(mar = c(2, 3, 1, 1))
  layout(matrix(c(1, 1, 0, 
                  2, 2, 3, 
                  2, 2, 3), ncol = 3, byrow = TRUE))
  barplot(table(myXCut), las = 1, col = myColors[xCol])
  plot(x = myX, y = myY, col = myCol, pch = 16, 
    xlim = c(-4, 4), ylim = c(-4, 4), cex = 1.5, 
    panel.first = grid())
  barplot(table(myYCut), las = 1, horiz = TRUE, 
    col = myColors[yCol])
  par(op)
}

`plot` + `boxplot`

Hacer lo mismo con boxplot en lugar de hist (sin colores).
Hacer lo mismo con un boxplot para los puntos azules y uno para los puntos rojos.

`plot` + `boxplot`

Hacer lo mismo con boxplot en lugar de hist (sin colores).

`plot` + `boxplot`

graphBoxplotCol <- function(n){
  myX <- rnorm(n)
  myY <- rnorm(n)
  myCol <- c(2, sample(c(4, 2), size = (n - 1), replace = TRUE))
  par(mar = c(2, 3, 1, 1))
  layout(matrix(c(1, 1, 0, 
                  2, 2, 3, 
                  2, 2, 3), ncol = 3, byrow = TRUE))
  boxplot(myX, horizontal = TRUE, ylim = c(-4, 4))
  plot(x = myX, y = myY, col = myCol, pch = 16, 
    xlim = c(-4, 4), ylim = c(-4, 4), cex = 1.5, 
    panel.first = grid())
  boxplot(myY, ylim = c(-4, 4))
}

`plot` + `boxplot`

Hacer lo mismo con un boxplot para los puntos azules y uno para los puntos rojos.

`plot` + `boxplot`

graphBoxplotCol2 <- function(n){
  myX <- rnorm(n)
  myY <- rnorm(n)
  myCol <- c(2, sample(c(4, 2), size = (n - 1), replace = TRUE))
  layout(matrix(c(1, 1, 6, 6, 
                  2, 2, 6, 6, 
                  3, 3, 4, 5, 
                  3, 3, 4, 5), ncol = 4, byrow = TRUE))
  par(mar = c(0, 2, 0, 0))
  boxplot(myX[myCol == 2], horizontal = TRUE, ylim = c(-4, 4), 
    col = "red")
  boxplot(myX[myCol == 4], horizontal = TRUE, ylim = c(-4, 4), 
    col = "blue")
  par(mar = c(2, 2, 0, 0))
  plot(x = myX, y = myY, col = myCol, pch = 16, 
    xlim = c(-4, 4), ylim = c(-4, 4), cex = 1.5, 
    panel.first = grid())
  par(mar = c(2, 0, 0, 0))
  boxplot(myY[myCol == 4], ylim = c(-4, 4), col = "blue")
  boxplot(myY[myCol == 2], ylim = c(-4, 4), col = "red")
  par(mar = c(2, 4, 1, 1))
  barplot(matrix(c(
    x = c(length(myX[myCol == 2]), length(myX[myCol == 4]))), 
    ncol = 1), beside = TRUE,
    col = c("blue", "red")
  )
}

`plot` + `boxplot`

Text mining

miTexto01 <- readLines("./myData/discurso20190109.txt")
miTexto02 <- readLines("./myData/discurso20190110.txt")
miTexto03 <- readLines("./myData/discurso20190111.txt")
miTexto04 <- readLines("./myData/discurso20190117.txt")

Text mining

misArchivosTextos <- list.files(path = "./myData", 
  pattern = "^(discurso)(.)*(\\.txt)$")
print(misArchivosTextos)

## [1] "discurso20190109.txt" "discurso20190110.txt" "discurso20190111.txt"
## [4] "discurso20190117.txt"

Text mining

misTextos <- lapply(misArchivosTextos, function(i){
  paste(readLines(paste0("./myData/", i)), 
    collapse = " ")
})

Text mining

sacarCarEsp <- function(mitexto_IN){
  mitexto_OUT <- tolower(mitexto_IN)
  mitexto_OUT <- gsub("á", replacement = "a", mitexto_OUT)
  mitexto_OUT <- gsub("í", replacement = "i", mitexto_OUT)
  mitexto_OUT <- gsub("ñ", replacement = "n", mitexto_OUT)
  mitexto_OUT <- gsub("ó", replacement = "o", mitexto_OUT)
  mitexto_OUT <- gsub("ú", replacement = "u", mitexto_OUT)
  mitexto_OUT <- gsub("é", replacement = "e", mitexto_OUT)
  mitexto_OUT <- gsub("\\—|,|;|:|\\.|\\(|\\)|%|“|”|\\-|[0-9]|¡|!|–", 
    replacement = "", mitexto_OUT)
  return(mitexto_OUT)
}

Text mining

misTextos <- lapply(misArchivosTextos, function(i){
  sacarCarEsp(
    paste(
      readLines(paste0("./myData/", i), encoding = "UTF-8"), 
    collapse = " ")
  )
})

Text mining

misTextos <- lapply(misArchivosTextos, function(i){
  misPalabras <- data.frame(
    palabra = strsplit(
      sacarCarEsp(
        paste(
          readLines(paste0("./myData/", i), encoding = "UTF-8"), 
        collapse = " ")
      ), split = " "
    )[[1]], 
  stringsAsFactors = FALSE)
})

Text mining

stopWords <- readLines("https://raw.githubusercontent.com/stopwords-iso/stopwords-es/master/raw/stop-words-spanish.txt")
stopWords_trans <- sacarCarEsp(mitexto_IN = stopWords)

Text mining

misTextosSW <- lapply(misTextos, function(i){
  ni <- i[,1][nchar(i[,1]) > 3]
  ni <- ni[!ni %in% stopWords_trans]
  ni <- sort(table(ni), decreasing = TRUE)
})

Text mining

m01 <- merge(misTextosSW[[1]], misTextosSW[[2]], 
  by.x = 1, by.y = 1, all = TRUE)
m01 <- merge(m01, misTextosSW[[3]], 
  by.x = 1, by.y = 1, all = TRUE)
m01 <- merge(m01, misTextosSW[[4]], 
  by.x = 1, by.y = 1, all = TRUE)
colnames(m01) <- c("palabra", "d1", "d2", "d3", "d4")

Text mining

# funcion para quitar caracteres especiales
sacarCarEsp <- function(mitexto_IN){
  mitexto_OUT <- tolower(mitexto_IN)
  mitexto_OUT <- gsub("á", replacement = "a", mitexto_OUT)
  mitexto_OUT <- gsub("í", replacement = "i", mitexto_OUT)
  mitexto_OUT <- gsub("ñ", replacement = "n", mitexto_OUT)
  mitexto_OUT <- gsub("ó", replacement = "o", mitexto_OUT)
  mitexto_OUT <- gsub("ú", replacement = "u", mitexto_OUT)
  mitexto_OUT <- gsub("é", replacement = "e", mitexto_OUT)
  mitexto_OUT <- gsub("\\—|,|;|:|\\.|\\(|\\)|%|“|”|\\-|[0-9]|¡|!|–", 
    replacement = "", mitexto_OUT)
  return(mitexto_OUT)
}
# lista de archivos
misArchivosTextos <- list.files(path = "./myData", 
  pattern = "^(discurso)(.)*(\\.txt)$")
# sacar tabla de uso de palabra
misTextos <- lapply(misArchivosTextos, function(i){
  misPalabras <- data.frame(
    palabra = strsplit(
      sacarCarEsp(
        paste(
          readLines(paste0("./myData/", i), encoding = "UTF-8"), 
        collapse = " ")
      ), split = " "
    )[[1]], 
  stringsAsFactors = FALSE)
})
# stop-words
stopWords <- readLines("https://raw.githubusercontent.com/stopwords-iso/stopwords-es/master/raw/stop-words-spanish.txt")
stopWords_trans <- sacarCarEsp(mitexto_IN = stopWords)
# quitar stop words y palabras < 3 y hacer frecuencias
misTextosSW <- lapply(misTextos, function(i){
  ni <- i[,1][nchar(i[,1]) > 3]
  ni <- ni[!ni %in% stopWords_trans]
  ni <- sort(table(ni), decreasing = TRUE)
})
# fusionar los archivos
m01 <- merge(misTextosSW[[1]], misTextosSW[[2]], 
  by.x = 1, by.y = 1, all = TRUE)
m01 <- merge(m01, misTextosSW[[3]], 
  by.x = 1, by.y = 1, all = TRUE)
m01 <- merge(m01, misTextosSW[[4]], 
  by.x = 1, by.y = 1, all = TRUE)
colnames(m01) <- c("palabra", "d1", "d2", "d3", "d4")

Text mining

Hacer un barplot con las 30 palabras mas frecuentes

Text mining

sumRow <- apply(m01[,2:5], MARGIN = 1, FUN = sum, 
  na.rm = TRUE)
m01 <- m01[order(sumRow, decreasing = TRUE),]
m01[is.na(m01)] <- 0

Text mining

## Loading required package: palettesForR

# pkgCheck("palettesForR")
par(mar = c(6, 2, 1, 1))
barplot(t(as.matrix(m01[1:30,2:5])), 
  names.arg = m01[,1][1:30], las = 3, 
  col = Pastels_gpl[1:4], 
  legend.text = sapply(strsplit(
    misArchivosTextos, split = "\\."), "[[", 1)
  )

Text mining

Nota: si quieren hacer analisis de texto, ya existen paquetes para hacer esto…

Desarrollo de los artrópodos

devRate

pkgCheck <- function(packages){
    for(x in packages){
        try(if (!require(x, character.only = TRUE)){
            install.packages(x, dependencies = TRUE)
            if(!require(x, character.only = TRUE)) {
                stop()
            }
        })
    }
}
pkgCheck(c("devRate"))

## Loading required package: devRate

devRate

¿Cuantas especies unicas hay para cada Orden? => graficar

bdd <- campbell_74$startVal
bddGenSpp <- data.frame(ordersp = bdd$ordersp, genSp = bdd$genSp)
sppUnicasOrden <- table(unique(bddGenSpp)$ordersp)

devRate

par(mar = c(8, 2, 1, 1))
barplot(sort(sppUnicasOrden, decreasing = TRUE), 
  las = 3)

devRate

¿Cual es el valor promedio de la temperatura base “-aa/bb” para todos los Ordenes? => graficar

tbOrden <- aggregate((-bdd$param.aa / bdd$param.bb), by = list(bdd$ordersp), FUN = mean)

devRate `barplot`

par(mar = c(8, 4, 1, 1))
barplot(sort(tbOrden[,2], decreasing = TRUE), 
  las = 3, names.arg = tbOrden[,1], 
  ylab = "Temperatura base")

devRate `boxplot`

par(mar = c(8, 4, 1, 1))
boxplot((-bdd$param.aa / bdd$param.bb) ~ bdd$ordersp, 
  ylim = c(-5, 25), las = 3, col = Windows_gpl)

Optimización y paralelización

guessNumber <- function(mySample){
  myNumber <- sample(mySample, size = 1)
  myGuess <- sample(mySample, size = 1)
  numberGuess <- 0
  success <- FALSE
  startTime <- Sys.time()
  while(Sys.time() - startTime < 5){
    if(myGuess == myNumber){
      numberGuess <- numberGuess + 1
      success <- TRUE
      break
    }else{
      myGuess <- sample(mySample, size = 1)
      numberGuess <- numberGuess + 1
    }
  }
  return(list(numberGuess, success))
}

guessNumber2 <- function(mySample){
  myNumber <- sample(mySample, size = 1)
  myGuess <- sample(mySample, size = 1)
  numberGuess <- 0
  success <- FALSE
  startTime <- Sys.time()
  while(Sys.time() - startTime < 5){
    if(myGuess == myNumber){
      numberGuess <- numberGuess + 1
      success <- TRUE
      break
    }else{
      mySample <- mySample[! mySample %in% myGuess]
      myGuess <- sample(mySample, size = 1)
      numberGuess <- numberGuess + 1
    }
  }
  return(list(numberGuess, success))
}

Optimización y paralelización

pkgCheck <- function(x){ 
  if (!require(x, character.only = TRUE)){
    install.packages(x, dependencies = TRUE)
    if(!require(x, character.only = TRUE)) {
      stop()
    }
  }
}

pkgCheck("parallel")

## Loading required package: parallel

Optimización y paralelización

seqTest <- seq(from = 1000, to = 30000, by = 1000)
numRep <- 1 # 100 ?

no_cores <- detectCores() - 1
cl <- makeCluster(no_cores)
clusterExport(cl, 
  varlist = list("guessNumber", "guessNumber2", 
    "seqTest", "numRep"))

pruebaTiempo <- parLapply(cl, seqTest,
  function(sampleSize){
  sapply(1:numRep, function(repet){
    startTime <- Sys.time()
    guessNumber(mySample = 1:sampleSize)
    return(Sys.time() - startTime)
  })
})

stopCluster(cl)

cl <- makeCluster(no_cores)
clusterExport(cl, 
  varlist = list("guessNumber", "guessNumber2", 
    "seqTest", "numRep"))

pruebaTiempo2 <- parLapply(cl, seqTest,
  function(sampleSize){
  sapply(1:numRep, function(repet){
    startTime <- Sys.time()
    guessNumber2(mySample = 1:sampleSize)
    return(Sys.time() - startTime)
  })
})

stopCluster(cl)

Optimización y paralelización

qt <- sapply(pruebaTiempo, quantile)
qt2 <- sapply(pruebaTiempo2, quantile)

plot(x = seqTest, y = qt[3,], 
  type = 'o', ylim = c(0, 5))
points(x = seqTest, y = qt[2,], 
  type = 'l', col = 1, lty = 2)
points(x = seqTest, y = qt[4,], 
  type = 'l', col = 1, lty = 2)

points(x = seqTest, y = qt2[3,], 
  type = 'o', col = 2)
points(x = seqTest, y = qt2[2,], 
  type = 'l', col = 2, lty = 2)
points(x = seqTest, y = qt2[4,], 
  type = 'l', col = 2, lty = 2)

X13 - Ejercicios

François Rebaudo, IRD francois.rebaudo@ird.fr

Marzo 2019 ; PUCE-Quito-Ecuador http://myrbooksp.netlify.com/ CC BY-NC-ND 3.0

Asociación gráfica

plot + boxplot

plot + boxplot

plot + boxplot

plot + boxplot

plot + boxplot

plot + boxplot

plot + boxplot

plot + boxplot

plot + boxplot

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Text mining

Desarrollo de los artrópodos

devRate

devRate

devRate

devRate

devRate barplot

devRate boxplot

Optimización y paralelización

Optimización y paralelización

Optimización y paralelización

Optimización y paralelización

Optimización y paralelización

Optimización y paralelización

Marzo 2019 ; PUCE-Quito-Ecuador http://myrbooksp.netlify.com/
CC BY-NC-ND 3.0

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

`plot` + `boxplot`

devRate `barplot`

devRate `boxplot`