CC BY-NC-ND 3.0
Explicar Y
gracias a variables explicativas X
(s)
y ~ x
miX <- iris$Sepal.Length
miY <- iris$Sepal.Width
lm01 <- lm(miY ~ miX)
print(lm01)
##
## Call:
## lm(formula = miY ~ miX)
##
## Coefficients:
## (Intercept) miX
## 3.41895 -0.06188
summary(lm01)
##
## Call:
## lm(formula = miY ~ miX)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1095 -0.2454 -0.0167 0.2763 1.3338
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.41895 0.25356 13.48 <2e-16 ***
## miX -0.06188 0.04297 -1.44 0.152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4343 on 148 degrees of freedom
## Multiple R-squared: 0.01382, Adjusted R-squared: 0.007159
## F-statistic: 2.074 on 1 and 148 DF, p-value: 0.1519
plot(miX, miY)
abline(lm01)
df <- data.frame(
miX = iris$Sepal.Length[iris$Species == "setosa"],
miY = iris$Sepal.Width[iris$Species == "setosa"])
df <- df[order(df$miX),]
lm02 <- lm(df$miY ~ df$miX)
print(lm02)
##
## Call:
## lm(formula = df$miY ~ df$miX)
##
## Coefficients:
## (Intercept) df$miX
## -0.5694 0.7985
summary(lm02)
##
## Call:
## lm(formula = df$miY ~ df$miX)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72394 -0.18273 -0.00306 0.15738 0.51709
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5694 0.5217 -1.091 0.281
## df$miX 0.7985 0.1040 7.681 6.71e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2565 on 48 degrees of freedom
## Multiple R-squared: 0.5514, Adjusted R-squared: 0.542
## F-statistic: 58.99 on 1 and 48 DF, p-value: 6.71e-10
plot(df$miX, df$miY)
abline(lm02)
attributes(summary(lm02))
## $names
## [1] "call" "terms" "residuals" "coefficients"
## [5] "aliased" "sigma" "df" "r.squared"
## [9] "adj.r.squared" "fstatistic" "cov.unscaled"
##
## $class
## [1] "summary.lm"
summary(lm02)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5694327 0.5217119 -1.091470 2.805148e-01
## df$miX 0.7985283 0.1039651 7.680738 6.709843e-10
Comprobar Normalidad de los residuos (1: hist)
hist(lm02$residuals, freq = FALSE)
lines(density(lm02$residuals))
Comprobar Normalidad de los residuos (2: qqplot)
qqnorm(lm02$residuals)
qqline(lm02$residuals)
Comprobar Normalidad de los residuos (3: Shapiro-Wilk)
shapiro.test(lm02$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm02$residuals
## W = 0.98681, p-value = 0.8459
Intervalos de confidencia
plot(df$miX, df$miY)
abline(lm02)
pred02 <- predict(lm02, level = 0.95, interval = "confidence")
points(df$miX, pred02[,2], lty = 2, type = 'l')
points(df$miX, pred02[,3], lty = 2, type = 'l')
Intervalos de confidencia
plot(df$miX, df$miY)
abline(lm02)
pred02 <- predict(lm02, level = 0.99, interval = "confidence")
points(df$miX, pred02[,2], lty = 2, type = 'l')
points(df$miX, pred02[,3], lty = 2, type = 'l')
plot(df$miX, df$miY)
lm03 <- lm(df$miY ~ poly(df$miX, 7))
pred03 <- predict(lm03, interval = "confidence")
points(df$miX, pred03[,1], type = 'l')
anova(lm02)
## Analysis of Variance Table
##
## Response: df$miY
## Df Sum Sq Mean Sq F value Pr(>F)
## df$miX 1 3.8821 3.8821 58.994 6.71e-10 ***
## Residuals 48 3.1587 0.0658
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Prueba de Student
b1 <- rnorm(1000)
b2 <- rnorm(1000)
t.test(b1, b2)
##
## Welch Two Sample t-test
##
## data: b1 and b2
## t = -1.9148, df = 1997.1, p-value = 0.05566
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.172070187 0.002059143
## sample estimates:
## mean of x mean of y
## -0.03452971 0.05047581
Prueba de Student
b1 <- rnorm(1000)
b2 <- rnorm(1000, mean = 2)
t.test(b1, b2)
##
## Welch Two Sample t-test
##
## data: b1 and b2
## t = -44.768, df = 1998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.080451 -1.905826
## sample estimates:
## mean of x mean of y
## 0.02479373 2.01793181
Prueba de Wilcoxon
b1 <- rgamma(1000, shape = 1)
b2 <- rgamma(1000, shape = 2)
par(mfrow = c(1, 2))
hist(b1, col = "lightblue")
hist(b2, col = "salmon")
Prueba de Wilcoxon
wilcox.test(b1, b2)
##
## Wilcoxon rank sum test with continuity correction
##
## data: b1 and b2
## W = 245230, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
df <- data.frame(
b1 = rnorm(1000), b2 = rnorm(1000))
clust <- kmeans(df, centers = 2)
str(clust)
## List of 9
## $ cluster : int [1:1000] 1 1 2 2 1 2 1 2 1 2 ...
## $ centers : num [1:2, 1:2] -0.0889 0.0807 -0.7835 0.8004
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "1" "2"
## .. ..$ : chr [1:2] "b1" "b2"
## $ totss : num 1898
## $ withinss : num [1:2] 648 616
## $ tot.withinss: num 1264
## $ betweenss : num 634
## $ size : int [1:2] 503 497
## $ iter : int 1
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
plot(x = df$b1, y = df$b2, col = clust$cluster, pch = 16)
clust <- kmeans(df, centers = 4)
plot(x = df$b1, y = df$b2, col = clust$cluster, pch = 16)
clust <- kmeans(df, centers = 8)
plot(x = df$b1, y = df$b2, col = clust$cluster, pch = 16)
Ajustar una formula a datos experimentales.
Messenger PS and Flitters NE (1958) Effect of constant temperature environments on the egg stage of three species of Hawaiian fruit flies. Annals of the Entomological Society of America 51, 109–119. https://en.wikipedia.org/wiki/Bactrocera_dorsalis
mDS <- data.frame(
temp = (c(55.0, 56.0, 57.0, 58.0, 60.0, 62.5,
65.0, 67.5, 70.0, 75.0, 80.0, 85.0, 87.5,
90.0, 92.5, 95.0, 96.0, 97.0, 97.5) - 32)/1.8,
devRate = 1/(c(263.0, 232.0, 170.5, 148.0,
121.3, 95.5, 74.0, 62.5, 51.5,
38.0, 30.5, 27.0, 25.0, 24.0, 23.5,
25.0, 26.5, 29.3, 34.3)/24))
plot(x = mDS$temp, y = mDS$devRate, pch = 16)
Briere, J.F., Pracros, P., le Roux, A.Y. and Pierre, S. (1999) A novel rate model of temperature-dependent development for arthropods. Environmental Entomology, 28, 22-29.
\[\begin{equation} rT = aa * T * (T - T_{min}) * (T_{max} - T)^{1/bb} \end{equation}\]model01 <- nls(formula = devRate ~ aa * temp *
(temp - Tmin) * (Tmax - temp)^(1 / bb),
data = mDS,
start = list(aa = 0.01, Tmin = 10, Tmax = 40, bb = 2))
print(model01)
## Nonlinear regression model
## model: devRate ~ aa * temp * (temp - Tmin) * (Tmax - temp)^(1/bb)
## data: mDS
## aa Tmin Tmax bb
## 9.291e-04 9.101e+00 3.673e+01 4.049e+00
## residual sum-of-squares: 0.001665
##
## Number of iterations to convergence: 8
## Achieved convergence tolerance: 3.451e-06
plot(x = mDS$temp, y = mDS$devRate, pch = 16, xlim = c(10, 40),
ylim = c(0, 1))
newTemp <- seq(from = 0, to = 50, by = 0.1)
lines(newTemp, predict(model01, newdata = list(temp = newTemp)),
col = 2, lty = 2)
cor(mDS$devRate, predict(model01))
## [1] 0.9996181
AIC(model01) # BIC
## [1] -113.5864
confint(model01)
## Waiting for profiling to be done...
## 2.5% 97.5%
## aa 8.988171e-04 9.561459e-04
## Tmin 8.569185e+00 9.594820e+00
## Tmax 3.663670e+01 3.685067e+01
## bb 3.714081e+00 4.398820e+00
1.0. Un problema de estadística:
Quisiera probar si mis datos estan espacialmente aleatorios
2.0. Una solucion con R:
2.1. Buscar en la ayuda de R
2.2. Buscar en Internet por paquetes R
2.3. Leer la documentacion
??pointpattern
# spatstat
# ...
Internet: “r testing spatial randomness” https://training.fws.gov/courses/references/tutorials/geospatial/CSP7304/documents/PointPatterTutorial.pdf
pkgCheck <- function(packages){
for(x in packages){
try(if (!require(x, character.only = TRUE)){
install.packages(x, dependencies = TRUE)
if(!require(x, character.only = TRUE)) {
stop()
}
})
}
}
pkgCheck("spatstat")
b1 <- sample(0:1000, size = 2500, replace = TRUE)
b2 <- sample(0:1000, size = 2500, replace = TRUE)
X <- ppp(x = b1, y = b2, c(0, 1000), c(0, 1000))
## Warning: data contain duplicated points
par(mfrow = c(1, 2))
plot(density(X, 10))
contour(density(X, 10), axes = FALSE)
Q <- quadratcount(X, nx = 4, ny = 3)
plot(X)
plot(Q, add = TRUE, cex = 2)
den <- density(X, sigma = 70)
plot(den)
plot(X, add = TRUE, cex = 0.5)
persp(den)
quadrat.test(X, nx = 3, ny = 2)
##
## Chi-squared test of CSR using quadrat counts
## Pearson X2 statistic
##
## data: X
## X2 = 10.208, df = 5, p-value = 0.1391
## alternative hypothesis: two.sided
##
## Quadrats: 3 by 2 grid of tiles
M <- quadrat.test(X, nx = 3, ny = 3)
plot(X)
plot(M, add = TRUE, cex = 1)
Hay mas de 10 000 paquetes R…