library(ggplot2)
library(MASS)
z <- rnorm(n=3000,mean=0.2)
z <- data.frame(1:3000,z)
names(z) <- list("ID","myVar")
z <- z[z$myVar>0,]
str(z)
## 'data.frame': 1682 obs. of 2 variables:
## $ ID : int 1 3 6 7 8 10 12 13 14 15 ...
## $ myVar: num 0.9892 0.7955 2.3365 0.0612 1.4703 ...
summary(z$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00401 0.36955 0.75814 0.88224 1.26338 3.71709
#Histogram. #Observation: R studio indicated that some were deprecated and sugested to change size to linewith so I did it.
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",linewidth=0.2)
print(p1)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Empirical density curve
p1 <- p1 + geom_density(linetype="dotted",linewidth=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Maximum likelihood parameters for normal
normPars <- fitdistr(z$myVar,"normal")
print(normPars)
## mean sd
## 0.88223649 0.63442715
## (0.01546923) (0.01093840)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 0.882 0.634
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.0155 0.0109
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 0.000239 0 0 0.00012
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 1682
## $ loglik : num -1621
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
## mean
## 0.8822365
#Plot normal probability density
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$myVar),len=length(z$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot exponential probability density
expoPars <- fitdistr(z$myVar,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot uniform probability density
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot gamma probability density
gammaPars <- fitdistr(z$myVar,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot beta probability density
pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
Visually gamma or beta probability density seemed to be the better fit.
#Similar to Homework 6 used the quantification of surfactant protein C (SPC) found in the bronchoalveolar lavage fluid across 16 animals.
Z <- read.table("SPC.quantification.homework8.csv",header=TRUE,sep=",")
str(Z)
## 'data.frame': 16 obs. of 1 variable:
## $ SPC: int 23001 19049 7023 12627 20170 13906 16829 29307 22435 30349 ...
summary(Z)
## SPC
## Min. : 7023
## 1st Qu.:16650
## Median :22013
## Mean :24188
## 3rd Qu.:31703
## Max. :42109
#Histogram.
p2 <- ggplot(data=Z, aes(x=SPC, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",linewidth=0.2)
print(p2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Empirical density curve
p2 <- p2 + geom_density(linetype="dotted",size=0.75)
print(p2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Maximum likelihood parameters for normal
normPars2 <- fitdistr(Z$SPC,"normal")
print(normPars2)
## mean sd
## 24188.375 10104.202
## ( 2526.051) ( 1786.188)
str(normPars2)
## List of 5
## $ estimate: Named num [1:2] 24188 10104
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 2526 1786
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 6380932 0 0 3190466
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 16
## $ loglik : num -170
## - attr(*, "class")= chr "fitdistr"
normPars2$estimate["mean"]
## mean
## 24188.38
#Plot normal probability density
meanML <- normPars2$estimate["mean"]
sdML <- normPars2$estimate["sd"]
xval <- seq(0,max(Z$SPC),len=length(Z$SPC))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(Z$SPC), args = list(mean = meanML, sd = sdML))
p2 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot exponential probability density
expoPars <- fitdistr(Z$SPC,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(Z$SPC), args = list(rate=rateML))
p2 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot uniform probability density
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(Z$SPC), args = list(min=min(Z$SPC), max=max(Z$SPC)))
p2 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot gamma probability density #observation:due to error from fitting,
defined the value of lower so it could run. For 0.001 was still giving
error.
gammaPars <- fitdistr(Z$SPC,dgamma,list(shape=1,rate=0.1),lower=0.01)
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(Z$SPC), args = list(shape=shapeML, rate=rateML))
p2 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot beta probability density
pSpecial <- ggplot(data=Z, aes(x=SPC/(max(SPC + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=Z$SPC/max(Z$SPC + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produzidos
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(Z$SPC), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).
Although no fit was perfect, uniform distribution showed the best model for the data.