高磊
2013/09/24
# Load the student survey from a tab-separated text file with a header row.
studentdata <- read.table("studentdata.txt", sep = "\t",
                          header = TRUE)
# Alternative source (presumably the LearnBayes package ships this data set --
# confirm). Calling data() right after read.table() would either overwrite the
# frame that was just read or, with no providing package attached, only emit a
# "data set not found" warning, so it is kept here as a commented alternative.
# library(LearnBayes); data(studentdata)
# Show the first six rows as a sanity check.
head(studentdata)
输出:
Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut Job Drink
1 1 67 female 10 5 10 -2.5 5.5 60 30.0 water
2 2 64 female 20 7 5 1.5 8.0 0 20.0 pop
3 3 61 female 12 2 6 -1.5 7.5 48 0.0 milk
4 4 61 female 3 6 40 2.0 8.5 10 0.0 water
5 5 70 male 4 5 6 0.0 9.0 15 17.5 pop
6 6 63 female NA 3 5 1.0 8.5 25 0.0 water
#studentdata$Height
# Put the columns of studentdata on the search path so that the commands
# below can refer to them by bare name (Drink, Gender, Haircut, ...).
# NOTE(review): attach() makes name lookup depend on global state;
# with(studentdata, ...) or studentdata$col would be safer, but the rest of
# this transcript relies on the attached names.
attach(studentdata)
# Frequency count of each Drink category.
table(Drink)
输出:
Drink
milk pop water
113 178 355
table(Drink, Gender)
输出:
Gender
Drink female male
milk 63 50
pop 110 68
water 256 99
barplot(table(Drink), xlab = "Drink", ylab = "Count")
输出:
table(Dvds)
Dvds
0 1 2 2.5 3 4 5 6 7 8 9 10 11 12 13
26 10 13 1 18 9 27 14 12 12 7 78 3 20 7
14 15 16 17 17.5 18 20 21 22 22.5 23 24 25 27.5 28
4 46 1 3 1 4 83 3 3 1 3 2 31 3 1
29 30 31 33 35 36 37 40 41 42 45 46 48 50 52
1 45 1 1 12 4 1 26 1 1 5 1 2 26 1
53 55 60 62 65 67 70 73 75 80 83 85 90 97 100
2 1 7 1 2 1 4 1 3 4 1 1 1 1 10
120 122 130 137 150 152 157 175 200 250 500 900 1000
2 1 2 1 6 1 1 1 8 1 1 1 1
barplot(table(Dvds))
> female.Haircut <- Haircut[Gender == "female"]
> male.Haircut <- Haircut[Gender == "male"]
> summary(female.Haircut)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 15.00 25.00 34.08 45.00 180.00 19
> summary(male.Haircut)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 0.00 12.00 10.54 15.00 75.00 1
# Draw one boxplot of Haircut per Gender level; boxplot() also returns the
# statistics it plotted, which are kept in m.
m <- boxplot(Haircut ~ Gender)
# Matrix with one column per group; per the boxplot() documentation the rows
# are lower whisker, lower hinge, median, upper hinge and upper whisker.
m$stats
[,1] [,2]
[1,] 0 0
[2,] 15 0
[3,] 25 12
[4,] 45 15
[5,] 90 30
# Sleep duration per student: wake-up time minus bedtime.
# NOTE(review): ToSleep looks like hours relative to midnight (negative =
# before midnight), judging by values such as -2.5 in the data -- confirm.
hours.of.sleep <- WakeUp - ToSleep
# Numeric summary (quartiles, mean, NA count); console output is kept below.
summary(hours.of.sleep)
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 2.500 6.500 7.500 7.385 8.500 12.500 4
# Histogram of the sleep durations with the default title suppressed.
hist(hours.of.sleep, main = "")
输出:
# Split the plotting device into one row and two columns.
# NOTE(review): the previous par() settings are not saved or restored.
par(mfrow = c(1, 2))
# Left panel: raw scatterplot of sleep duration against bedtime.
plot(ToSleep, hours.of.sleep)
# Highlight the location (-0.5, 8) with a large cross (pch = 4) in palette
# colour 2.
points(-0.5, 8, pch = 4, col = 2, cex = 2)
# Right panel: the same variables with random jitter added to both axes,
# presumably to separate points that coincide in the raw plot.
plot(jitter(ToSleep), jitter(hours.of.sleep))
> fit <- lm(hours.of.sleep ~ ToSleep)
> fit
Call:
lm(formula = hours.of.sleep ~ ToSleep)
Coefficients:
(Intercept) ToSleep
7.9628 -0.5753
> abline(fit)
predict(fit, data.frame(ToSleep = c(-2)))
1
9.113329
# Mark the fitted value at ToSleep = -2 on the current plot as a large filled
# circle (pch = 16) in palette colour 2.
points(-2, predict(fit, data.frame(ToSleep = c(-2))), cex = 2,
col = 2, pch = 16)
set.seed(123)
# Simulate one sample of 16 draws from N(5, 1) and return the two-sided 95%
# t-based confidence interval for the mean plus the one-sample t statistic.
#
# Args:
#   i:  Replication index. Unused; it exists so the function can be mapped
#       over a sequence such as 1:200.
#   u0: Hypothesised mean used in the t statistic. Defaults to 5, the mean the
#       sample is drawn from. The previous body referenced `u0` without
#       defining it, which fails with "object 'u0' not found" unless a global
#       of that name happens to exist.
#
# Returns:
#   Unnamed numeric vector of length 3: lower bound, upper bound, t statistic.
norm.conf.interval <- function(i, u0 = 5) {
  y <- rnorm(16, 5, 1)
  n <- length(y)
  t.crit <- qt(0.975, n - 1)  # two-sided 95% critical value with n - 1 df
  u <- mean(y)
  se <- sd(y) / sqrt(n)       # standard error of the sample mean
  c(u - t.crit * se, u + t.crit * se, (u - u0) / se)
}
# Run 200 independent replications. vapply() pins the result to a 3 x 200
# numeric matrix (rows: lower bound, upper bound, t statistic) and fails
# loudly if a replication returns anything else, whereas sapply() would
# silently change the return type.
ci <- vapply(seq_len(200), norm.conf.interval, numeric(3))