An Introduction to R

高磊
2013/09/24

R与SAS、SPSS、...




         R                                             SAS,SPSS...

R?Wiki!

R package

R Gallery

R官方网站(http://www.r-project.org/)

R安装

R界面


Example1---Student Dataset


读取数据



# Read the tab-separated survey file; the first row holds the column names.
studentdata <- read.table("studentdata.txt", sep = "\t",
                          header = TRUE)
# Alternative source (use INSTEAD of read.table(), not after it): data() loads
# a data set from an attached package or a local ./data directory. Calling it
# right after read.table() either warns that the data set is not found or
# silently overwrites the data frame that was just read.
# NOTE(review): presumably the copy bundled with the LearnBayes package --
# confirm before enabling.
# data(studentdata)
# Show the first six rows to check that the import worked.
head(studentdata)
		

输出:


  Student Height Gender Shoes Number Dvds ToSleep WakeUp Haircut  Job Drink
1       1     67 female    10      5   10    -2.5    5.5      60 30.0 water
2       2     64 female    20      7    5     1.5    8.0       0 20.0   pop
3       3     61 female    12      2    6    -1.5    7.5      48  0.0  milk
4       4     61 female     3      6   40     2.0    8.5      10  0.0 water
5       5     70   male     4      5    6     0.0    9.0      15 17.5   pop
6       6     63 female    NA      3    5     1.0    8.5      25  0.0 water

table()



# Without attach(), a column has to be addressed as studentdata$Height.
# attach() places the data frame on the search path so that its columns can
# be referenced by bare name (Drink, Gender, ...) in the calls that follow.
attach(studentdata)
# One-way frequency table: how many rows fall into each value of Drink.
table(Drink)
		
输出:

Drink
 milk   pop water 
  113   178   355 
		

table()


# Two-way contingency table: Drink in the rows, Gender in the columns.
table(Drink, Gender)
		
输出:

       Gender
Drink   female male
  milk      63   50
  pop      110   68
  water    256   99
		

barplot()



# Bar chart of the Drink frequencies: one bar per category, bar height equal
# to the number of rows in that category.
barplot(
  height = table(Drink),
  ylab = "Count",
  xlab = "Drink"
)
		
输出:

table()



# Frequency of every distinct value of Dvds. The printed result is kept below
# as comments so that the snippet can be sourced as-is.
table(Dvds)
# Dvds
#    0    1    2  2.5    3    4    5    6    7    8    9   10   11   12   13
#   26   10   13    1   18    9   27   14   12   12    7   78    3   20    7
#   14   15   16   17 17.5   18   20   21   22 22.5   23   24   25 27.5   28
#    4   46    1    3    1    4   83    3    3    1    3    2   31    3    1
#   29   30   31   33   35   36   37   40   41   42   45   46   48   50   52
#    1   45    1    1   12    4    1   26    1    1    5    1    2   26    1
#   53   55   60   62   65   67   70   73   75   80   83   85   90   97  100
#    2    1    7    1    2    1    4    1    3    4    1    1    1    1   10
#  120  122  130  137  150  152  157  175  200  250  500  900 1000
#    2    1    2    1    6    1    1    1    8    1    1    1    1
		

barplot()



# Bar chart with one bar for every distinct value of Dvds; bar height is the
# number of rows reporting that value.
barplot(height = table(Dvds))
		

分析数据:不同水平比较



> female.Haircut <- Haircut[Gender == "female"]
> male.Haircut <- Haircut[Gender == "male"]
> summary(female.Haircut)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   0.00   15.00   25.00   34.08   45.00  180.00      19 
> summary(male.Haircut)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   0.00    0.00   12.00   10.54   15.00   75.00       1 
		

boxplot(.~.)



# Side-by-side box plots of Haircut, one box per Gender level. boxplot()
# also returns the statistics it drew; keep them in `m`.
m <- boxplot(Haircut ~ Gender)
# m$stats has one column per group; its rows are lower whisker, lower hinge,
# median, upper hinge and upper whisker. The printed result is kept below as
# comments so that the snippet can be sourced as-is.
m$stats
#      [,1] [,2]
# [1,]    0    0
# [2,]   15    0
# [3,]   25   12
# [4,]   45   15
# [5,]   90   30
		

整理数据:构建新的变量



# Derived variable: elapsed time between going to sleep and waking up,
# computed element-wise for every row.
hours.of.sleep <- WakeUp - ToSleep

# Five-number summary plus mean and NA count of the new variable.
summary(object = hours.of.sleep)
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#  2.500   6.500   7.500   7.385   8.500  12.500       4 

# Histogram of the new variable; the empty string suppresses the main title.
hist(x = hours.of.sleep, main = "")
		
输出:

plot():散点图



# Split the graphics device into one row of two panels, filled left to right.
par(mfrow = c(1, 2))
# First panel: raw scatter plot of sleep duration against bedtime.
plot(ToSleep, hours.of.sleep)
# Highlight the position (-0.5, 8) with a double-size cross (pch = 4) in
# palette colour 2. This must run before the next plot() call, which moves
# drawing on to the second panel.
points(-0.5, 8, pch = 4, col = 2, cex = 2)
# Second panel: the same scatter plot with a small random jitter added to
# both coordinates.
plot(jitter(ToSleep), jitter(hours.of.sleep))
		

lm():回归模型



> fit <- lm(hours.of.sleep ~ ToSleep)
> fit
Call:
lm(formula = hours.of.sleep ~ ToSleep)
Coefficients:
(Intercept)      ToSleep  
     7.9628      -0.5753  
> abline(fit)
		

predict():预测



# Predicted hours of sleep from the fitted model `fit` at ToSleep = -2. The
# printed result is kept below as comments so that the snippet can be
# sourced as-is.
predict(fit, data.frame(ToSleep = c(-2)))
#        1 
# 9.113329 
# Add that prediction to the current plot as a double-size filled circle
# (pch = 16) in palette colour 2.
points(-2, predict(fit, data.frame(ToSleep = c(-2))), cex = 2,
       col = 2, pch = 16)
	   

Example2--正态总体均值的区间估计(方差未知)




区间估计函数



# Fix the random seed so that the simulated intervals are reproducible.
set.seed(123)

# Draw one sample of 16 values from N(5, 1) and build the two-sided 95%
# t-based confidence interval for the mean (variance treated as unknown).
#
# Args:
#   i:  Replication index. Unused; present only so that the function can be
#       mapped over a sequence of replication numbers.
#   u0: Hypothesised mean used in the t statistic. Defaults to 5, the mean
#       the sample is drawn from. (It used to be an undefined free variable,
#       which made every call fail with "object 'u0' not found".)
#
# Returns:
#   Numeric vector of length 3: lower limit, upper limit, and the t
#   statistic (mean - u0) / (sd / sqrt(n)).
norm.conf.interval <- function(i, u0 = 5) {
  y <- rnorm(16, 5, 1)
  n <- length(y)
  # 97.5% quantile of the t distribution with n - 1 degrees of freedom.
  t_crit <- qt(0.975, n - 1)
  u <- mean(y)
  # Estimated standard error of the sample mean.
  se <- sd(y) / sqrt(n)
  c(u - t_crit * se, u + t_crit * se, (u - u0) / se)
}

# Repeat the experiment 200 times. The result is a 3 x 200 matrix with one
# column per replication (rows: lower limit, upper limit, t statistic).
ci <- vapply(seq_len(200), norm.conf.interval, numeric(3))
		

动态示意


正直的统计学网站--统计之都论坛

Thank you!