R basics
# Point the session at the course folder so the relative file name below
# resolves. NOTE(review): this is a hard-coded absolute path, so the script
# only runs where that volume is mounted.
setwd("/Volumes/EDMS451/Spring 2022/Software Instructions")
# Restore the objects saved in the .rda file into the current environment.
# NOTE(review): nothing loaded here is referenced in the visible part of this
# script -- confirm it is still needed.
load("Undergrad_Survey_Database.rda")
# Example data: 21 observations of a single variable, var1.
# The column name is set directly in data.frame(), so no separate rename
# step is required.
dat <- data.frame(
  var1 = c(
    4, 5, 7, 8, 9, 3, 1,
    6, 7, 6, 5, 9, 2, 5,
    5, 1, 2, 5, 7, 8, 3
  )
)
print(dat)
var1
1 4
2 5
3 7
4 8
5 9
6 3
7 1
8 6
9 7
10 6
11 5
12 9
13 2
14 5
15 5
16 1
17 2
18 5
19 7
20 8
21 3
# Build a frequency table for a vector of numeric values.
#
# Args:
#   x: vector of observations. Missing values are dropped by table() and do
#      not count toward the total. Values that cannot be read as numbers
#      (e.g. character labels) yield NA in the "value" column, with a
#      coercion warning.
#
# Returns:
#   A numeric matrix with one row per distinct value of x (in sorted order)
#   and the columns
#     value       - the distinct value
#     freq        - number of occurrences
#     cumfreq     - running total of freq
#     propfreq    - freq as a proportion of all counted observations
#     cumpropfreq - running total of propfreq
#   The result is always a matrix, including when x holds a single distinct
#   value (1 row) or no values at all (0 rows).
cum_freq_tab <- function(x) {
  freq <- table(x)
  counts <- as.vector(freq) # drop the table class and names
  total <- sum(counts)
  cumfreq <- cumsum(counts)

  # Assemble the numeric columns directly. Going through a character matrix
  # and sapply() would truncate the proportions and collapse a one-row table
  # into a plain named vector.
  cbind(
    value = as.numeric(names(freq)),
    freq = counts,
    cumfreq = cumfreq,
    propfreq = counts / total,
    cumpropfreq = cumfreq / total
  )
}
# Tabulate var1, then render the table with every entry rounded to three
# decimal places.
freq_tab <- cum_freq_tab(dat[["var1"]])
knitr::kable(round(freq_tab, digits = 3))
value | freq | cumfreq | propfreq | cumpropfreq |
---|---|---|---|---|
1 | 2 | 2 | 0.095 | 0.095 |
2 | 2 | 4 | 0.095 | 0.190 |
3 | 2 | 6 | 0.095 | 0.286 |
4 | 1 | 7 | 0.048 | 0.333 |
5 | 5 | 12 | 0.238 | 0.571 |
6 | 2 | 14 | 0.095 | 0.667 |
7 | 3 | 17 | 0.143 | 0.810 |
8 | 2 | 19 | 0.095 | 0.905 |
9 | 2 | 21 | 0.095 | 1.000 |
# Descriptive statistics for var1; the line after each call is the value
# printed by R.
mean(dat$var1) # arithmetic mean
[1] 5.142857
median(dat$var1) # median
[1] 5
var(dat$var1) # sample variance (n - 1 denominator)
[1] 6.128571
sd(dat$var1) # sample standard deviation (square root of var)
[1] 2.475595
range(dat$var1) # range: returns the minimum and maximum, not their difference
[1] 1 9
min(dat$var1) # minimum
[1] 1
max(dat$var1) # maximum
[1] 9
# Return the most frequent value in x (the mode).
# When several values share the highest count, the one that appears first
# in x is returned.
Mode <- function(x) {
  distinct_vals <- unique(x)
  # Position of each observation within the distinct values.
  positions <- match(x, distinct_vals)
  # Occurrences per distinct value, in order of first appearance.
  counts <- tabulate(positions)
  distinct_vals[which.max(counts)]
}
Mode(dat[["var1"]])
[1] 5
# Get a one-row-per-variable summary of the descriptive statistics.
# describe() is not a base R function and the visible script never attaches
# a package that provides it, so the call is namespace-qualified. The printed
# columns (vars, n, mean, ..., se) match psych::describe -- confirm if a
# different package was intended.
psych::describe(dat)
vars n mean sd median trimmed mad min max range skew kurtosis
X1 1 21 5.14 2.48 5 5.18 2.97 1 9 8 -0.14 -1.17
se
X1 0.54