# Scratch arithmetic typed at the console.
324000/5/10
# NOTE: `6500x5` is not valid R (there is no `x` multiplication operator);
# the next line repeats the calculation with `*`.
6500x5
6500*5
6500*5*10
7000*5*10
75000*4
75000*4*3/4
# NOTE(review): `hluster` looks like a typo for `hclust`, whose help page is
# opened later in this history.
?hluster
# Global `x`; the functions defined next contrast `<-` and `<<-` against it.
x<-1
# Scoping demo: `<-` inside a function creates a variable local to the call,
# so any `x` outside the function is left untouched.
# Returns the assigned value (5) invisibly.
good <- function() {
  x <- 5
}
# Call good() and print x: the assignment inside good() is local to the call,
# so the x defined at top level is what gets printed.
good()
print(x)
# Scoping demo: `<<-` does not create a local variable; it overwrites `x` in
# an enclosing environment (creating it in the global environment if no
# enclosing `x` exists). Returns the assigned value (5) invisibly.
bad <- function() {
  x <<- 5
}
# Call bad() and print x: bad() uses `<<-`, so the top-level x is overwritten.
bad()
print(x)
# Wrapping an assignment in parentheses assigns and prints in one step.
y <- seq(1, 10, length.out = 5)
(y <- seq(1, 10, length.out = 5))
y <- seq(1, 10, length.out = 5)
(y <- seq(1, 10, length.out = 5))
# Element-wise comparison and logic on logical vectors.
c(T,T,F,F) == c(T,F,T,F)
c(T,T,F,F) & c(T,F,T,F)
c(T,T,F,F) | c(T,F,T,F)
# NOTE: `&&` is the scalar operator; since R 4.3 it errors when given
# operands longer than one element. Use `&` for vectors.
c(T,T,F,F) && c(T,F,T,F)
c(T,T,F,F) == c(T,F,T,F)
# Compare floating-point values with a tolerance instead of `==`.
all.equal(1/5,3/5-2/5)
# Matrices are filled column by column unless byrow = TRUE.
x=matrix (data=c(1,2,3,4) , nrow=2, ncol =2)
matrix (data=c(1,2,3,4) , nrow=2, ncol =2)
matrix (c(1,2,3,4) ,2,2,byrow =TRUE)
?cor
# Random draws: repeated rnorm() calls differ, but re-seeding with the same
# value right before a call reproduces the same draws.
rnorm (50)
rnorm (50)
rnorm (50)
set.seed (1303)
rnorm (50)
rnorm (50)
rnorm (50)
set.seed (1303)
rnorm (50)
set.seed (1303)
rnorm (50)
# `<-` and `=` both assign at top level.
x <- 3 * 4
x
x = 3 * 4
x
# Divide `numerator` by `denominator` using R's `/` operator (so vector
# arguments are divided element-wise) and return the quotient.
divide <- function(numerator, denominator) {
  quotient <- numerator / denominator
  quotient
}
# Positional versus named argument matching.
divide(2,1)
divide(denominator=2,numerator=1)
# NOTE: `<-` inside a call is an assignment, not argument naming. This creates
# top-level variables `denominator` and `numerator` and passes 2 and 1
# positionally, so it computes 2/1 rather than 1/2.
divide(denominator<-2,numerator<-1)
this_is_a_really_long_name <- 2.5
seq(1, 10)
y <- seq(1, 10, length.out = 5)
y
(y <- seq(1, 10, length.out = 5))
c(T,T,F,F) & c(T,F,T,F)
# NOTE: `&&` with operands longer than one element is an error since R 4.3.
c(T,T,F,F) && c(T,F,T,F)
# rep(x, times): the first repeats 1 ten times, the second repeats 10 once.
rep(1,10)
rep(10,1)
# c() with no arguments is NULL.
b<-c()
length(b)
is.null(b)
is.na(b)
?cor
# c() coerces its elements to a common type; list() keeps each element's type.
c(6,'fred')
list(6,'fred')
# 3x2 matrix filled column by column, then indexed as [row, column].
b<-matrix(c(2,4,3,1,5,7), nrow=3,ncol=2)
b<-matrix(c(2,4,3,1,5,7), nrow=3,ncol=2)
b
(b<-matrix(c(2,4,3,1,5,7), nrow=3,ncol=2))
b
b[1,2]
b[2,1]
# Load a CSV file from a URL into a data frame and inspect it.
uciCar <- read.table(  	# read delimited text into a data frame
'http://www.win-vector.com/dfiles/car.data.csv', 	# file location (a URL)
sep=',', 	# fields are comma-separated
header=T 	# first line holds the column names
)
summary(uciCar)
str(uciCar)
# More scratch arithmetic; several lines are failed attempts that are
# retried on the following line.
# NOTE: `324,000/5` is a syntax error (no thousands separators in numbers).
324,000/5
324000/5
324000/5
# NOTE(review): `%mod%` is not a base R operator; the modulo operator is `%%`
# (used on the next line).
10993%mod%1001
10993 %% 1001
# NOTE: `x` is not a multiplication operator; retried with `*` below.
3x143x5+4x91x4+8x77x12
3*143*5 + 4*91*4 + 8*77*12
# NOTE(review): the next two lines look like the expression above with the
# `+` / a term boundary lost (`5 + 4` typed as `54`) — confirm intent.
3*143*54*91*4 + 8*77*12
3*143*54
3*143*5
4*91*4
8*77*12
5^720
2^3
?merge
# Two small data frames joined on differently named key columns.
# I() marks the key columns "as is" when the data frame is built.
# NOTE(review): this appears to mirror the example in ?merge (opened just
# before) — confirm.
authors <- data.frame(
surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
nationality = c("US", "Australia", "US", "UK", "Australia"),
deceased = c("yes", rep("no", 4)))
books <- data.frame(
name = I(c("Tukey", "Venables", "Tierney",
"Ripley", "Ripley", "McNeil", "R Core")),
title = c("Exploratory Data Analysis",
"Modern Applied Statistics ...",
"LISP-STAT",
"Spatial Statistics", "Stochastic Simulation",
"Interactive Data Analysis",
"An Introduction to R"),
other.author = c(NA, "Ripley", NA, NA, NA, NA,
"Venables & Smith"))
# View() opens each data frame in a data viewer.
View(authors)
View(books)
# Join authors$surname against books$name; the parentheses print the result.
(m1 <- merge(authors, books, by.x = "surname", by.y = "name"))
# Help lookups, scratch arithmetic and experiments with sample().
?boxplot
1500*1000
1500*1000/20
?boxplot
?hclust
round(3)
runif(3)
?runif
# NOTE: for a single number n >= 1, sample(n, size) samples from 1:n, so
# sample(1, 3) asks for 3 values from a population of one without replacement.
sample(1,3)
# Random permutations of 1:3.
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1:3,3)
sample(1,2)
?sample
# sample(x) with no size returns a random permutation of x.
sample(x)
x <- 1:12
sample(x)
x <- c("1","2")
sample(x)
x <- c("1","2","3")
sample(x)
sample(x)
sample(x)
sample(x)
# Successive revisions of a character vector of names, followed by 180 divided
# by the number of names and random orderings of the final vector.
x <- c("littlefish0331","littlefish0331","littlefish0331","RyuChu","RyuChu","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","liekee","hwanimi","TsaiZX","tqmisz12")
length(x)
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","chensex","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","liekee","hwanimi","TsaiZX","tqmisz12")
length(x)
180/length(x)
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","chensex","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","hwanimi","TsaiZX","tqmisz12")
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","chensex","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","hwanimi","TsaiZX")
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","hwanimi")
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori")
length(x)
180/length(x)
sample(x)
sample(x)
sample(x)
sample(x)
sample(1:6)
sample(1:6)
sample(1:6)
# Diagonal 2x2 matrix, its eigen decomposition, and the product P x t(P)
# for a second 2x2 matrix P.
x = matrix(c(3,0,0,4), nrow=2, ncol=2)
x
eigen(x)
P = matrix(c(0,-1,1,0), nrow=2, ncol=2)
P
P %*% x %*% t(P)
# NOTE(review): bare symbol `ker`; no object of that name is created in this
# history — probably an abandoned or incomplete command.
ker
# Exploratory data analysis with ggplot2/dplyr on the diamonds, mpg and
# faithful data sets.
library(tidyverse)
# Distribution of a categorical variable: bar chart and the matching counts.
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut))
diamonds %>%
count(cut)
# Distribution of a continuous variable: histogram and the matching bin counts.
ggplot(data = diamonds) +
geom_histogram(mapping = aes(x = carat), binwidth = 0.5)
diamonds %>%
count(cut_width(carat, 0.5))
# Keep only diamonds under 3 carats and overlay one frequency polygon per cut.
smaller <- diamonds %>%
filter(carat < 3)
ggplot(data = smaller, mapping = aes(x = carat, colour = cut)) +
geom_freqpoly(binwidth = 0.1)
# Histogram of y; the second version zooms the y axis to 0-50 so that
# low-count bins become visible.
ggplot(diamonds) +
geom_histogram(mapping = aes(x = y), binwidth = 0.5)
ggplot(diamonds) +
geom_histogram(mapping = aes(x = y), binwidth = 0.5) +
coord_cartesian(ylim = c(0, 50))
# Rows whose y falls outside [3, 20], sorted by y.
unusual <- diamonds %>%
filter(y < 3 | y > 20) %>%
arrange(y)
unusual
# Two ways of handling those rows: drop them, or (overwriting diamonds2)
# keep the rows and replace the out-of-range y values with NA.
diamonds2 <- diamonds %>%
filter(between(y, 3, 20))
diamonds2 <- diamonds %>%
mutate(y = ifelse(y < 3 | y > 20, NA, y))
ggplot(data = diamonds2, mapping = aes(x = x, y = y)) +
geom_point()
# Price by cut: raw counts, then density.
ggplot(data = diamonds, mapping = aes(x = price)) +
geom_freqpoly(mapping = aes(colour = cut), binwidth = 500)
# NOTE(review): newer ggplot2 releases prefer after_stat(density) over the
# `..density..` notation — confirm against the installed version.
ggplot(data = diamonds, mapping = aes(x = price, y = ..density..)) +
geom_freqpoly(mapping = aes(colour = cut), binwidth = 500)
# Box plots of a continuous variable split by a categorical one.
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
geom_boxplot()
?geom_boxplot
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
geom_boxplot(notch=T)
?boxplot
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
geom_boxplot()
# Order the classes by their median hwy; the second version flips the axes.
ggplot(data = mpg) +
geom_boxplot(mapping = aes(x = reorder(class, hwy, FUN = median), y = hwy))
ggplot(data = mpg) +
geom_boxplot(mapping = aes(x = reorder(class, hwy, FUN = median), y = hwy)) +
coord_flip()
# Two categorical variables: count plot, then a tile plot filled by count.
ggplot(data = diamonds) +
geom_count(mapping = aes(x = cut, y = color))
diamonds %>%
count(color, cut) %>%
ggplot(mapping = aes(x = color, y = cut)) +
geom_tile(mapping = aes(fill = n))
# Two continuous variables: scatter plot, scatter plot with transparency,
# 2-D bins, and box plots over carat bins of width 0.1.
ggplot(data = diamonds) +
geom_point(mapping = aes(x = carat, y = price))
ggplot(data = diamonds) +
geom_point(mapping = aes(x = carat, y = price), alpha = 1 / 100)
ggplot(data = smaller) +
geom_bin2d(mapping = aes(x = carat, y = price))
ggplot(data = smaller, mapping = aes(x = carat, y = price)) +
geom_boxplot(mapping = aes(group = cut_width(carat, 0.1)))
# The same plot written with explicit and then positional argument names.
ggplot(data = faithful, mapping = aes(x = eruptions)) +
geom_freqpoly(binwidth = 0.25)
ggplot(faithful, aes(eruptions)) +
geom_freqpoly(binwidth = 0.25)
ggplot(faithful, aes(eruptions)) +
geom_freqpoly(binwidth = 0.25)
ggplot(faithful, aes(eruptions)) +
geom_freqpoly(binwidth = 0.25)
# NOTE: `x y <- 10` is a syntax error (two symbols separated by a space).
x y <- 10
46*0.7
# More sample() experiments and a histogram of exam scores from a CSV file.
library(fpc)
sample(1:10,2)
sample(1:11,11)
sample(1:11,11)
sample(1:11,11)
sample(1:11,11)
sample(1:6,6)
sample(1:6,6)
sample(1:6,6)
sample(1:6,6)
?sample
sample(1:4)
sample(1:4)
sample(1:4)
# NOTE: `samepl` is a typo for `sample`; corrected on the next line.
samepl(c(1,2,3,3))
sample(c(1,2,3,3))
sample(c(1,2,3,3))
# NOTE: same `samepl` typo again.
samepl(c(1,3))
sample(c(1,3))
sample(c(1,3))
sample(c(1,3))
sample(c(1,3))
sample(c(2,4))
2^8
# Read a semicolon-separated file with a header row and plot the `exam` column.
d<-read.table("~/Downloads/1081mid.csv",sep=";",header = T)
head(d)
d$exam
hist(d$exam)
hist(as.numeric(d$exam))
hist(as.integer(d$exam))
hist(as.integer(d$exam))
# Re-read with as.is = T so character columns are not converted to factors.
# NOTE(review): the repeated hist()/as.integer() attempts suggest `exam` was
# not read as a plain numeric column — confirm against the file.
d<-read.table("~/Downloads/1081mid.csv",sep=";",header = T, as.is = T)
hist(d$exam)
hist(d$exam)
head(d)
hist(d$exam)
head(d$exam)
hist(as.integer(d$exam))
# NOTE: this passes 1, 2, 3, 4 as four separate positional arguments of
# sample(), not as one vector; the next lines use c(1,2,3,4) instead.
sample(1,2,3,4)
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
sample(c(1,2,3,4))
# Basic commands: printing, help, vectors, workspace management, matrices
# and random numbers. The vector section was run twice.
print("hello world")
help(print)
?print
# x is assigned a 4-element vector and then immediately replaced by a
# 3-element one, so x + y adds two vectors of equal length.
x <- c(1,3,2,5)
x=c(1,6,2)
y=c(1,4,3)
length(x)
length(y)
x+y
# ls() lists the workspace; rm() removes the named objects;
# rm(list=ls()) removes every object in the workspace.
ls()
rm(x,y)
ls()
rm(list=ls())
x <- c(1,3,2,5)
x=c(1,6,2)
y=c(1,4,3)
length(x)
length(y)
x+y
ls()
rm(x,y)
ls()
rm(list=ls())
?matrix
# 2x2 matrix, filled column by column unless byrow = TRUE.
x=matrix(data=c(1,2,3,4) , nrow=2, ncol =2)
x=matrix(c(1,2,3,4) ,2,2)
matrix(c(1,2,3,4) ,2,2,byrow =TRUE)
# sqrt() and ^ are applied element by element.
sqrt(x)
x^2
# y is x plus normal noise (mean 50, sd 0.1); cor() gives their correlation.
x=rnorm(50)
y=x+rnorm(50, mean=50, sd=.1)
cor(x,y)
# Re-seeding with the same value before each call reproduces the draws.
set.seed(1303)
rnorm(50)
set.seed(1303)
rnorm(10)
set.seed(1303)
rnorm(10)
# Mean, variance and standard deviation of 100 normal draws; sqrt(var(y))
# and sd(y) are two ways of computing the same quantity.
set.seed (3)
y=rnorm (100)
mean(y)
var(y)
sqrt(var(y))
sd(y)
sessionInfo()
# `[` versus `[[` on lists: `[` returns a list, `[[` returns the element.
list(a='b')['a']
list(a='b')[['a']]
class(list(a='b')[['a']])
class(list(a='b')['a'])
# NOTE: `+` is not defined for character strings in R; paste() is used below.
"a"+"b"
# NOTE: the opening quote is misplaced here, so this line is a syntax error;
# the intended call is on the next line.
"paste(a","b")
paste("a","b")
paste("a",list(a='b')[['a']])
paste("a",list(a='b')['a'])
class(list(a=3)['a'])
list(a=3)['a']
# NOTE: arithmetic on a list (the result of `[`) is an error; extracting the
# element with `[[` first works.
list(a=3)['a']+2
list(a=3)[['a']]+2
# NOTE: the next line contains a typographic quote (’) in place of a straight
# quote and does not parse; the corrected line follows.
list(a=1, b=2, c=3)[c('a',’c')]
list(a=1, b=2, c=3)[c('a','c')]
d
# Small data frame used to compare column and row selection.
d = data.frame(x=c(1,2,3), y=c('x','y','z'))
d
# NOTE(review): with a single index `[` selects columns, and d has only two
# columns, so asking for column 3 presumably failed; the next line selects
# rows 1 and 3 instead.
d[c(1,3)]
d[c(1,3),]
# Row selection with subset(): by logical vector, then by condition.
subset(d,c(T,F,T))
subset(d,d$x>2)
subset(d,d$x>1)
# Simulated A/B test: 100000 Bernoulli draws with p = 0.05 for group A and
# 10000 draws with p = 0.055 for group B, stacked into one data frame.
d <- rbind(
data.frame(group='A',converted=rbinom(100000,size=1,p=0.05)),
data.frame(group='B',converted=rbinom(10000,size=1,p=0.055))
)
# Contingency table of group by converted, and Fisher's exact test on it.
tab <- table(d)
print(tab)
fisher.test(tab)
# Observed conversion rate per group and for both groups pooled.
(aConversionRate <- tab['A','1']/sum(tab['A',]))
(bConversionRate <- tab['B','1']/sum(tab['B',]))
(commonRate <- sum(tab[,'1'])/sum(tab))
# Upper-tail binomial probability: with lower.tail = F and q set to the
# observed count minus one, this is the probability of at least as many
# conversions as observed in B if B converted at the pooled rate.
print(pbinom(
lower.tail=F,
q=tab['B','1']-1,
size=sum(tab['B',]),
prob=commonRate
))
# Closed-form sample-size estimate.
#
# Computes ceiling(-log(errorProb) * targetRate / difference^2).
#
# targetRate: numeric rate used as the numerator scale.
# difference: numeric difference whose square is the denominator.
# errorProb:  numeric probability whose negated log scales the result.
# Returns the estimate rounded up to a whole number.
estimate <- function(targetRate, difference, errorProb) {
  # Keep the original evaluation order (multiply, then divide) so the
  # floating-point result is unchanged.
  scaledLogError <- -log(errorProb) * targetRate
  ceiling(scaledLogError / difference^2)
}
# Evaluate the closed-form estimate for a few parameter combinations.
(est <- estimate(0.045,0.004,0.05))
estimate(0.045,0.004,0.05)
estimate(0.045,0.005,0.04)
estimate(0.045,0.003,0.06)
# Binomial probability of seeing at most ceiling(0.041 * n) successes in n
# trials when the true rate is 0.045 (0.041 = 0.045 - 0.004), evaluated at
# several candidate sample sizes n.
pbinom(ceiling(0.041* 8426),8426,0.045)
pbinom(ceiling(0.041* 8425),8425,0.045)
pbinom(ceiling(0.041* 8424),8424,0.045)
pbinom(ceiling(0.041* 8423),8423,0.045)
pbinom(ceiling(0.041* 4211),4211,0.045)
pbinom(ceiling(0.041* 6000),6000,0.045)
# Find the smallest whole number n >= 1 with fEventuallyNegative(n) <= 0.
#
# fEventuallyNegative: function of one number returning a number. The search
#   assumes it is positive for small n and stays non-positive once it first
#   reaches zero or below; this is not checked, and if it never becomes
#   non-positive the doubling loop does not terminate.
#
# Returns the located n.
binSearchNonPositive <- function(fEventuallyNegative) {
  low <- 1
  # Edge case: already non-positive at the first candidate. Without this
  # check the search could never return anything smaller than 2.
  if (fEventuallyNegative(low) <= 0) {
    return(low)
  }
  # Phase 1: double `high` until the function is non-positive there.
  high <- low + 1
  while (fEventuallyNegative(high) > 0) {
    high <- 2 * high
  }
  # Phase 2: bisect, keeping f(low) > 0 and f(high) <= 0, until the two
  # bounds are adjacent; `high` is then the first non-positive point.
  while (high > low + 1) {
    m <- low + (high - low) %/% 2
    if (fEventuallyNegative(m) > 0) {
      low <- m
    } else {
      high <- m
    }
  }
  high
}
# Search for the first size at which the error probability computed by the
# errorProb() function drops to the requested level or below.
#
# targetRate, difference: passed through unchanged to errorProb().
# errorProb: numeric threshold. NOTE: this argument has the same name as the
#   errorProb() function called below; R skips non-function bindings when
#   resolving a call, so the call still reaches the function.
#
# Returns the value found by binSearchNonPositive().
actualSize <- function(targetRate, difference, errorProb) {
  # Alias the numeric argument so the two uses of the name are easy to tell
  # apart.
  maxErrorProb <- errorProb
  excessError <- function(n) {
    errorProb(targetRate, difference, n) - maxErrorProb
  }
  binSearchNonPositive(excessError)
}
# NOTE(review): in this history errorProb() is only defined after this line,
# so this first call presumably failed; it is repeated after the definition.
size <- actualSize(0.045,0.004,0.05)
# Binomial probability of observing at most
# ceiling((targetRate - difference) * size) successes in `size` trials when
# the success probability is `targetRate`.
#
# targetRate: success probability of each trial.
# difference: amount subtracted from targetRate to form the cut-off rate.
# size:       number of trials.
# Returns the lower-tail probability from pbinom().
errorProb <- function(targetRate, difference, size) {
  threshold <- ceiling((targetRate - difference) * size)
  pbinom(threshold, size = size, prob = targetRate)
}
# Search for the sample size numerically, then print the error probability
# at the size found and at a fixed size of 7622 for comparison.
size <- actualSize(0.045,0.004,0.05)
size
print(errorProb(0.045,0.004,size))
print(errorProb(0.045,0.004,7622))
# Correlation between two independently drawn log-normal variables.
set.seed(235236)
d <- data.frame(EarnedIncome=100000*rlnorm(100),  CapitalGains=100000*rlnorm(100))
print(with(d,cor(EarnedIncome,CapitalGains)))
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
with(d,cor.test(EarnedIncome,CapitalGains,method='spearman'))
# NOTE: the following attempts end the method string with a typographic quote
# (’) instead of a straight quote, so the string is never closed and the
# lines do not parse; the stray `;` and `)` lines are attempts to close the
# unfinished expression. The first valid version is the last `ctest <-` line.
ctest <-with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’)
sigr::wrapCorTest(ctest)
;
)
ctest <-with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’)
sigr::wrapCorTest(ctest()
ctest <-with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’)
sigr::wrapCorTest(ctest)
# Install and load sigr, which provides wrapCorTest().
install.packages("sigr")
library("sigr")
sigr::wrapCorTest(ctest)
ctest <-with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’)
)
)
ctest <- with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’))
ctest <- with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’))
ctest <- with(d,cor.test(EarnedIncome,CapitalGains,method='spearman’))
# Working version: straight quotes on both sides of 'spearman'.
ctest <- with(d,cor.test(EarnedIncome,CapitalGains,method='spearman'))
sigr::wrapCorTest(ctest)
# Decision-tree classification on a headerless CSV (columns are named V1, V2,
# ... by read.csv because header = F).
library('rpart')
d <- read.csv("~/Downloads/Archaeal_tfpssm.csv",header = F)
levels(d[,2])
head(d[,5600:5603])
# select a subset of the rows: every 25th row from 1 to 700, then every 5th
# row from 700 to 800
tmp <- d[c(seq(1,700,25), seq(700,800,5)),]
# model column V2 with a classification tree of depth at most 4
model <- rpart(V2 ~ V3 + V4 + V5600 + V5601 + V5602,
data=tmp, control=rpart.control(maxdepth=4),
method="class")
# make the confusion matrix table (truth vs. prediction on the training rows)
resultframe <- data.frame(truth=tmp$V2,
pred=predict(model, type="class"))
(rtab <- table(resultframe))
dim(tmp)
dim(d)
# NOTE(review): at this point `d` was last assigned from the CSV file read
# earlier, which presumably has no EarnedIncome/CapitalGains columns; `d` is
# rebuilt two lines below.
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
set.seed(235236)
d <- data.frame(EarnedIncome=100000*rlnorm(100),  CapitalGains=100000*rlnorm(100))
print(with(d,cor(EarnedIncome,CapitalGains)))
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
with(d,cor.test(EarnedIncome,CapitalGains,method='spearman'))
sigr::wrapCorTest(ctest)
set.seed(235236)
d <- data.frame(EarnedIncome=100000*rlnorm(100),  CapitalGains=100000*rlnorm(100))
# Correlation of two literal 2-element vectors (the columns of d are not used).
print(with(d,cor(c(0.1,0.2),c(0.2,0.4))))
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
# NOTE: the next several cor.test() attempts have misplaced or unbalanced
# parentheses and do not parse; the working call is the one after ?cor.test.
with(d,cor.test((c(0.1,0.2),c(0.2,0.4)),method='spearman'))
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
with(d,cor.test(c(0.1,0.2),c(0.2,0.4)),method='spearman'))
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
with(d,cor.test(c(0.1,0.2),c(0.2,0.4)),method='spearman')
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
cor.test(c(0.1,0.2),c(0.2,0.4)),method='spearman')
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
cor.test((c(0.1,0.2),c(0.2,0.4)),method='spearman')
?cor.test
with(d,cor(EarnedIncome,CapitalGains,method='spearman'))
cor.test(c(0.1,0.2),c(0.2,0.4),method='spearman')
# Correlations between pairs of 2-element vectors.
a <- c(0.1,0.2)
b <- c(0.3,0.4)
cor(a,a)
cor(a,b)
# NOTE: these assignments create a variable named `c` (calls to c() still
# resolve to the function) and overwrite the data frame `d` with a vector.
c <- c(0.2, 0.4)
d <- c(0.4,0.8)
cor(a,c)
cor(a,d)
# Principal component analysis of the four numeric iris columns.
data(iris)
head(iris, 3)
# Log-transform columns 1-4; column 5 is kept separately as the group label.
log.ir <- log(iris[, 1:4])
summary(iris)
ir.species <- iris[, 5]
# Centre and scale each variable before computing the components.
ir.pca <- prcomp(log.ir,center = TRUE, scale. = TRUE)
?prcomp
print(ir.pca)
summary(ir.pca)
# Line plot of the component variances.
plot(ir.pca, type = "l")
# Biplot of the PCA coloured by species, with a horizontal legend on top.
# NOTE(review): library(ggbiplot) is called before the install.packages()
# line at the end of this section, so the package may not have been
# available when these lines were first run.
library(ggbiplot)
g <- ggbiplot(ir.pca, obs.scale = 1, var.scale = 1, groups = ir.species)
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal', legend.position = 'top')
print(g)
install.packages("ggbiplot")
# Linear regression of log10 income on data loaded from psub.RData.
# NOTE: setwd() with a user-specific path makes the following relative
# load() call depend on this machine's directory layout.
setwd("~/Dropbox/13_NCCU/courses/DataScienceInPractice_資料科學實務/codes/code11.sup2.linReg")
# NOTE(review): `dTrain` and `outcome` are not created anywhere in this
# history, so this table() call presumably relied on objects from another
# session or failed.
table218 <- table(
Var218=dTrain[,'Var218'],
churn=dTrain[,outcome],
useNA='ifany')
print(table218)
# Load the saved workspace objects and split psub into training and test
# sets on ORIGRANDGROUP (>= 500 for training, < 500 for test).
load("psub.RData")
dtrain <- subset(psub,ORIGRANDGROUP >= 500)
dtest <- subset(psub,ORIGRANDGROUP < 500)
# Fit log10(PINCP) on AGEP, SEX, COW and SCHL using the training rows.
model <- lm(log(PINCP,base=10) ~ AGEP + SEX + COW + SCHL,data=dtrain)
# Store the model's predictions on both sets as a new column.
dtest$predLogPINCP <- predict(model,newdata=dtest)
dtrain$predLogPINCP <- predict(model,newdata=dtrain)
levels(dtrain$SCHL)
coefficients(model)
summary(model)
