# Scatter plot of engine displacement against highway mileage, coloured by
# car class. The aesthetic has to be given as a named argument
# (`colour = class`): `colour(class)` is parsed as a call to a function named
# `colour`, not as a mapping.
library(ggplot2)
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(colour = class))

# Inspect the data set and the state of the session.
summary(mpg)
sessionInfo()
warnings()

# Remainders modulo 15.
15106 %% 15
3091 %% 15
6693 %% 15
# Heat maps of the mtcars data set.
?heatmap
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script continue.
library(graphics)
library(grDevices)

x <- as.matrix(mtcars)
# Side-bar colours: one per row of `x` and one per column of `x`.
rc <- rainbow(nrow(x), start = 0, end = .3)
cc <- rainbow(ncol(x), start = 0, end = .3)

# Column-scaled heat map with row and column side colours. The return value
# is kept so that its structure can be inspected.
hv <- heatmap(x, col = cm.colors(256), scale = "column",
              RowSideColors = rc, ColSideColors = cc, margins = c(5, 10),
              xlab = "specification variables", ylab = "Car Models",
              main = "heatmap(<Mtcars data>, ..., scale = \"column\")")
utils::str(hv) # the two re-ordering index vectors

## no column dendrogram (nor reordering) at all:
heatmap(x, Colv = NA, col = cm.colors(256), scale = "column",
        RowSideColors = rc, margins = c(5, 10),
        xlab = "specification variables", ylab = "Car Models",
        main = "heatmap(<Mtcars data>, ..., scale = \"column\")")
head(x)
# Indexing past the end of a vector: `[[` signals an error, while `[`
# returns NA. try() reports the error without aborting the script.
try(c("a", "b")[[7]])
c("a", "b")[7]
# Single brackets on a list return a list.
list(a = "b")["a"]

sin(pi / 2)

# Assignment: `<-` is the conventional operator; `=` also assigns at top
# level and is kept here for comparison.
x <- 3 * 4
x
x = 3 * 4
x
x <- 3 * 4
x
# divide(): quotient of `numerator` over `denominator`.
divide <- function(numerator, denominator) numerator / denominator

# Positional call.
divide(2, 1)
# Named arguments can be given in any order.
divide(denominator = 2, numerator = 1)
# `<-` inside a call assigns in the calling environment and passes the values
# by position, so this evaluates 2 / 1.
divide(denominator <- 2, numerator <- 1)  # yields 2, a wrong answer
# A long variable name is still just a name.
this_is_a_really_long_name <- 2.5
this_is_a_really_long_name

# Local assignment versus super-assignment.
x <- 1

# good(): `<-` creates a local `x`; the `x` defined above is not touched.
good <- function() {
  x <- 5
}
good()
print(x)
## [1] 1

# bad(): `<<-` assigns to the `x` found in an enclosing environment.
bad <- function() {
  x <<- 5
}
bad()
print(x)
## [1] 5
# The integers from 1 to 10.
seq(from = 1, to = 10)

# Five evenly spaced values between 1 and 10. Putting an assignment in
# parentheses also prints the assigned value.
y <- seq(from = 1, to = 10, length.out = 5)
(y <- seq(from = 1, to = 10, length.out = 5))
y <- seq(from = 1, to = 10, length.out = 5)
print(y)
(y <- seq(from = 1, to = 10, length.out = 5))
# Object names must match exactly: the failed lookup spelled the name with a
# dotless "i" character.
my_variable <- 10
my_variable

# Installing is a one-off interactive step, so it is not run by the script.
# install.packages("tidyverse")
library(tidyverse)

# The argument is named `data` (not `dota`).
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))
warnings()

# The verb is `filter`, and a comparison is written with `==`; a single `=`
# would be read as a named argument.
filter(mpg, cyl == 8)

# The data set is called `diamonds` (not `diamond`).
filter(diamonds, carat > 3)
# Element-wise comparison and logic. TRUE/FALSE are written out because T and
# F are ordinary variables that can be reassigned.
c(TRUE, TRUE, FALSE, FALSE) == c(TRUE, FALSE, TRUE, FALSE)
c(TRUE, TRUE, FALSE, FALSE) & c(TRUE, FALSE, TRUE, FALSE)
c(TRUE, TRUE, FALSE, FALSE) | c(TRUE, FALSE, TRUE, FALSE)

# `&&` expects length-one operands; since R 4.3.0 longer vectors are an
# error. try() shows the outcome without stopping the script.
try(c(TRUE, TRUE, FALSE, FALSE) && c(TRUE, FALSE, TRUE, FALSE))
try(c(TRUE, TRUE, FALSE, FALSE) && c(FALSE, TRUE, FALSE, TRUE))

# Whole-object comparison: identical() gives a single TRUE/FALSE, while
# all.equal() gives TRUE or a description of the differences.
identical(c(TRUE, TRUE, FALSE, FALSE), c(TRUE, FALSE, TRUE, FALSE))
all.equal(c(TRUE, TRUE, FALSE, FALSE), c(TRUE, FALSE, TRUE, FALSE))
# Class of a numeric vector, then the objects currently in the workspace.
class(c(1, 2))
ls()

# Changing an argument inside a function does not change the caller's object.
vec <- c(1, 2)

# fun(): set the second element of `v` to 5 and print the modified vector;
# the value of print(v) is the function's result.
fun <- function(v) {
  v[[2]] <- 5
  print(v)
}
fun(vec)
print(vec)
# Floating-point arithmetic: compare 1/5 with 3/5 - 2/5.
1 / 5
3 / 5 - 2 / 5
1 / 5 == 3 / 5 - 2 / 5
# Twenty decimal places of each value.
sprintf("%.20f", 1 / 5)
sprintf("%.20f", 3 / 5 - 2 / 5)
# Comparison within a numeric tolerance.
all.equal(1 / 5, 3 / 5 - 2 / 5)

# `:` is evaluated before `*`, so the first line is (1:2) * 5.
(1:2) * 5
1:(2 * 5)

# rep(x, times): the order of the two arguments matters.
rep(1, times = 10)
rep(10, times = 1)
# Indexing an integer vector of length 10.
a <- 1:10
length(a)
a[1]
a[[1]]
# Past the end: `[` returns NA, whereas `[[` signals an error. try() reports
# it without aborting the script.
a[11]
try(a[[11]])

# c() with no arguments is NULL, which has length zero.
b <- c()
length(b)
is.null(b)
is.na(b)
# c() coerces its arguments to one common type; list() keeps each as it is.
c(6, "fred")
list(6, "fred")

# Named list: `$` and `[[` both extract a single element.
x <- list(a = 6, b = "fred")
names(x)
x$a
x$b
x[["a"]]

# Character vector: `[[` past the end is an error, `[` gives NA.
try(c("a", "b")[[7]])
c("a", "b")[7]
c("a", "b")[1]
c("a", "b")[[1]]

# On a list, `[` returns a sub-list and `[[` returns the element itself.
list(a = "b")["a"]
list(a = "b")[["a"]]
class(list(a = "b")["a"])
class(list(a = "b")[["a"]])

# A vector of names selects several elements with `[`. With `[[` it is
# applied recursively, which is an error here: the element "b" has no
# component named "a".
list(a = "b")[c("a", "a")]
try(list(a = "b")[[c("a", "a")]])
# A 3 x 2 matrix; the values fill it column by column.
b <- matrix(c(2, 4, 3, 1, 5, 7), ncol = 2, nrow = 3)
b[1, 2]  # row 1, column 2
b[2, 1]  # row 2, column 1
print(b)
t(b)         # transpose
cbind(b, b)  # two copies side by side
rbind(b, b)  # two copies stacked
# A small data frame with one numeric and one character column.
d <- data.frame(x = c(1, 2, 3), y = c("x", "y", "z"))
d
str(d)
# Keep rows 1 and 3. The filter is spelled with TRUE/FALSE because T and F
# are ordinary variables that can be reassigned. subset() returns a new data
# frame, so `d` is printed again unchanged.
subset(d, c(TRUE, FALSE, TRUE))
d
str(d)

# A value that is not among `levels` becomes NA.
factor("red", levels = c("red", "orange"))
factor("apple", levels = c("red", "orange"))
# Read the car data directly from its URL.
uciCar <- read.table(
  'http://www.win-vector.com/dfiles/car.data.csv',  # file location
  sep = ',',      # fields are comma-separated
  header = TRUE   # first line holds the column names
)
head(uciCar)
max(uciCar$doors)
# NOTE(review): the result of as.integer() depends on how `doors` was parsed
# (character vs. factor) — confirm before relying on this value.
max(as.integer(uciCar$doors))
str(uciCar)
class(uciCar)
summary(uciCar)
dim(uciCar)
summary(as.integer(uciCar$doors))
# Load the German credit data. The local copy is used when it exists;
# otherwise the file is read from the UCI repository, so the script does not
# fail on machines without that directory.
german_local <- "~/Dropbox/13_NCCU/courses/DataScienceInPractice_資料科學實務/105.2/codes/code03/german.data.txt"
german_url <- paste0('http://archive.ics.uci.edu/ml/',
                     'machine-learning-databases/statlog/german/german.data')
german_src <- if (file.exists(german_local)) german_local else german_url
d <- read.table(german_src, stringsAsFactors = FALSE, header = FALSE)
# Show the first rows only rather than printing the whole table.
head(d)

# The file has no header line, so the column names are assigned here.
colnames(d) <- c('Status.of.existing.checking.account',
                 'Duration.in.month',  'Credit.history', 'Purpose',
                 'Credit.amount', 'Savings account/bonds',
                 'Present.employment.since',
                 'Installment.rate.in.percentage.of.disposable.income',
                 'Personal.status.and.sex', 'Other.debtors/guarantors',
                 'Present.residence.since', 'Property', 'Age.in.years',
                 'Other.installment.plans', 'Housing',
                 'Number.of.existing.credits.at.this.bank', 'Job',
                 'Number.of.people.being.liable.to.provide.maintenance.for',
                 'Telephone', 'foreign.worker', 'Good.Loan')
# Recode the outcome: 1 becomes 'GoodLoan', anything else 'BadLoan'.
d$Good.Loan <- as.factor(ifelse(d$Good.Loan == 1, 'GoodLoan', 'BadLoan'))
# Readable labels for some of the coded values.
mapping <- list(
  'A40' = 'car (new)',
  'A41' = 'car (used)',
  'A42' = 'furniture/equipment',
  'A43' = 'radio/television',
  'A44' = 'domestic appliances')
head(d)

# In every character column, replace codes that have an entry in `mapping`
# with their label and convert the column to a factor. Codes without an entry
# are kept as they are; looking them up in `mapping` directly would turn each
# one into the string "NULL".
labels <- unlist(mapping)
for (i in seq_len(ncol(d))) {
  if (is.character(d[, i])) {
    codes <- d[, i]
    hit <- match(codes, names(labels))
    found <- !is.na(hit)
    codes[found] <- unname(labels[hit[found]])
    d[, i] <- as.factor(codes)
  }
}
summary(d$Purpose)
table(d$Purpose, d$Good.Loan)
# Powers of two.
2^15
2^14

# Help pages.
?axis
?png
?log

# The logarithm of zero is -Inf.
log(0)
# Machine constants live in the `.Machine` list; a bare `double.xmin` is not
# a defined object.
.Machine$double.xmin
# Scientific notation needs a leading digit (presumably 1e-10 was meant by
# the earlier attempts); it equals 10^-10.
1e-10
10^-10
# Distribution of the mpg column of mtcars.
head(mtcars)
str(mtcars)
mtcars$mpg
hist(mtcars$mpg)

d <- density(mtcars$mpg) # returns the density data
plot(d)
?density

# The same histogram with ggplot2. Inside aes() a column is referred to by
# its bare name, which is looked up in `data`.
library(ggplot2)
ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram()
# Floating-point comparison, run once. The `##` lines are the recorded
# results of the call above them.
1 / 5
##[1] 0.2
3 / 5
3 / 5 - 2 / 5
##[1] 0.2
1 / 5 == 3 / 5 - 2 / 5
##[1] FALSE
sprintf("%.20f", 1 / 5)
##[1] "0.20000000000000001110"
sprintf("%.20f", 3 / 5 - 2 / 5)
##[1] "0.19999999999999995559"
# all.equal() compares within a tolerance. It returns a character
# description when the values differ, so it is wrapped in isTRUE() to always
# give a single TRUE/FALSE.
isTRUE(all.equal(1 / 5, 3 / 5 - 2 / 5))
##[1] TRUE

# A basic scatter plot.
?plot
x <- 1:10
plot(x, x)
# R has no `cond ? a : b` operator (`?` is the help operator). `if` is an
# expression, so its value can be assigned directly.
z <- if (1 > 0) "a" else "b"

# Calculator-style arithmetic. Multiplication is written with `*`, and an
# expression must not end in `=`.
11000000 - 10720000
(11000000 - 10720000) / 160000
?polygon
2^8
2^7
2^3
log10(2.57646712135004e-05)
49548335 / 1000000
49548335 / 10000000
log10(49548335 / 1000000)
2.57646712135004e-05
1 / 3575368
324000 / 4
324000 / 5
324000 / 5 / 10
6500 * 5
6500 * 5 * 10
7000 * 5 * 10
75000 * 4
75000 * 4 * 3 / 4
# The help topic is spelled `hclust`.
?hclust
# Local assignment versus super-assignment.
x <- 1

# good(): `<-` creates a local `x`; the `x` defined above is not touched.
good <- function() {
  x <- 5
}
good()
print(x)

# bad(): `<<-` assigns to the `x` found in an enclosing environment.
bad <- function() {
  x <<- 5
}
bad()
print(x)

# Five evenly spaced values; the parenthesised assignment also prints them.
y <- seq(1, 10, length.out = 5)
(y <- seq(1, 10, length.out = 5))

# Element-wise comparison and logic, with TRUE/FALSE written out.
c(TRUE, TRUE, FALSE, FALSE) == c(TRUE, FALSE, TRUE, FALSE)
c(TRUE, TRUE, FALSE, FALSE) & c(TRUE, FALSE, TRUE, FALSE)
c(TRUE, TRUE, FALSE, FALSE) | c(TRUE, FALSE, TRUE, FALSE)
# `&&` expects length-one operands; since R 4.3.0 longer vectors are an
# error, so the call is wrapped in try().
try(c(TRUE, TRUE, FALSE, FALSE) && c(TRUE, FALSE, TRUE, FALSE))

# Comparison within a numeric tolerance.
all.equal(1 / 5, 3 / 5 - 2 / 5)
# 2 x 2 matrices: filled by column (the default), then filled by row.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2, byrow = TRUE)
?cor

# Three sets of 50 normal draws before any seed is set.
rnorm(n = 50)
rnorm(n = 50)
rnorm(n = 50)

# Three more sets after fixing the seed.
set.seed(1303)
rnorm(n = 50)
rnorm(n = 50)
rnorm(n = 50)

# Resetting the same seed before each call.
set.seed(1303)
rnorm(n = 50)
set.seed(1303)
rnorm(n = 50)
# Assignment with `<-` and, for comparison, with `=`.
x <- 3 * 4
x
x = 3 * 4
x

# divide(): quotient of `numerator` over `denominator`.
divide <- function(numerator, denominator) {
  numerator / denominator
}
divide(2, 1)
divide(denominator = 2, numerator = 1)
# `<-` inside a call assigns in the calling environment and passes the values
# by position, so this evaluates 2 / 1.
divide(denominator <- 2, numerator <- 1)
this_is_a_really_long_name <- 2.5

# Sequences; the parenthesised assignment also prints the value.
seq(1, 10)
y <- seq(1, 10, length.out = 5)
y
(y <- seq(1, 10, length.out = 5))

# `&` is element-wise. `&&` expects length-one operands; since R 4.3.0 longer
# vectors are an error, so the call is wrapped in try().
c(TRUE, TRUE, FALSE, FALSE) & c(TRUE, FALSE, TRUE, FALSE)
try(c(TRUE, TRUE, FALSE, FALSE) && c(TRUE, FALSE, TRUE, FALSE))

rep(1, 10)
rep(10, 1)

# c() with no arguments is NULL.
b <- c()
length(b)
is.null(b)
is.na(b)
?cor

# c() coerces to a common type; list() does not.
c(6, "fred")
list(6, "fred")

# A 3 x 2 matrix, printed on assignment and then indexed by (row, column).
(b <- matrix(c(2, 4, 3, 1, 5, 7), nrow = 3, ncol = 2))
b
b[1, 2]
b[2, 1]
# Read the car data directly from its URL and summarise it.
uciCar <- read.table(
  'http://www.win-vector.com/dfiles/car.data.csv',  # file location
  sep = ',',      # fields are comma-separated
  header = TRUE   # first line holds the column names
)
summary(uciCar)
str(uciCar)
# Numeric literals are written without a thousands separator.
324000 / 5

# The modulo operator is `%%`.
10993 %% 1001

# Multiplication is always written with `*`, and every term needs its
# operator.
3 * 143 * 5 + 4 * 91 * 4 + 8 * 77 * 12
3 * 143 * 5
4 * 91 * 4
8 * 77 * 12

# NOTE(review): 5^720 is beyond the range of a double and evaluates to Inf —
# confirm what was intended here.
5^720
2^3
# Join two data frames whose key columns have different names.
?merge
authors <- data.frame(
  surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased = c("yes", rep("no", 4))
)
books <- data.frame(
  name = I(c("Tukey", "Venables", "Tierney",
             "Ripley", "Ripley", "McNeil", "R Core")),
  title = c("Exploratory Data Analysis",
            "Modern Applied Statistics ...",
            "LISP-STAT",
            "Spatial Statistics", "Stochastic Simulation",
            "Interactive Data Analysis",
            "An Introduction to R"),
  other.author = c(NA, "Ripley", NA, NA, NA, NA,
                   "Venables & Smith")
)

# View() opens a data viewer, so it is only called in interactive sessions.
if (interactive()) {
  View(authors)
  View(books)
}

# Match `surname` in authors against `name` in books; the parentheses print
# the merged result.
(m1 <- merge(authors, books, by.x = "surname", by.y = "name"))
?boxplot
1500 * 1000
1500 * 1000 / 20
?hclust
round(3)
runif(3)
?runif

# sample(x, size) draws without replacement by default, and a single number
# n stands for 1:n. Asking for more draws than there are values is therefore
# an error, shown here under try() so the script keeps running.
try(sample(1, 3))
try(sample(1, 2))
?sample

# Nine random permutations of 1:3, one per column.
replicate(9, sample(1:3, 3))

# With a single vector argument, sample() returns a random permutation of it.
x <- 1:12
sample(x)
x <- c("1", "2")
sample(x)
x <- c("1", "2", "3")
replicate(4, sample(x))

# Candidate name lists and the share of 180 that falls to each entry.
x <- c("littlefish0331","littlefish0331","littlefish0331","RyuChu","RyuChu","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","liekee","hwanimi","TsaiZX","tqmisz12")
length(x)
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","chensex","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori","liekee","hwanimi","TsaiZX","tqmisz12")
length(x)
180 / length(x)
# Final list; the intermediate versions that were overwritten straight away
# are omitted.
x <- c("littlefish0331","lazurite","itsnotponpon","RyuChu","RickyLeeeee","Doppelfelix","spisdoor","y28235579","106753015","LauskiMori")
length(x)
180 / length(x)

# Four random orderings of the final list, one per column.
replicate(4, sample(x))
# Three random orderings of 1:6, one per column.
replicate(3, sample(1:6))
# A diagonal matrix and its eigen-decomposition.
x <- matrix(c(3, 0, 0, 4), nrow = 2, ncol = 2)
x
eigen(x)

# Transform `x` by `P` on both sides: P x P'.
P <- matrix(c(0, -1, 1, 0), nrow = 2, ncol = 2)
P
P %*% x %*% t(P)
# Logistic-regression spam classifier: fit on the training rows, score both
# splits, then look at confusion tables, score densities, ROC/AUC and log
# likelihoods.

# Packages are attached before first use.
library(ggplot2)
library('ROCR')

# The data file is read relative to this directory, so the working directory
# is set before the first read.
setwd("~/Dropbox/13_NCCU/courses/DataScienceInPractice_資料科學實務/1061/codes/code03/")
spamD <- read.table('spamD.tsv', header = TRUE, sep = '\t')

# Split on the `rgroup` column: values of 10 and above train, the rest test.
spamTrain <- subset(spamD, spamD$rgroup >= 10)
spamTest <- subset(spamD, spamD$rgroup < 10)

# Every column except `rgroup` and `spam` is a predictor.
spamVars <- setdiff(colnames(spamD), c('rgroup', 'spam'))
spamFormula <- as.formula(paste('spam=="spam"',
                                paste(spamVars, collapse = ' + '),
                                sep = ' ~ '))
spamModel <- glm(spamFormula, family = binomial(link = 'logit'),
                 data = spamTrain)

# Predicted probabilities for both splits.
spamTrain$pred <- predict(spamModel, newdata = spamTrain, type = 'response')
spamTest$pred <- predict(spamModel, newdata = spamTest, type = 'response')

# Confusion tables at a 0.5 threshold.
print(with(spamTrain, table(y = spam, glmPred = pred > 0.5)))
print(with(spamTest, table(y = spam, glmPred = pred > 0.5)))

# NOTE(review): `sample` and `eval` below are variables that share their
# names with base functions; the names are kept so existing references still
# work.
sample <- spamTest[c(7, 35, 224, 327), c('spam', 'pred')]
print(sample)

cM <- table(truth = spamTest$spam, prediction = spamTest$pred > 0.5)
print(cM)

# Density of the predicted score for each true class.
ggplot(data = spamTest) +
  geom_density(aes(x = pred, color = spam, linetype = spam))

# ROC curve and area under it.
eval <- prediction(spamTest$pred, spamTest$spam)
plot(performance(eval, "tpr", "fpr"))
print(attributes(performance(eval, 'auc'))$y.values[[1]])

# Log likelihood of the model on the test rows, in total and per row.
likeli_model <- sum(ifelse(spamTest$spam == 'spam',
                           log(spamTest$pred),
                           log(1 - spamTest$pred)))
likeli_model / dim(spamTest)[[1]]

# Log likelihood of a null model that always predicts the observed spam rate.
pNull <- sum(ifelse(spamTest$spam == 'spam', 1, 0)) / dim(spamTest)[[1]]
likeli_nullModel <- sum(ifelse(spamTest$spam == 'spam', 1, 0)) * log(pNull) +
  sum(ifelse(spamTest$spam == 'spam', 0, 1)) * log(1 - pNull)
likeli_model
likeli_nullModel

log(0.9)
log(0.1)
# Cluster 100 random points into five groups and draw each group's convex
# hull behind the point labels.
set.seed(32297)
d <- data.frame(x = runif(100), y = runif(100))
clus <- kmeans(d, centers = 5)
d$cluster <- clus$cluster
table(d$cluster)

library('ggplot2')
library('grDevices')

# For each cluster id, keep the rows of that cluster that chull() selects,
# then stack the per-cluster results into one data frame.
hullRows <- lapply(
  unique(clus$cluster),
  function(k) {
    members <- subset(d, cluster == k)
    members[chull(members), ]
  }
)
h <- do.call(rbind, hullRows)

# Labels show the cluster number; polygons are filled per cluster, and the
# legend is hidden.
ggplot() +
  geom_text(
    data = d,
    aes(label = cluster, x = x, y = y, color = cluster),
    size = 3
  ) +
  geom_polygon(
    data = h,
    aes(x = x, y = y, group = cluster, fill = as.factor(cluster)),
    alpha = 0.4, linetype = 0
  ) +
  theme(legend.position = "none")
?chull
?stop

# tst1(): always signals "dummy error"; the failing call is part of the
# error message.
tst1 <- function(...) {
  stop("dummy error")
}
try(tst1(1:10, long, calling, expression))

# tst2(): the same error, but call. = FALSE leaves the call out of the
# message.
tst2 <- function(...) {
  stop("dummy error", call. = FALSE)
}
try(tst2(1:10, longcalling, expression, but.not.seen.in.Error))
# Mean distance between points, tabulated by the clusters of the two points.
# The package is attached once, before its help page is requested.
library('reshape2')
?dcast

n <- nrow(d)
# One row per ordered pair (a, b) of row indices of `d`: the cluster of each
# point and the Euclidean distance between them. seq_len() keeps the index
# range empty when `d` has no rows.
pairs <- data.frame(
  ca = as.vector(outer(seq_len(n), seq_len(n),
                       function(a, b) d[a, 'cluster'])),
  cb = as.vector(outer(seq_len(n), seq_len(n),
                       function(a, b) d[b, 'cluster'])),
  dist = as.vector(outer(seq_len(n), seq_len(n),
                         function(a, b)
                           sqrt((d[a, 'x'] - d[b, 'x'])^2 +
                                  (d[a, 'y'] - d[b, 'y'])^2)))
)
# Average `dist` for every (ca, cb) combination.
dcast(pairs, ca ~ cb, value.var = 'dist', mean)
