載入Packages 及設定工作區
# Packages used by this script:
#   tree, party, rpart, randomForest - tree-based classifiers
#   rpart.plot                       - plotting for rpart trees
#   dplyr                            - data wrangling
#   ggplot2                          - plotting
# ggplot2 is listed here because it is attached below; the original never
# installed it, so library(ggplot2) could fail on a fresh machine.
required_pkgs <- c(
  "tree", "party", "randomForest", "rpart", "rpart.plot", "dplyr", "ggplot2"
)

# Install only what is missing, so re-running the script does not download
# and reinstall every package each time.
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach in the original order so function masking between packages is
# unchanged. The duplicate library(rpart) call was dropped (it was a no-op).
library(tree)
library(party)
library(rpart)
library(randomForest)
library(dplyr)
library(ggplot2)
library(rpart.plot)

# The working directory is an absolute path on the author's machine. Only
# switch to it when it exists, so the script does not abort elsewhere.
work_dir <- "/media/hsusir/DATA/Rdata Practice/09Algorithm/descion-tree-at-iris/"
if (dir.exists(work_dir)) {
  setwd(work_dir)
}

# Load the built-in iris data set into the workspace.
data(iris)
基本資料呈現
# Quick look at the iris data frame. Lines starting with `##` are the console
# output recorded when each call was run.
names(iris) # show the variable (column) names
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
dim(iris) # show the number of rows and columns
## [1] 150 5
summary(iris) # basic descriptive statistics for every column
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Class counts: each of the three species has 50 observations.
summary(iris$Species)
## setosa versicolor virginica
## 50 50 50
head(iris, 12) # show only the first 12 rows
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
tail(iris, 12) # show only the last 12 rows
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
使用第一組套件: tree
# Fit a classification tree with the `tree` package, predicting Species from
# all other columns of iris, then print the fitted split table.
stree <- tree(formula = Species ~ ., data = iris)
stree
## node), split, n, deviance, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )
## 2) Petal.Length < 2.45 50 0.000 setosa ( 1.00000 0.00000 0.00000 ) *
## 3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )
## 6) Petal.Width < 1.75 54 33.320 versicolor ( 0.00000 0.90741 0.09259 )
## 12) Petal.Length < 4.95 48 9.721 versicolor ( 0.00000 0.97917 0.02083 )
## 24) Sepal.Length < 5.15 5 5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
## 25) Sepal.Length > 5.15 43 0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
## 13) Petal.Length > 4.95 6 7.638 virginica ( 0.00000 0.33333 0.66667 ) *
## 7) Petal.Width > 1.75 46 9.635 virginica ( 0.00000 0.02174 0.97826 )
## 14) Petal.Length < 4.95 6 5.407 virginica ( 0.00000 0.16667 0.83333 ) *
## 15) Petal.Length > 4.95 40 0.000 virginica ( 0.00000 0.00000 1.00000 ) *

# Draw the tree, then add the text labels on top of the plot.
plot(stree)
text(stree)
使用第二組套件: rpart
# Fit a classification tree with rpart, using every other column of iris as
# a predictor of Species, and print the fitted splits.
fit2 <- rpart(formula = Species ~ ., data = iris)
fit2
## n= 150
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 150 100 setosa (0.33333333 0.33333333 0.33333333)
## 2) Petal.Length< 2.45 50 0 setosa (1.00000000 0.00000000 0.00000000) *
## 3) Petal.Length>=2.45 100 50 versicolor (0.00000000 0.50000000 0.50000000)
## 6) Petal.Width< 1.75 54 5 versicolor (0.00000000 0.90740741 0.09259259) *
## 7) Petal.Width>=1.75 46 1 virginica (0.00000000 0.02173913 0.97826087) *

# Plot the fitted tree with prp() from rpart.plot.
prp(
  fit2,
  col = 2,
  box.col = "lightgray",
  shadow.col = "gray"
)
使用第三組套件: party (ctree 函數)
library(party)

# Model formula: Species explained by all remaining columns.
target <- Species ~ .

# Fit a tree with ctree() on the full iris data and plot it in the
# "simple" display style.
cdt <- ctree(formula = target, data = iris)
plot(cdt, type = "simple")
使用第四組套件: Random Forest
# Random forest classifier for Species on the full iris data. The seed is
# fixed so that repeated runs give the same forest.
set.seed(777)

# Fix: the original call passed `importane = T`. That misspelling matches no
# randomForest() argument, so variable importance was presumably never
# requested as intended. It is now spelled `importance`, and TRUE replaces
# the reassignable shorthand `T`.
iris.rf <- randomForest(
  Species ~ .,
  data = iris,
  importance = TRUE,
  proximity = TRUE
)
print(iris.rf)
# NOTE(review): the output below was recorded before the `importance` typo
# was fixed (see the Call line). Re-running may give slightly different
# numbers - TODO re-knit and confirm.
##
## Call:
## randomForest(formula = Species ~ ., data = iris, importane = T, proximity = T)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 4.67%
## Confusion matrix:
## setosa versicolor virginica class.error
## setosa 50 0 0 0.00
## versicolor 0 47 3 0.06
## virginica 0 4 46 0.08

# Percentage of observations whose predicted class equals the true Species.
# predict() is called without new data; the recorded 95.33% is the complement
# of the 4.67% OOB error above, so these appear to be out-of-bag predictions.
100 * mean(predict(iris.rf) == iris$Species)
## [1] 95.33333