艙等、性別、年齡到底哪個因素才是關鍵? 有規則可詢嗎?
1.設定所需的函式庫(libraries)以及載入資料
install.packages("arules")
install.packages("arulesViz")
install.packages("grid")
library(arules)# find association rules with default settings
setwd("/media/hsusir/DATA/Rdata Practice/09Algorithm/Titanic-Survived")
load("./titanic.raw.rdata")
titanic<-titanic.raw
str(titanic)
2.rules with rhs containing “Survived” only
rules <- apriori(titanic)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## 0.8 0.1 1 none FALSE TRUE 0.1 1 10
## target ext
## rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 220
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [27 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
inspect(rules)
## lhs rhs support
## 1 {} => {Age=Adult} 0.9504771
## 2 {Class=2nd} => {Age=Adult} 0.1185825
## 3 {Class=1st} => {Age=Adult} 0.1449341
## 4 {Sex=Female} => {Age=Adult} 0.1930940
## 5 {Class=3rd} => {Age=Adult} 0.2848705
## 6 {Survived=Yes} => {Age=Adult} 0.2971377
## 7 {Class=Crew} => {Sex=Male} 0.3916402
## 8 {Class=Crew} => {Age=Adult} 0.4020900
## 9 {Survived=No} => {Sex=Male} 0.6197183
## 10 {Survived=No} => {Age=Adult} 0.6533394
## 11 {Sex=Male} => {Age=Adult} 0.7573830
## 12 {Sex=Female,Survived=Yes} => {Age=Adult} 0.1435711
## 13 {Class=3rd,Sex=Male} => {Survived=No} 0.1917310
## 14 {Class=3rd,Survived=No} => {Age=Adult} 0.2162653
## 15 {Class=3rd,Sex=Male} => {Age=Adult} 0.2099046
## 16 {Sex=Male,Survived=Yes} => {Age=Adult} 0.1535666
## 17 {Class=Crew,Survived=No} => {Sex=Male} 0.3044071
## 18 {Class=Crew,Survived=No} => {Age=Adult} 0.3057701
## 19 {Class=Crew,Sex=Male} => {Age=Adult} 0.3916402
## 20 {Class=Crew,Age=Adult} => {Sex=Male} 0.3916402
## 21 {Sex=Male,Survived=No} => {Age=Adult} 0.6038164
## 22 {Age=Adult,Survived=No} => {Sex=Male} 0.6038164
## 23 {Class=3rd,Sex=Male,Survived=No} => {Age=Adult} 0.1758292
## 24 {Class=3rd,Age=Adult,Survived=No} => {Sex=Male} 0.1758292
## 25 {Class=3rd,Sex=Male,Age=Adult} => {Survived=No} 0.1758292
## 26 {Class=Crew,Sex=Male,Survived=No} => {Age=Adult} 0.3044071
## 27 {Class=Crew,Age=Adult,Survived=No} => {Sex=Male} 0.3044071
## confidence lift
## 1 0.9504771 1.0000000
## 2 0.9157895 0.9635051
## 3 0.9815385 1.0326798
## 4 0.9042553 0.9513700
## 5 0.8881020 0.9343750
## 6 0.9198312 0.9677574
## 7 0.9740113 1.2384742
## 8 1.0000000 1.0521033
## 9 0.9154362 1.1639949
## 10 0.9651007 1.0153856
## 11 0.9630272 1.0132040
## 12 0.9186047 0.9664669
## 13 0.8274510 1.2222950
## 14 0.9015152 0.9484870
## 15 0.9058824 0.9530818
## 16 0.9209809 0.9689670
## 17 0.9955423 1.2658514
## 18 1.0000000 1.0521033
## 19 1.0000000 1.0521033
## 20 0.9740113 1.2384742
## 21 0.9743402 1.0251065
## 22 0.9242003 1.1751385
## 23 0.9170616 0.9648435
## 24 0.8130252 1.0337773
## 25 0.8376623 1.2373791
## 26 1.0000000 1.0521033
## 27 0.9955423 1.2658514
#這麼多規則,不一定全都是有用的。我只想看(“Survived=No”, “Survived=Yes”)有關的
rules <- apriori(titanic,control = list(verbose=F),parameter = list(minlen=2, supp=0.005, conf=0.8),appearance = list(rhs=c("Survived=No","Survived=Yes"),default="lhs"))
quality(rules) <- round(quality(rules), digits=3)
rules.sorted <- sort(rules, by="lift")
inspect(rules.sorted)
## lhs rhs support confidence
## 1 {Class=2nd,Age=Child} => {Survived=Yes} 0.011 1.000
## 7 {Class=2nd,Sex=Female,Age=Child} => {Survived=Yes} 0.006 1.000
## 4 {Class=1st,Sex=Female} => {Survived=Yes} 0.064 0.972
## 10 {Class=1st,Sex=Female,Age=Adult} => {Survived=Yes} 0.064 0.972
## 2 {Class=2nd,Sex=Female} => {Survived=Yes} 0.042 0.877
## 5 {Class=Crew,Sex=Female} => {Survived=Yes} 0.009 0.870
## 11 {Class=Crew,Sex=Female,Age=Adult} => {Survived=Yes} 0.009 0.870
## 8 {Class=2nd,Sex=Female,Age=Adult} => {Survived=Yes} 0.036 0.860
## 9 {Class=2nd,Sex=Male,Age=Adult} => {Survived=No} 0.070 0.917
## 3 {Class=2nd,Sex=Male} => {Survived=No} 0.070 0.860
## 12 {Class=3rd,Sex=Male,Age=Adult} => {Survived=No} 0.176 0.838
## 6 {Class=3rd,Sex=Male} => {Survived=No} 0.192 0.827
## lift
## 1 3.096
## 7 3.096
## 4 3.010
## 10 3.010
## 2 2.716
## 5 2.692
## 11 2.692
## 8 2.663
## 9 1.354
## 3 1.271
## 12 1.237
## 6 1.222
3.你會發現有些規則好像有重複性(redundant rules),所以現在我們要排除redundant rules
inspect(rules.sorted[1:2])
## lhs rhs support confidence
## 1 {Class=2nd,Age=Child} => {Survived=Yes} 0.011 1
## 7 {Class=2nd,Sex=Female,Age=Child} => {Survived=Yes} 0.006 1
## lift
## 1 3.096
## 7 3.096
## find redundant rules
subset.matrix <- is.subset(rules.sorted, rules.sorted)
subset.matrix[lower.tri(subset.matrix, diag = T)] <- NA
redundant <- colSums(subset.matrix, na.rm = T) >= 1
## which rules are redundant
which(redundant)
## {Class=2nd,Sex=Female,Age=Child,Survived=Yes}
## 2
## {Class=1st,Sex=Female,Age=Adult,Survived=Yes}
## 4
## {Class=Crew,Sex=Female,Age=Adult,Survived=Yes}
## 7
## {Class=2nd,Sex=Female,Age=Adult,Survived=Yes}
## 8
## remove redundant rules
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)
## lhs rhs support confidence
## 1 {Class=2nd,Age=Child} => {Survived=Yes} 0.011 1.000
## 4 {Class=1st,Sex=Female} => {Survived=Yes} 0.064 0.972
## 2 {Class=2nd,Sex=Female} => {Survived=Yes} 0.042 0.877
## 5 {Class=Crew,Sex=Female} => {Survived=Yes} 0.009 0.870
## 9 {Class=2nd,Sex=Male,Age=Adult} => {Survived=No} 0.070 0.917
## 3 {Class=2nd,Sex=Male} => {Survived=No} 0.070 0.860
## 12 {Class=3rd,Sex=Male,Age=Adult} => {Survived=No} 0.176 0.838
## 6 {Class=3rd,Sex=Male} => {Survived=No} 0.192 0.827
## lift
## 1 3.096
## 4 3.010
## 2 2.716
## 5 2.692
## 9 1.354
## 3 1.271
## 12 1.237
## 6 1.222
4.Rules about Children
rules <- apriori(titanic, control = list(verbose=F),
parameter = list(minlen=3, supp=0.002, conf=0.2),
appearance = list(default="none", rhs=c("Survived=Yes"),
lhs=c("Class=1st", "Class=2nd", "Class=3rd",
"Age=Child", "Age=Adult")))
rules.sorted <- sort(rules, by="confidence")
inspect(rules.sorted)
## lhs rhs support confidence lift
## 1 {Class=2nd,Age=Child} => {Survived=Yes} 0.010904134 1.0000000 3.0956399
## 2 {Class=1st,Age=Child} => {Survived=Yes} 0.002726034 1.0000000 3.0956399
## 5 {Class=1st,Age=Adult} => {Survived=Yes} 0.089504771 0.6175549 1.9117275
## 4 {Class=2nd,Age=Adult} => {Survived=Yes} 0.042707860 0.3601533 1.1149048
## 3 {Class=3rd,Age=Child} => {Survived=Yes} 0.012267151 0.3417722 1.0580035
## 6 {Class=3rd,Age=Adult} => {Survived=Yes} 0.068605179 0.2408293 0.7455209
5.Visualizing Association Rules
library(arulesViz)
plot(rules)
plot(rules, method="graph", control=list(type="items"))
plot(rules, method="paracoord", control=list(reorder=TRUE))