艙等、性別、年齡到底哪個因素才是關鍵? 有規則可詢嗎?

1.設定所需的函式庫(libraries)以及載入資料

install.packages("arules")
install.packages("arulesViz")
install.packages("grid")
library(arules)# find association rules with default settings

setwd("/media/hsusir/DATA/Rdata Practice/09Algorithm/Titanic-Survived")
load("./titanic.raw.rdata")
titanic<-titanic.raw
str(titanic)

2.rules with rhs containing “Survived” only

rules <- apriori(titanic)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##         0.8    0.1    1 none FALSE            TRUE     0.1      1     10
##  target   ext
##   rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 220 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [27 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
inspect(rules)
##    lhs                                   rhs           support  
## 1  {}                                 => {Age=Adult}   0.9504771
## 2  {Class=2nd}                        => {Age=Adult}   0.1185825
## 3  {Class=1st}                        => {Age=Adult}   0.1449341
## 4  {Sex=Female}                       => {Age=Adult}   0.1930940
## 5  {Class=3rd}                        => {Age=Adult}   0.2848705
## 6  {Survived=Yes}                     => {Age=Adult}   0.2971377
## 7  {Class=Crew}                       => {Sex=Male}    0.3916402
## 8  {Class=Crew}                       => {Age=Adult}   0.4020900
## 9  {Survived=No}                      => {Sex=Male}    0.6197183
## 10 {Survived=No}                      => {Age=Adult}   0.6533394
## 11 {Sex=Male}                         => {Age=Adult}   0.7573830
## 12 {Sex=Female,Survived=Yes}          => {Age=Adult}   0.1435711
## 13 {Class=3rd,Sex=Male}               => {Survived=No} 0.1917310
## 14 {Class=3rd,Survived=No}            => {Age=Adult}   0.2162653
## 15 {Class=3rd,Sex=Male}               => {Age=Adult}   0.2099046
## 16 {Sex=Male,Survived=Yes}            => {Age=Adult}   0.1535666
## 17 {Class=Crew,Survived=No}           => {Sex=Male}    0.3044071
## 18 {Class=Crew,Survived=No}           => {Age=Adult}   0.3057701
## 19 {Class=Crew,Sex=Male}              => {Age=Adult}   0.3916402
## 20 {Class=Crew,Age=Adult}             => {Sex=Male}    0.3916402
## 21 {Sex=Male,Survived=No}             => {Age=Adult}   0.6038164
## 22 {Age=Adult,Survived=No}            => {Sex=Male}    0.6038164
## 23 {Class=3rd,Sex=Male,Survived=No}   => {Age=Adult}   0.1758292
## 24 {Class=3rd,Age=Adult,Survived=No}  => {Sex=Male}    0.1758292
## 25 {Class=3rd,Sex=Male,Age=Adult}     => {Survived=No} 0.1758292
## 26 {Class=Crew,Sex=Male,Survived=No}  => {Age=Adult}   0.3044071
## 27 {Class=Crew,Age=Adult,Survived=No} => {Sex=Male}    0.3044071
##    confidence lift     
## 1  0.9504771  1.0000000
## 2  0.9157895  0.9635051
## 3  0.9815385  1.0326798
## 4  0.9042553  0.9513700
## 5  0.8881020  0.9343750
## 6  0.9198312  0.9677574
## 7  0.9740113  1.2384742
## 8  1.0000000  1.0521033
## 9  0.9154362  1.1639949
## 10 0.9651007  1.0153856
## 11 0.9630272  1.0132040
## 12 0.9186047  0.9664669
## 13 0.8274510  1.2222950
## 14 0.9015152  0.9484870
## 15 0.9058824  0.9530818
## 16 0.9209809  0.9689670
## 17 0.9955423  1.2658514
## 18 1.0000000  1.0521033
## 19 1.0000000  1.0521033
## 20 0.9740113  1.2384742
## 21 0.9743402  1.0251065
## 22 0.9242003  1.1751385
## 23 0.9170616  0.9648435
## 24 0.8130252  1.0337773
## 25 0.8376623  1.2373791
## 26 1.0000000  1.0521033
## 27 0.9955423  1.2658514
#這麼多規則,不一定全都是有用的。我只想看(“Survived=No”, “Survived=Yes”)有關的
rules <- apriori(titanic,control = list(verbose=F),parameter = list(minlen=2, supp=0.005, conf=0.8),appearance = list(rhs=c("Survived=No","Survived=Yes"),default="lhs"))


quality(rules) <- round(quality(rules), digits=3)
rules.sorted <- sort(rules, by="lift")
inspect(rules.sorted)
##    lhs                                  rhs            support confidence
## 1  {Class=2nd,Age=Child}             => {Survived=Yes} 0.011   1.000     
## 7  {Class=2nd,Sex=Female,Age=Child}  => {Survived=Yes} 0.006   1.000     
## 4  {Class=1st,Sex=Female}            => {Survived=Yes} 0.064   0.972     
## 10 {Class=1st,Sex=Female,Age=Adult}  => {Survived=Yes} 0.064   0.972     
## 2  {Class=2nd,Sex=Female}            => {Survived=Yes} 0.042   0.877     
## 5  {Class=Crew,Sex=Female}           => {Survived=Yes} 0.009   0.870     
## 11 {Class=Crew,Sex=Female,Age=Adult} => {Survived=Yes} 0.009   0.870     
## 8  {Class=2nd,Sex=Female,Age=Adult}  => {Survived=Yes} 0.036   0.860     
## 9  {Class=2nd,Sex=Male,Age=Adult}    => {Survived=No}  0.070   0.917     
## 3  {Class=2nd,Sex=Male}              => {Survived=No}  0.070   0.860     
## 12 {Class=3rd,Sex=Male,Age=Adult}    => {Survived=No}  0.176   0.838     
## 6  {Class=3rd,Sex=Male}              => {Survived=No}  0.192   0.827     
##    lift 
## 1  3.096
## 7  3.096
## 4  3.010
## 10 3.010
## 2  2.716
## 5  2.692
## 11 2.692
## 8  2.663
## 9  1.354
## 3  1.271
## 12 1.237
## 6  1.222

3.你會發現有些規則好像有重複性(redundant rules),所以現在我們要排除redundant rules

inspect(rules.sorted[1:2])
##   lhs                                 rhs            support confidence
## 1 {Class=2nd,Age=Child}            => {Survived=Yes} 0.011   1         
## 7 {Class=2nd,Sex=Female,Age=Child} => {Survived=Yes} 0.006   1         
##   lift 
## 1 3.096
## 7 3.096
## find redundant rules
subset.matrix <- is.subset(rules.sorted, rules.sorted)
subset.matrix[lower.tri(subset.matrix, diag = T)] <- NA
redundant <- colSums(subset.matrix, na.rm = T) >= 1

## which rules are redundant
which(redundant)
##  {Class=2nd,Sex=Female,Age=Child,Survived=Yes} 
##                                              2 
##  {Class=1st,Sex=Female,Age=Adult,Survived=Yes} 
##                                              4 
## {Class=Crew,Sex=Female,Age=Adult,Survived=Yes} 
##                                              7 
##  {Class=2nd,Sex=Female,Age=Adult,Survived=Yes} 
##                                              8
## remove redundant rules
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)
##    lhs                               rhs            support confidence
## 1  {Class=2nd,Age=Child}          => {Survived=Yes} 0.011   1.000     
## 4  {Class=1st,Sex=Female}         => {Survived=Yes} 0.064   0.972     
## 2  {Class=2nd,Sex=Female}         => {Survived=Yes} 0.042   0.877     
## 5  {Class=Crew,Sex=Female}        => {Survived=Yes} 0.009   0.870     
## 9  {Class=2nd,Sex=Male,Age=Adult} => {Survived=No}  0.070   0.917     
## 3  {Class=2nd,Sex=Male}           => {Survived=No}  0.070   0.860     
## 12 {Class=3rd,Sex=Male,Age=Adult} => {Survived=No}  0.176   0.838     
## 6  {Class=3rd,Sex=Male}           => {Survived=No}  0.192   0.827     
##    lift 
## 1  3.096
## 4  3.010
## 2  2.716
## 5  2.692
## 9  1.354
## 3  1.271
## 12 1.237
## 6  1.222

4.Rules about Children

rules <- apriori(titanic, control = list(verbose=F),
parameter = list(minlen=3, supp=0.002, conf=0.2),
appearance = list(default="none", rhs=c("Survived=Yes"),
lhs=c("Class=1st", "Class=2nd", "Class=3rd",
"Age=Child", "Age=Adult")))

rules.sorted <- sort(rules, by="confidence")
inspect(rules.sorted)
##   lhs                      rhs            support     confidence lift     
## 1 {Class=2nd,Age=Child} => {Survived=Yes} 0.010904134 1.0000000  3.0956399
## 2 {Class=1st,Age=Child} => {Survived=Yes} 0.002726034 1.0000000  3.0956399
## 5 {Class=1st,Age=Adult} => {Survived=Yes} 0.089504771 0.6175549  1.9117275
## 4 {Class=2nd,Age=Adult} => {Survived=Yes} 0.042707860 0.3601533  1.1149048
## 3 {Class=3rd,Age=Child} => {Survived=Yes} 0.012267151 0.3417722  1.0580035
## 6 {Class=3rd,Age=Adult} => {Survived=Yes} 0.068605179 0.2408293  0.7455209

5.Visualizing Association Rules

library(arulesViz)
plot(rules)

plot(rules, method="graph", control=list(type="items"))

plot(rules, method="paracoord", control=list(reorder=TRUE))