[.設定所需的函式庫(libraries)以及載入資料]

setwd("D:/Rdata Practice/Health Database")
system.time(load("D:/Rdata Practice/Health Database/ugicco.rda"))
##    user  system elapsed 
##    2.06    0.07    2.15
mysample1 <- ugicco[ sample( which(ugicco$ugic==0), round(0.2*length(which(ugicco$ugic==0)))), ]
mysample2 <- ugicco[ sample( which(ugicco$ugic==1), round(0.2*length(which(ugicco$ugic==1)))), ]
mysample <- rbind(mysample1, mysample2)

library(survival)

[第一部份].Kaplan-Meier method

#library(survival)
mysample$dur <- as.numeric(as.Date(mysample$end.date, "%Y%m%d") - as.Date(mysample$first.date, "%Y%m%d") + 1) / 365.25

surv1 <- survfit(Surv(dur, ugic) ~ 1, conf.type="none", data=mysample)
# surv1 <- survfit(Surv(dur, ugic==1) ~ 1, conf.type="none", data=mysample)
surv1; # summary(surv1)
## Call: survfit(formula = Surv(dur, ugic) ~ 1, data = mysample, conf.type = "none")
## 
## records   n.max n.start  events  median 
##   25077   25077   25077     650      NA
summary(surv1, times=seq(0, 5, 1))
## Call: survfit(formula = Surv(dur, ugic) ~ 1, data = mysample, conf.type = "none")
## 
##  time n.risk n.event survival  std.err
##     0  25077       0    1.000 0.000000
##     1  23567     147    0.994 0.000501
##     2  22365     245    0.983 0.000835
##     3   2360     208    0.974 0.001045
##     4     50      50    0.948 0.003933
##     5      1       0    0.948 0.003933
plot(surv1, xlab="Time", ylab="Survival Probability")

plot(surv1, xlab="Time", ylab="Survival Probability", ylim=c(0.8,1), mark.time=F)

surv2 <- survfit(Surv(dur, ugic) ~ expose, data = mysample)
plot(surv2, xlab="Time", ylab="Survival Probability", col=c(1,2))

survdiff(Surv(dur, ugic) ~ expose, data = mysample, rho=0)
## Call:
## survdiff(formula = Surv(dur, ugic) ~ expose, data = mysample, 
##     rho = 0)
## 
##              N Observed Expected (O-E)^2/E (O-E)^2/V
## expose=0 21857      334      524      68.8       542
## expose=1  3220      316      126     285.7       542
## 
##  Chisq= 542  on 1 degrees of freedom, p= 0
survdiff(Surv(dur, ugic) ~ expose, data = mysample, rho=1)
## Call:
## survdiff(formula = Surv(dur, ugic) ~ expose, data = mysample, 
##     rho = 1)
## 
##              N Observed Expected (O-E)^2/E (O-E)^2/V
## expose=0 21857      330      517      67.9       542
## expose=1  3220      311      123     284.9       542
## 
##  Chisq= 542  on 1 degrees of freedom, p= 0
# With 'rho = 0' this is the log-rank or Mantel-Haenszel test, and with 'rho = 1' it is equivalent to the Peto & Peto modification of the Gehan-Wilcoxon test."

[第一部份].Cox proportional hazards

coxph1 <- coxph(Surv(dur, ugic) ~ expose, data = mysample)
coxph1
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose, data = mysample)
## 
## 
##        coef exp(coef) se(coef)    z p
## expose  1.7      5.49   0.0822 20.7 0
## 
## Likelihood ratio test=365  on 1 df, p=0  n= 25077, number of events= 650
summary(coxph1)
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose, data = mysample)
## 
##   n= 25077, number of events= 650 
## 
##           coef exp(coef) se(coef)    z Pr(>|z|)    
## expose 1.70206   5.48525  0.08222 20.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## expose     5.485     0.1823     4.669     6.444
## 
## Concordance= 0.657  (se = 0.007 )
## Rsquare= 0.014   (max possible= 0.4 )
## Likelihood ratio test= 365  on 1 df,   p=0
## Wald test            = 428.6  on 1 df,   p=0
## Score (logrank) test = 542.4  on 1 df,   p=0
coxph2 <- coxph(Surv(dur, ugic) ~ expose + age + id.sex, data = mysample)
coxph2
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose + age + id.sex, data = mysample)
## 
## 
##           coef exp(coef) se(coef)     z       p
## expose  1.3265      3.77  0.10093 13.14 0.0e+00
## age     0.0153      1.02  0.00232  6.56 5.4e-11
## id.sexM 0.1110      1.12  0.07975  1.39 1.6e-01
## 
## Likelihood ratio test=410  on 3 df, p=0  n= 25077, number of events= 650
summary(coxph2)
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose + age + id.sex, data = mysample)
## 
##   n= 25077, number of events= 650 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)    
## expose  1.326452  3.767653 0.100928 13.143  < 2e-16 ***
## age     0.015253  1.015370 0.002325  6.561 5.36e-11 ***
## id.sexM 0.110975  1.117367 0.079748  1.392    0.164    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## expose      3.768     0.2654    3.0914     4.592
## age         1.015     0.9849    1.0108     1.020
## id.sexM     1.117     0.8950    0.9557     1.306
## 
## Concordance= 0.683  (se = 0.012 )
## Rsquare= 0.016   (max possible= 0.4 )
## Likelihood ratio test= 410.2  on 3 df,   p=0
## Wald test            = 465.9  on 3 df,   p=0
## Score (logrank) test = 584  on 3 df,   p=0
# 非類固醇類抗炎藥(NSAID)與低劑量阿司匹林聯用可升高上消化道出血(UGIB)風險

(mytable <- xtabs(~ expose + ugic, data=mysample))
##       ugic
## expose     0     1
##      0 21523   334
##      1  2904   316
prop.table(mytable, 2)
##       ugic
## expose         0         1
##      0 0.8811152 0.5138462
##      1 0.1188848 0.4861538
aggregate(mysample$dur, by=list(Group=mysample$expose), summary)
##   Group   x.Min. x.1st Qu. x.Median   x.Mean x.3rd Qu.   x.Max.
## 1     0 0.002738  2.998000 2.998000 2.761000  2.998000 2.998000
## 2     1 0.180700  2.889000 3.740000 3.293000  3.948000 6.368000