[.設定所需的函式庫(libraries)以及載入資料]
setwd("D:/Rdata Practice/Health Database")
system.time(load("D:/Rdata Practice/Health Database/ugicco.rda"))
## user system elapsed
## 2.06 0.07 2.15
mysample1 <- ugicco[ sample( which(ugicco$ugic==0), round(0.2*length(which(ugicco$ugic==0)))), ]
mysample2 <- ugicco[ sample( which(ugicco$ugic==1), round(0.2*length(which(ugicco$ugic==1)))), ]
mysample <- rbind(mysample1, mysample2)
library(survival)
[第一部份].Kaplan-Meier method
#library(survival)
mysample$dur <- as.numeric(as.Date(mysample$end.date, "%Y%m%d") - as.Date(mysample$first.date, "%Y%m%d") + 1) / 365.25
surv1 <- survfit(Surv(dur, ugic) ~ 1, conf.type="none", data=mysample)
# surv1 <- survfit(Surv(dur, ugic==1) ~ 1, conf.type="none", data=mysample)
surv1; # summary(surv1)
## Call: survfit(formula = Surv(dur, ugic) ~ 1, data = mysample, conf.type = "none")
##
## records n.max n.start events median
## 25077 25077 25077 650 NA
summary(surv1, times=seq(0, 5, 1))
## Call: survfit(formula = Surv(dur, ugic) ~ 1, data = mysample, conf.type = "none")
##
## time n.risk n.event survival std.err
## 0 25077 0 1.000 0.000000
## 1 23567 147 0.994 0.000501
## 2 22365 245 0.983 0.000835
## 3 2360 208 0.974 0.001045
## 4 50 50 0.948 0.003933
## 5 1 0 0.948 0.003933
plot(surv1, xlab="Time", ylab="Survival Probability")
plot(surv1, xlab="Time", ylab="Survival Probability", ylim=c(0.8,1), mark.time=F)
surv2 <- survfit(Surv(dur, ugic) ~ expose, data = mysample)
plot(surv2, xlab="Time", ylab="Survival Probability", col=c(1,2))
survdiff(Surv(dur, ugic) ~ expose, data = mysample, rho=0)
## Call:
## survdiff(formula = Surv(dur, ugic) ~ expose, data = mysample,
## rho = 0)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## expose=0 21857 334 524 68.8 542
## expose=1 3220 316 126 285.7 542
##
## Chisq= 542 on 1 degrees of freedom, p= 0
survdiff(Surv(dur, ugic) ~ expose, data = mysample, rho=1)
## Call:
## survdiff(formula = Surv(dur, ugic) ~ expose, data = mysample,
## rho = 1)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## expose=0 21857 330 517 67.9 542
## expose=1 3220 311 123 284.9 542
##
## Chisq= 542 on 1 degrees of freedom, p= 0
# With 'rho = 0' this is the log-rank or Mantel-Haenszel test, and with 'rho = 1' it is equivalent to the Peto & Peto modification of the Gehan-Wilcoxon test."
[第一部份].Cox proportional hazards
coxph1 <- coxph(Surv(dur, ugic) ~ expose, data = mysample)
coxph1
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose, data = mysample)
##
##
## coef exp(coef) se(coef) z p
## expose 1.7 5.49 0.0822 20.7 0
##
## Likelihood ratio test=365 on 1 df, p=0 n= 25077, number of events= 650
summary(coxph1)
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose, data = mysample)
##
## n= 25077, number of events= 650
##
## coef exp(coef) se(coef) z Pr(>|z|)
## expose 1.70206 5.48525 0.08222 20.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## expose 5.485 0.1823 4.669 6.444
##
## Concordance= 0.657 (se = 0.007 )
## Rsquare= 0.014 (max possible= 0.4 )
## Likelihood ratio test= 365 on 1 df, p=0
## Wald test = 428.6 on 1 df, p=0
## Score (logrank) test = 542.4 on 1 df, p=0
coxph2 <- coxph(Surv(dur, ugic) ~ expose + age + id.sex, data = mysample)
coxph2
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose + age + id.sex, data = mysample)
##
##
## coef exp(coef) se(coef) z p
## expose 1.3265 3.77 0.10093 13.14 0.0e+00
## age 0.0153 1.02 0.00232 6.56 5.4e-11
## id.sexM 0.1110 1.12 0.07975 1.39 1.6e-01
##
## Likelihood ratio test=410 on 3 df, p=0 n= 25077, number of events= 650
summary(coxph2)
## Call:
## coxph(formula = Surv(dur, ugic) ~ expose + age + id.sex, data = mysample)
##
## n= 25077, number of events= 650
##
## coef exp(coef) se(coef) z Pr(>|z|)
## expose 1.326452 3.767653 0.100928 13.143 < 2e-16 ***
## age 0.015253 1.015370 0.002325 6.561 5.36e-11 ***
## id.sexM 0.110975 1.117367 0.079748 1.392 0.164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## expose 3.768 0.2654 3.0914 4.592
## age 1.015 0.9849 1.0108 1.020
## id.sexM 1.117 0.8950 0.9557 1.306
##
## Concordance= 0.683 (se = 0.012 )
## Rsquare= 0.016 (max possible= 0.4 )
## Likelihood ratio test= 410.2 on 3 df, p=0
## Wald test = 465.9 on 3 df, p=0
## Score (logrank) test = 584 on 3 df, p=0
# 非類固醇類抗炎藥(NSAID)與低劑量阿司匹林聯用可升高上消化道出血(UGIB)風險
(mytable <- xtabs(~ expose + ugic, data=mysample))
## ugic
## expose 0 1
## 0 21523 334
## 1 2904 316
prop.table(mytable, 2)
## ugic
## expose 0 1
## 0 0.8811152 0.5138462
## 1 0.1188848 0.4861538
aggregate(mysample$dur, by=list(Group=mysample$expose), summary)
## Group x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max.
## 1 0 0.002738 2.998000 2.998000 2.761000 2.998000 2.998000
## 2 1 0.180700 2.889000 3.740000 3.293000 3.948000 6.368000