## This script caculates the incidence rates for PM-related diseases according to GBD in US, Japan, and South Korea rm(list=ls()) setwd('~/Documents/Research/3Paper2/4ERL/Code/Data_and_Analysis/Health/WHO') ##== Read death data dth0 = read.table("Morticd10_part2", header=TRUE, sep=',') sort(unique(dth0$Country)) # Read pop data pop0 = read.table("pop", header=TRUE, sep=',') unique(pop0$Country) ##== Baseline mortality ct = c('SK','JP','US') # Country numbers are from "country_codes.csv" # Republic of Korea, Japan, United States of America ct_nb = c(3325,3160,2450) dss = c('IHD', 'CEV', 'COPD', 'LC', 'ALRI') nct = length(ct) nds = length(dss) pop = matrix(list(),nrow=nct,ncol=nds) dth = matrix(list(),nrow=nct,ncol=nds) y0.total = array(NA,c(nct,nds)) y0 = matrix(list(),nrow=nct,ncol=nds) for (i in 1:nct){ # Deaths in one country dth1 = dth0[dth0$Country==ct_nb[i],] # Population in one country pop1 = pop0[pop0$Country==ct_nb[i],] # Most recents year yr = min(max(unique(dth1$Year)),max(unique(pop1$Year))) print(yr) dth2 = dth1[dth1$Year==yr,] pop2 = pop1[pop1$Year==yr,] # Population ipp = match(c('Pop12','Pop21','Pop25'),colnames(pop2)) # pop by age group 30-75 & 80+ for IHD and CEV pop3 = as.matrix(pop2[,ipp[1]:ipp[2]]) pop4 = colSums(pop3,na.rm=TRUE) pop5 = sum(as.matrix(pop2[,(ipp[2]+1):ipp[3]]),na.rm=TRUE) pop[[i,1]] = c(pop4,pop5) pop[[i,2]] = c(pop4,pop5) # pop 30+ for COPD and LC pop[[i,3]] = sum(as.matrix(pop2[,ipp[1]:ipp[3]]),na.rm=TRUE) pop[[i,4]] = sum(as.matrix(pop2[,ipp[1]:ipp[3]]),na.rm=TRUE) # pop 5- for ALRI ipp = match(c("Pop2","Pop6"),colnames(pop1)) pop[[i,5]] = sum(as.matrix(pop2[,ipp[1]:ipp[2]]),na.rm=TRUE) # Disease matching with GBD # SK and Japan/US have different categories if (i==1){ # IHD: I20-I25 dth2$Cause[grepl("^I2",dth2$Cause)] ind = match(c("I20","I25"),dth2$Cause) ind1 = (ind[1]:(ind[2]+1)) # CEV: I60-I69 dth2$Cause[grepl("^I6",dth2$Cause)] ind = match(c("I60","I69"),dth2$Cause) ind2 = c(ind[1]:(ind[2]+1)) # COPD: J40-J47 dth2$Cause[grepl("^J4",dth2$Cause)] ind = match(c("J40","J47"),dth2$Cause) ind3 = c(ind[1]:(ind[2]+1)) # LC: C33-C34 dth2$Cause[grepl("^C3",dth2$Cause)] ind = match(c("C33","C34"),dth2$Cause) ind4 = (ind[1]:(ind[2]+1)) # ALRI: J10-J22 dth2$Cause[grepl("^J",dth2$Cause)] ind = match(c("J10","J22"),dth2$Cause) ind5 = (ind[1]:(ind[2]+1)) } else{ # IHD: I20-I25 dth2$Cause[grepl("^I2",dth2$Cause)] ind = match(c("I200","I259"),dth2$Cause) ind1 = (ind[1]:(ind[2]+1)) # CEV: I60-I69 dth2$Cause[grepl("^I6",dth2$Cause)] ind = match(c("I600","I698"),dth2$Cause) ind2 = c(ind[1]:(ind[2]+1)) # COPD: J40-J47 dth2$Cause[grepl("^J4",dth2$Cause)] ind = match(c("J40","J47"),dth2$Cause) ind3 = c((ind[1]:(ind[2]+1))) # LC: C33-C34 dth2$Cause[grepl("^C3",dth2$Cause)] ind = match(c("C33","C349"),dth2$Cause) ind4 = (ind[1]:(ind[2]+1)) # ALRI: J10-J22 dth2$Cause[grepl("^J",dth2$Cause)] ind = match(c("J100","J22"),dth2$Cause) ind5 = (ind[1]:(ind[2]+1)) } ind = list(ind1,ind2,ind3,ind4,ind5) for (j in 1:nds){ # Deaths for each ds dth3 = dth2[ind[[j]],] # Deaths by age group for IHD and CEV idt = match(c('Deaths12','Deaths21','Deaths25'),colnames(dth3)) if (j==1 | j==2){ dth4 = as.matrix(dth3[,idt[1]:idt[2]]) dth5 = colSums(dth4,na.rm=TRUE) dth6 = sum(as.matrix(dth3[,(idt[2]+1):idt[3]]),na.rm=TRUE) dth[[i,j]] = c(dth5,dth6) } else{ # Deaths 30+ for COPD and LC if (j==3 | j==4){ dth[[i,j]] = sum(as.matrix(dth3[,idt[1]:idt[3]]),na.rm=TRUE) } else{ # Deaths 5- for ALRI idt = match(c("Deaths2","Deaths6"),colnames(dth3)) dth[[i,j]] = sum(as.matrix(dth3[,idt[1]:idt[2]]),na.rm=TRUE) } } # incidence y0.total[i,j] = sum(dth[[i,j]])/sum(pop[[i,j]]) y0[[i,j]] = dth[[i,j]]/pop[[i,j]] } } colnames(y0.total) = dss rownames(y0.total) = ct print(round(y0.total*1000,3)) # IHD CEV COPD LC ALRI # SK 0.410 0.768 0.214 0.520 0.008 # JP 0.822 1.301 0.215 0.801 0.023 # US 2.300 0.767 0.722 0.899 0.019 colnames(y0) = dss rownames(y0) = ct save(y0,y0.total,file="y0_pm_ICD10.Rdata")