## This script caculates the incidence rates for oz-related diseases according to GBD in US, Japan, and South Korea
rm(list=ls())
library(abind)
setwd('~/Documents/Research/3Paper2/4ERL/Code/Data_and_Analysis/Health/WHO')

##== Read death data
dth0 = read.table("Morticd10_part2", header=TRUE, sep=',')
sort(unique(dth0$Country))

# Read pop data
pop0 = read.table("pop", header=TRUE, sep=',')
sort(unique(pop0$Country))

##== Baseline mortality
ct  = c('SK','JP','US')
# Country numbers are from "country_codes.csv"
# Republic of Korea, Japan, United States of America
ct_nb = c(3325,3160,2450)
dss = c('Circ', 'Resp')
nct = length(ct)
nds = length(dss)

pop = array(NA,nct)
dth = array(NA,c(nct,nds))
names(pop) = ct
rownames(dth) = ct
colnames(dth) = dss
for (i in 1:nct){
    # Deaths in one country    
    dth1 = dth0[dth0$Country==ct_nb[i],]
    # Population in one country
    pop1 = pop0[pop0$Country==ct_nb[i],]
    # Most recent year 
    yr = min(max(unique(dth1$Year)),max(unique(pop1$Year)))
    print(yr)
    
    dth2 = dth1[dth1$Year==yr,]
    pop2 = pop1[pop1$Year==yr,]

    # Pop 30+
    ipp  = match(c('Pop12','Pop25'),colnames(pop2))
    pop[i] = sum(as.matrix(pop2[,ipp[1]:ipp[2]]),na.rm=TRUE)

    # Disease mapping
    # circulatory: I00–I99, E10-E14
    # respiratory: J00–J98
    y=as.character(dth2$Cause)
        
    # SK and Japan/US have different categories   
    if (i==1){
       y[grepl("^I",y)]
       y[grepl("^E",y)]
       # circulatory
       ind  = match(c("I05","I99"),dth2$Cause) 
       ind1 = (ind[1]:(ind[2]+1))

       ind  = match(c("E10","E14"),dth2$Cause)
       ind2 = c(ind[1]:(ind[2]+1))
       ind.1 = c(ind1,ind2)
       
       y[grepl("^J",y)]
       # respiratory
       ind  = match(c("J00","J98"),dth2$Cause) 
       ind.2 = c(ind[1]:(ind[2]+1))

    } else{
       y[grepl("^I",y)]
       y[grepl("^E",y)]
       # circulatory
       ind  = match(c("I00","I99"),dth2$Cause) 
       ind1 = (ind[1]:(ind[2]+1))

       ind  = match(c("E100","E149"),dth2$Cause)
       ind2 = c(ind[1]:(ind[2]+1))
       ind.1 = c(ind1,ind2)

       y[grepl("^J",y)]       
       # respiratory
       ind  = match(c("J00","J989"),dth2$Cause) 
       ind.2 = c((ind[1]:(ind[2]+1)))
    }

    ind = list(ind.1,ind.2)

    # Deaths
    for (j in 1:nds){
        # mt from each ds
        dth3 = dth2[ind[[j]],]
        idt  = match(c('Deaths12','Deaths25'),colnames(dth3))        
        dth[i,j] = sum(as.matrix(dth3[,idt[1]:idt[2]]),na.rm=TRUE)   
    }
}

dth
     # Circ   Resp
# SK  67768  22405
# JP 358674 203203
# US 877073 225152

pop
       # SK        JP        US 
 # 33015416  90808000 176487485
 
##== Incidence
y0 = array(NA,c(nct,nds))
for (i in 1:nct){
for (j in 1:nds){
  	y0[i,j] = dth[i,j]/pop[i]
}
}
colnames(y0) = dss
rownames(y0) = ct
print(round(y0*1000,3))

    # Circ  Resp
# SK 2.053 0.679
# JP 3.950 2.238
# US 4.970 1.276

save(y0, file = "y0_oz_ICD10.Rdata")