S'utilitza un model Distributed Lag Non-Linear Model (DLNM) per capturar:
library(dlnm)
library(mgcv)
library(boot)
library(dplyr)
library(readr)
library(lubridate)
df <- read_csv("meteocontamsivicMartorell.csv.txt") %>%
mutate(
date = as.Date(date),
temps_numeric = as.numeric(date),
dia_setmana = as.factor(wday(date, week_start = 1))
) %>%
na.omit()
---
lag_max <- 7
cb_no2 <- crossbasis(df$no2, lag=lag_max,
argvar=list(fun="ns", df=3),
arglag=list(fun="ns", df=3))
cb_pm10 <- crossbasis(df$pm10, lag=lag_max,
argvar=list(fun="ns", df=3),
arglag=list(fun="ns", df=3))
fit <- gam(
casos_total ~
cb_no2 +
cb_pm10 +
s(temp_mean, k=5) +
s(rh, k=5) +
s(temps_numeric, k=30) +
dia_setmana +
offset(log(poblacio_total)),
family=quasipoisson(link="log"),
data=df,
method="REML"
)
---
rr <- exp(predict(fit, type="link"))
af <- (rr - 1) / rr
casos_atribuibles <- sum(df$casos_total * af, na.rm=TRUE)
casos_atribuibles
---
calc_attr <- function(data, indices) {
d <- data[indices, ]
cb_no2 <- crossbasis(d$no2, lag=7,
argvar=list(fun="ns", df=3),
arglag=list(fun="ns", df=3))
cb_pm10 <- crossbasis(d$pm10, lag=7,
argvar=list(fun="ns", df=3),
arglag=list(fun="ns", df=3))
fit <- gam(
casos_total ~
cb_no2 +
cb_pm10 +
s(temp_mean, k=5) +
s(rh, k=5) +
s(temps_numeric, k=30) +
dia_setmana +
offset(log(poblacio_total)),
family=quasipoisson(link="log"),
data=d
)
rr <- exp(predict(fit, type="link"))
af <- (rr - 1) / rr
sum(d$casos_total * af, na.rm=TRUE)
}
set.seed(123)
boot_res <- boot(df, calc_attr, R=500)
---
ci <- boot.ci(boot_res, type="perc")
mean_val <- mean(boot_res$t)
low <- ci$percent[4]
high <- ci$percent[5]
cat("Casos atribuïbles:", mean_val, "\n")
cat("IC 95%:", low, "-", high)
---