Aquest codi implementa un Model Additiu Generalitzat per analitzar l'impacte del NO2 sobre la salut, ajustant per meteorologia i estacionalitat.
# ==============================================================================
# 1. PREPARATION & LIBRARIES
# ==============================================================================
packages <- c("mgcv", "readr", "dplyr", "lubridate", "ggplot2")
for (p in packages) {
if (!require(p, character.only = TRUE)) install.packages(p)
library(p, character.only = TRUE)
}
# ==============================================================================
# 2. DATA LOADING, CLEANING & FEATURE ENGINEERING (Corrected)
# ==============================================================================
# List of public holidays (Catalonia/Spain)
holidays_cat <- c("01-01", "01-06", "05-01", "06-24", "08-15", "09-11",
"10-12", "11-01", "12-06", "12-08", "12-25", "12-26")
df <- read_csv("meteocontamsivicMartorell.csv", show_col_types = FALSE) %>%
arrange(date) %>%
# Renaming native Catalan columns to English for consistency
rename(
total_cases = casos_total,
total_population = poblacio_total
) %>%
mutate(
date = as.Date(date),
day_of_week = as.factor(wday(date, week_start = 1)),
time_numeric = as.numeric(date),
# Seasonality and Inertia
day_of_year = yday(date),
cases_yesterday = log(lag(total_cases, n=1) + 1),
# Moving average for NO2 (Lag 0-2)
no2_lag02 = (no2 + lag(no2, n=1) + lag(no2, n=2)) / 3,
# Control variables
pandemic = as.factor(ifelse(date >= as.Date("2020-03-14") & date <= as.Date("2022-03-31"), "Yes", "No")),
day_month = format(date, "%m-%d"),
is_holiday = as.factor(ifelse(day_month %in% holidays_cat, "Yes", "No")),
# Threshold Variables for mean temperatures
degrees_above_25 = ifelse(temp_mean > 25, temp_mean - 25, 0),
degrees_below_15 = ifelse(temp_mean < 15, 15 - temp_mean, 0),
# EXTREME WEATHER VARIABLES (Heatwaves & Coldwaves)
heatwave = ifelse(temp_max > 34, temp_max - 34, 0),
coldwave = ifelse(temp_min < 5, 5 - temp_min, 0)
) %>%
# 🔥 A QUÍ ESTÀ LA CORRECCIÓ: Sense pm10 ni pm2.5
select(date, total_cases, total_population, no2_lag02, temp_mean, temp_max, temp_min, rh,
time_numeric, day_of_week, day_of_year, cases_yesterday, pandemic, is_holiday,
degrees_above_25, degrees_below_15, heatwave, coldwave) %>%
filter(
complete.cases(.)
)
cat("Clean rows ready for modeling:", nrow(df), "\n")
# ==============================================================================
# 3. BASE GAM MODEL (Advanced Epidemiological Model)
# ==============================================================================
model_base <- gam(
total_cases ~
s(no2_lag02, bs = "cr", k = 5) +
s(temp_mean, bs = "cr", k = 10) +
s(rh, bs = "cr", k = 5) +
s(day_of_year, bs = "cc", k = 12) +
s(time_numeric, bs = "cr", k = 30) +
s(cases_yesterday, bs = "cr", k = 5) +
day_of_week + is_holiday + pandemic +
offset(log(total_population)),
family = nb(),
data = df,
method = "REML"
)
# ==============================================================================
# 4. MODEL DIAGNOSTICS & SUMMARY
# ==============================================================================
summary(model_base)
par(mfrow = c(2, 2))
gam.check(model_base)
par(mfrow = c(1, 1))
# ==============================================================================
# 5. RELATIVE RISK: NO2 (Lag 0-2)
# ==============================================================================
model_lin_no2 <- gam(
total_cases ~
no2_lag02 + # Linear term
s(temp_mean, bs = "cr", k = 10) + s(rh, bs = "cr", k = 5) +
s(day_of_year, bs = "cc", k = 12) + s(time_numeric, bs = "cr", k = 30) +
s(cases_yesterday, bs = "cr", k = 5) + day_of_week + is_holiday + pandemic + offset(log(total_population)),
family = nb(), data = df, method = "REML"
)
coef_no2 <- coef(model_lin_no2)["no2_lag02"]
se_no2 <- summary(model_lin_no2)$se["no2_lag02"]
rr_10units <- exp(coef_no2 * 10)
ic_inf_no2 <- exp((coef_no2 - 1.96 * se_no2) * 10)
ic_sup_no2 <- exp((coef_no2 + 1.96 * se_no2) * 10)
cat("\n--- RR: NO2 (Lag 0-2) ---\n")
cat(sprintf("RR per 10 µg/m³ increase: %.4f [%.4f - %.4f]\n", rr_10units, ic_inf_no2, ic_sup_no2))
# ==============================================================================
# 6. DOSE-RESPONSE PLOT: NO2
# ==============================================================================
plot_data_no2 <- plot(model_base, select = 1)
df_plot_no2 <- data.frame(
no2 = plot_data_no2[[1]]$x, effect = plot_data_no2[[1]]$fit, se = plot_data_no2[[1]]$se
)
plot_no2 <- ggplot(df_plot_no2, aes(x = no2, y = effect)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
geom_ribbon(aes(ymin = effect - 1.96*se, ymax = effect + 1.96*se), alpha = 0.2, fill = "blue") +
geom_line(color = "darkblue", linewidth = 1.2) +
labs(title = "Dose-Response: Impact of NO2 on Morbidity",
subtitle = "Adjusted for environment, seasonality, and inertia",
x = "NO2 Concentration (µg/m3, Lag 0-2)", y = "Effect on log(cases)") +
theme_minimal()
print(plot_no2)
# ==============================================================================
# 7. DOSE-RESPONSE PLOT: MEAN TEMPERATURE
# ==============================================================================
plot_data_temp <- plot(model_base, select = 2)
df_plot_temp <- data.frame(
temperature = plot_data_temp[[2]]$x, effect = plot_data_temp[[2]]$fit, se = plot_data_temp[[2]]$se
)
plot_temp <- ggplot(df_plot_temp, aes(x = temperature, y = effect)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red", linewidth = 1) +
geom_ribbon(aes(ymin = effect - 1.96*se, ymax = effect + 1.96*se), alpha = 0.2, fill = "orange") +
geom_line(color = "darkorange3", linewidth = 1.2) +
labs(title = "Dose-Response: Impact of Mean Temperature",
subtitle = "Showing the J/U-shaped non-linear effect",
x = "Daily Mean Temperature (ºC)", y = "Effect on log(cases)") +
theme_minimal()
print(plot_temp)
# ==============================================================================
# 8. RELATIVE RISK: MODERATE HEAT (>25ºC Mean)
# ==============================================================================
model_heat_mean <- gam(
total_cases ~
degrees_above_25 + degrees_below_15 +
s(no2_lag02, bs = "cr", k = 5) + s(rh, bs = "cr", k = 5) + s(day_of_year, bs = "cc", k = 12) +
s(time_numeric, bs = "cr", k = 30) + s(cases_yesterday, bs = "cr", k = 5) +
day_of_week + is_holiday + pandemic + offset(log(total_population)),
family = nb(), data = df, method = "REML"
)
coef_heat_m <- coef(model_heat_mean)["degrees_above_25"]
se_heat_m <- summary(model_heat_mean)$se["degrees_above_25"]
rr_heat_m <- exp(coef_heat_m * 1)
ic_inf_heat_m <- exp((coef_heat_m - 1.96 * se_heat_m) * 1)
ic_sup_heat_m <- exp((coef_heat_m + 1.96 * se_heat_m) * 1)
cat("\n--- RR: MODERATE HEAT (Mean Temp > 25ºC) ---\n")
cat(sprintf("Increase per 1ºC extra: %.2f%% [%.2f%% to %.2f%%]\n",
(rr_heat_m - 1)*100, (ic_inf_heat_m - 1)*100, (ic_sup_heat_m - 1)*100))
# ==============================================================================
# 9. RELATIVE RISK: HEATWAVE (Max Temp > 34ºC)
# ==============================================================================
model_heatwave <- gam(
total_cases ~
heatwave +
s(no2_lag02, bs = "cr", k = 5) + s(rh, bs = "cr", k = 5) + s(day_of_year, bs = "cc", k = 12) +
s(time_numeric, bs = "cr", k = 30) + s(cases_yesterday, bs = "cr", k = 5) +
day_of_week + is_holiday + pandemic + offset(log(total_population)),
family = nb(), data = df, method = "REML"
)
coef_hw <- coef(model_heatwave)["heatwave"]
se_hw <- summary(model_heatwave)$se["heatwave"]
rr_hw <- exp(coef_hw * 1)
ic_inf_hw <- exp((coef_hw - 1.96 * se_hw) * 1)
ic_sup_hw <- exp((coef_hw + 1.96 * se_hw) * 1)
cat("\n--- RR: EXTREME HEATWAVE (Max Temp > 34ºC) ---\n")
cat(sprintf("Increase per 1ºC extra above 34ºC: %.2f%% [%.2f%% to %.2f%%]\n",
(rr_hw - 1)*100, (ic_inf_hw - 1)*100, (ic_sup_hw - 1)*100))
# ==============================================================================
# 10. RELATIVE RISK: COLDWAVE (Min Temp < 5ºC)
# ==============================================================================
model_coldwave <- gam(
total_cases ~
coldwave +
s(no2_lag02, bs = "cr", k = 5) + s(rh, bs = "cr", k = 5) + s(day_of_year, bs = "cc", k = 12) +
s(time_numeric, bs = "cr", k = 30) + s(cases_yesterday, bs = "cr", k = 5) +
day_of_week + is_holiday + pandemic + offset(log(total_population)),
family = nb(), data = df, method = "REML"
)
coef_cw <- coef(model_coldwave)["coldwave"]
se_cw <- summary(model_coldwave)$se["coldwave"]
rr_cw <- exp(coef_cw * 1)
ic_inf_cw <- exp((coef_cw - 1.96 * se_cw) * 1)
ic_sup_cw <- exp((coef_cw + 1.96 * se_cw) * 1)
cat("\n--- RR: EXTREME COLDWAVE (Min Temp < 5ºC) ---\n")
cat(sprintf("Increase per 1ºC drop below 5ºC: %.2f%% [%.2f%% to %.2f%%]\n",
(rr_cw - 1)*100, (ic_inf_cw - 1)*100, (ic_sup_cw - 1)*100))
cat("======================================================================\n")