From 6925150c8c59042c74abf0a97c8338d98283d2b2 Mon Sep 17 00:00:00 2001 From: "Franck.Gaspard" <franck.gaspard@developpement-durable.gouv.fr> Date: Mon, 13 Jan 2025 15:09:55 +0000 Subject: [PATCH] =?UTF-8?q?adaptation=20de=20data-raw/chargement=5Focsge?= =?UTF-8?q?=20=C3=A0=20la=20nouvelle=20livraison=20de=20l'OCSGE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data-raw/chargement_ocsge.R | 303 +++++++++++++++++++++++++++++------- 1 file changed, 246 insertions(+), 57 deletions(-) diff --git a/data-raw/chargement_ocsge.R b/data-raw/chargement_ocsge.R index 3df1231..eab7997 100644 --- a/data-raw/chargement_ocsge.R +++ b/data-raw/chargement_ocsge.R @@ -1,7 +1,7 @@ # chargement_ocsge -library(magritt) +library(magrittr) rm(list=ls()) @@ -17,15 +17,15 @@ con_referentiels <- DBI::dbConnect( ) -# fonction get_ocsge_dep_com --------- - +# fonctions get_ocsge_dep_com --------- # elle decoupe une couche dep ocsge millesimée aux contours des communes de la bd topo # et renvoie une ligne par commune, usage et couverture -get_ocsge_dep_com <- function(dep, mil = "2016") { - query = paste0("SELECT com.code_insee, ocsge.couverture, ocsge.usage, ocsge.millesime, ", +## ocsge ancienne génération ----- +get_ocsge_dep_com_anc_gen <- function(dep, mil = "2016") { + query = paste0("SELECT com.code_insee, ocsge.couverture, ocsge.usage, ocsge.millesime, ", "SUM(ST_Area(ST_intersection(ocsge.the_geom, com.the_geom))) AS surf_intersection_m2 ", - "FROM ocsge.n_occupation_sol_", mil, "_0", dep, " AS ocsge, ", + "FROM ocs_ge_ancienne_generation.n_occupation_sol_", mil, "_0", dep, " AS ocsge, ", "bdtopo_v3.n_bdt_commune_s_r52 AS com ", "WHERE ST_intersects(ocsge.the_geom, com.the_geom)", "GROUP BY com.code_insee, ocsge.couverture, ocsge.usage, ocsge.millesime", @@ -33,95 +33,279 @@ get_ocsge_dep_com <- function(dep, mil = "2016") { DBI::dbGetQuery(con_referentiels, query) } +## ocsge nouvelle génération -------- +get_ocsge_dep_com_nouv_gen <- function(dep, mil = "2019") { + query = paste0("SELECT com.code_insee, ocsge.code_cs, ocsge.code_us, ocsge.millesime, ", + "SUM(ST_Area(ST_intersection(ocsge.the_geom, com.the_geom))) AS surf_intersection_m2 ", + "FROM ocs_ge_nouvelle_generation.n_ocsge_occupation_sol_", mil, "_s_d", dep, " AS ocsge, ", + "bdtopo_v3.n_bdt_commune_s_r52 AS com ", + "WHERE ST_intersects(ocsge.the_geom, com.the_geom)", + "GROUP BY com.code_insee, ocsge.code_cs, ocsge.code_us, ocsge.millesime", + ";") + DBI::dbGetQuery(con_referentiels, query) +} + -# application de la fonction aux 5 départements et aux 2 millésimes ------- -ocsge_pdl <- purrr::map2_dfr( - .x = rep(c("44", "49", "53", "72", "85"), 2), - .y = c(rep("2013man", 5), rep("2016", 5)), - .f = ~ get_ocsge_dep_com(dep = .x, mil = .y) ) %>% +# calcul d'ocsge ancienne génération --------- +ocsge_pdl_anc_gen <- purrr::map2_dfr( + .x = rep(c("44", "49", "53", "72", "85"), 2), + .y = c(rep("2013man", 5), rep("2016", 5)), + .f = ~ get_ocsge_dep_com_anc_gen(dep = .x, mil = .y) ) %>% + dplyr::mutate(date = lubridate::make_date(millesime, 06, 30)) + + +# calcul d'ocsge nouvelle génération ---------- +ocsge_pdl_2022 <- purrr::map2_dfr( + .x = rep(c("44", "49", "53", "72", "85"), 1), + .y = c(rep("2022", 5)), + .f = ~ get_ocsge_dep_com_nouv_gen(dep = .x, mil = .y) ) %>% + dplyr::mutate(date = lubridate::make_date(millesime, 06, 30)) + +ocsge_pdl_2020 <- purrr::map2_dfr( + .x = rep(c("44", "49"), 1), + .y = c(rep("2020", 2)), + .f = ~ get_ocsge_dep_com_nouv_gen(dep = .x, mil = .y) ) %>% + dplyr::mutate(date = lubridate::make_date(millesime, 06, 30)) + +ocsge_pdl_2019 <- purrr::map2_dfr( + .x = rep(c("53", "72", "85"), 1), + .y = c(rep("2019", 3)), + .f = ~ get_ocsge_dep_com_nouv_gen(dep = .x, mil = .y)) %>% + dplyr::mutate(date = lubridate::make_date(millesime, 06, 30)) + +ocsge_pdl_nouv_gen <- dplyr::bind_rows( + ocsge_pdl_2022, + ocsge_pdl_2020, + ocsge_pdl_2019 + ) %>% + tidyr::complete() %>% dplyr::mutate(date = lubridate::make_date(millesime, 06, 30)) # en cas de connexion par VPN, mieux vaut procéder par étape pour éviter de tout perdre en cas de déconnexion -# ocsge_44 <- get_ocsge_dep_com(dep = "44", mil = "2016") -# ocsge_49 <- get_ocsge_dep_com(dep = "49", mil = "2016") -# ocsge_53 <- get_ocsge_dep_com(dep = "53", mil = "2016") -# ocsge_72 <- get_ocsge_dep_com(dep = "72", mil = "2016") -# ocsge_85 <- get_ocsge_dep_com(dep = "85", mil = "2016") -# ocsge_44_2013 <- get_ocsge_dep_com(dep = "44", mil = "2013man") -# ocsge_49_2013 <- get_ocsge_dep_com(dep = "49", mil = "2013man") -# ocsge_53_2013 <- get_ocsge_dep_com(dep = "53", mil = "2013man") -# ocsge_72_2013 <- get_ocsge_dep_com(dep = "72", mil = "2013man") -# ocsge_85_2013 <- get_ocsge_dep_com(dep = "85", mil = "2013man") -# -# ocsge_pdl <- dplyr::bind_rows( -# ocsge_44, ocsge_49, ocsge_53, ocsge_72, ocsge_85, -# ocsge_44_2013, ocsge_49_2013, ocsge_53_2013, ocsge_72_2013, ocsge_85_2013 -# ) %>% -# dplyr::mutate(date = lubridate::make_date(millesime + 1, 01, 01)) +ocsge_44_2022 <- get_ocsge_dep_com_nouv_gen(dep = "44", mil = "2022") +ocsge_44_2020 <- get_ocsge_dep_com_nouv_gen(dep = "44", mil = "2020") +ocsge_49_2022 <- get_ocsge_dep_com_nouv_gen(dep = "49", mil = "2022") +ocsge_49_2020 <- get_ocsge_dep_com_nouv_gen(dep = "49", mil = "2020") +ocsge_53_2022 <- get_ocsge_dep_com_nouv_gen(dep = "53", mil = "2022") +ocsge_53_2019 <- get_ocsge_dep_com_nouv_gen(dep = "53", mil = "2019") +ocsge_72_2022 <- get_ocsge_dep_com_nouv_gen(dep = "72", mil = "2022") +ocsge_72_2019 <- get_ocsge_dep_com_nouv_gen(dep = "72", mil = "2019") +ocsge_85_2022 <- get_ocsge_dep_com_nouv_gen(dep = "85", mil = "2022") +ocsge_85_2019 <- get_ocsge_dep_com_nouv_gen(dep = "85", mil = "2019") + +ocsge_pdl_nouv_gen <- dplyr::bind_rows( + ocsge_44_2022, ocsge_49_2022, ocsge_53_2022, ocsge_72_2022, ocsge_85_2022, + ocsge_44_2020, ocsge_49_2020, + ocsge_53_2019, ocsge_72_2019, ocsge_85_2019 + ) %>% + dplyr::mutate(date = lubridate::make_date(millesime, 06, 30)) + +# sauveagarde provisoire ---------- +save.image(file = "sauvegarde_provisoire.RData") + + +# déconnexion au SGBDR ---------- DBI::dbDisconnect(con_referentiels) # versement de ocsge_pdl dans le sgbd/datamart.portrait_territoires ------------- -ocsge_pdl_2013 <- dplyr::filter(ocsge_pdl, millesime == 2013) -ocsge_pdl_2016 <- dplyr::filter(ocsge_pdl, millesime == 2016) +## ocsge ancienne génération ------------ +ocsge_pdl_2013 <- dplyr::filter(ocsge_pdl_anc_gen, millesime == 2013) datalibaba::poster_data( data = ocsge_pdl_2013, db = "datamart", - schema = "portrait_territoires", + schema = "portrait_territoires", table = "ocsge_pdl_couverture_usage_2013", - post_row_name = FALSE, + post_row_name = FALSE, overwrite = TRUE, droits_schema = TRUE, user = "does" - ) +) +ocsge_pdl_2016 <- dplyr::filter(ocsge_pdl_anc_gen, millesime == 2016) datalibaba::poster_data( data = ocsge_pdl_2016, db = "datamart", - schema = "portrait_territoires", + schema = "portrait_territoires", table = "ocsge_pdl_couverture_usage_2016", - post_row_name = FALSE, + post_row_name = FALSE, overwrite = TRUE, droits_schema = TRUE, user = "does" ) +## ocsge nouvelle génération ------------ +ocsge_pdl_2022 <- dplyr::filter(ocsge_pdl_nouv_gen, millesime == 2022) +datalibaba::poster_data( + data = ocsge_pdl_2022, + db = "datamart", + schema = "portrait_territoires", + table = "ocsge_pdl_couverture_usage_2022", + post_row_name = FALSE, + overwrite = TRUE, + droits_schema = TRUE, + user = "does" +) + +ocsge_pdl_2020 <- dplyr::filter(ocsge_pdl_nouv_gen, millesime == 2020) +datalibaba::poster_data( + data = ocsge_pdl_2020, + db = "datamart", + schema = "portrait_territoires", + table = "ocsge_pdl_couverture_usage_2020", + post_row_name = FALSE, + overwrite = TRUE, + droits_schema = TRUE, + user = "does" +) + +ocsge_pdl_2019 <- dplyr::filter(ocsge_pdl_nouv_gen, millesime == 2019) +datalibaba::poster_data( + data = ocsge_pdl_2019, + db = "datamart", + schema = "portrait_territoires", + table = "ocsge_pdl_couverture_usage_2019", + post_row_name = FALSE, + overwrite = TRUE, + droits_schema = TRUE, + user = "does" +) # typologie des espaces ------- -ocsge_type <- dplyr::select(ocsge_pdl, couverture, usage) %>% + +## ancienne nomenclature ------------- +# ocsge_type <- dplyr::select(ocsge_pdl, couverture, usage) %>% +# dplyr::distinct() %>% +# dplyr::mutate( +# type_espace = dplyr::case_when( +# couverture %in% c("CS1.1.1.1", "CS1.1.1.2", "CS1.1.2.1", "CS1.1.2.2") ~ "espace_artificialise", +# couverture == "CS1.2.1" & +# usage %in% c("US1.1", "US1.3", "US1.4", "US235", "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.2", "US4.3", "US6.1", "US6.2") ~ "espace_artificialise", +# couverture == "CS1.2.1" & +# usage %in% c("US1.2", "US1.5", "US6.3", "US6.4") ~ "autre_surface_naturelle", +# couverture == "CS1.2.2" ~ "surface_en_eau", +# couverture %in% c("CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3", "CS2.1.2", "CS2.1.3", "CS2.2.1", "CS2.2.2") & usage == "US1.1" ~ "espace_agricole", +# couverture %in% c("CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3","CS2.1.1", "CS2.1.2", "CS2.1.3") & usage %in% c("US1.3", "US235") ~ "espace_artificialise", +# couverture %in% c("CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3", "CS2.1.2", "CS2.1.3") & +# usage %in% c("US1.2", "US1.4", "US1.5", "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.1.5", "US4.2", "US4.3", "US6.1", "US6.2", "US6.3", "US6.4") ~ "surface_naturelle_boisee", +# couverture == "CS2.2.1" & +# usage %in% c("US1.3", "US1.4", "US1.5", "US235", "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.1.5", "US4.2", "US4.3", "US6.1", "US6.2") ~ "espace_artificialise", +# couverture == "CS2.2.1" & +# usage %in% c("US1.2", "US6.3", "US6.4") ~ "autre_surface_naturelle", +# couverture == "CS2.2.2" & +# usage != "US1.1" ~ "a_definir", +# TRUE ~ "a_revoir" +# ) +# ) + +## nouvelle nomenclature --------------- +# passage entre les catégories d'usage et de couverture du sol +# vers Artif / Non Artif +# elle a été établie à la demande de la DGALN par l'IGN +# lien vers la nouvelle nomenclature : +# https://artificialisation.developpement-durable.gouv.fr/sites/artificialisation/files/fichiers/2022/05/2022_05_03_Tableau-OCSGE-CouvUsage-ARTIFICIALISATION%5B1%5D.pdf + +## ocsge ancienne génération ---------- +ocsge_pdl_anc_gen <- ocsge_pdl_anc_gen %>% + dplyr::rename( + code_cs = couverture, + code_us = usage + ) %>% + dplyr::mutate(millesime = as.character(millesime)) + +# unique(ocsge_pdl_anc_gen$code_us) +# [1] "US1.1" "US235" "US6.1" "US6.2" "US4.1.1" "US4.3" "US4.1.2" "US6.3" "US1.2" "US4.1.3" "US4.1.4" "US1.3" "US1.4" +# [14] "US4.2" + +# unique(ocsge_pdl_anc_gen$code_cs) +# [1] "CS1.1.1.1" "CS1.1.1.2" "CS1.1.2.1" "CS1.2.1" "CS1.2.2" "CS2.1.1.1" "CS2.1.1.2" "CS2.1.1.3" "CS2.1.2" "CS2.2.1" "CS2.1.3" +# [12] "CS2.1.1" "CS1.1.2.2" "CS2.2.2" + +ocsge_type_anc_gen <- dplyr::select(ocsge_pdl_anc_gen, code_cs, code_us) %>% dplyr::distinct() %>% dplyr::mutate( type_espace = dplyr::case_when( - couverture %in% c("CS1.1.1.1", "CS1.1.1.2", "CS1.1.2.1", "CS1.1.2.2") ~ "espace_artificialise", - couverture == "CS1.2.1" & - usage %in% c("US1.1", "US1.3", "US1.4", "US235", "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.2", "US4.3", "US6.1", "US6.2") ~ "espace_artificialise", - couverture == "CS1.2.1" & - usage %in% c("US1.2", "US1.5", "US6.3", "US6.4") ~ "autre_surface_naturelle", - couverture == "CS1.2.2" ~ "surface_en_eau", - couverture %in% c("CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3", "CS2.1.2", "CS2.1.3", "CS2.2.1", "CS2.2.2") & usage == "US1.1" ~ "espace_agricole", - couverture %in% c("CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3","CS2.1.1", "CS2.1.2", "CS2.1.3") & usage %in% c("US1.3", "US235") ~ "espace_artificialise", - couverture %in% c("CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3", "CS2.1.2", "CS2.1.3") & - usage %in% c("US1.2", "US1.4", "US1.5", "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.1.5", "US4.2", "US4.3", "US6.1", "US6.2", "US6.3", "US6.4") ~ "surface_naturelle_boisee", - couverture == "CS2.2.1" & - usage %in% c("US1.3", "US1.4", "US1.5", "US235", "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.1.5", "US4.2", "US4.3", "US6.1", "US6.2") ~ "espace_artificialise", - couverture == "CS2.2.1" & - usage %in% c("US1.2", "US6.3", "US6.4") ~ "autre_surface_naturelle", - couverture == "CS2.2.2" & - usage != "US1.1" ~ "a_definir", + code_cs %in% c("CS1.1.1.1", "CS1.1.1.2", "CS1.1.2.2") ~ "espace_artificialise", + code_cs == "CS1.1.2.1" & code_us == "US1.3" ~ "espace_non_artificialise", + code_cs == "CS1.1.2.1" & + code_us %in% c("US1.1", "US1.2", "US1.4", "US1.5", "US235", "US2", "US3","US5", + "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.1.5", + "US4.2", "US4.3", "US6.1", "US6.2", "US6.3", "US6.6") ~ "espace_artificialise", + code_cs %in% c("CS1.2.1", "CS1.2.2", "CS1.2.3", + "CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3", + "CS2.1.2", "CS2.1.3") ~ "espace_non_artificialise", + code_cs %in% c("CS2.2.1", "CS2.2.2") & + code_us %in% c("US1.1", "US1.2", "US1.3", "US1.4", "US6.3", "US6.6") ~ "espace_non_artificialise", + code_cs %in% c("CS2.2.1", "CS2.2.2") & + code_us %in% c("US235", "US2", "US3","US5","US4.1.1", "US4.1.2", + "US4.1.3", "US4.1.4", "US4.1.5", + "US4.2", "US4.3", "US6.1", "US6.2") ~ "espace_artificialise", + code_cs == "CS2.1.1" & code_us == "US235" ~ "espace_non_artificialise", # ajouté pour le cas de "44014" en 2013 TRUE ~ "a_revoir" - ) ) + ) -# chargement de la liste des depcom COGiter région PDL et région PDL + epci ------- -source("R/levels_facteurs_com.R") +## ocsge nouvelle génération ------------ + +# unique(ocsge_pdl_nouv_gen$code_us) +# [1] "US1.1" "US2" "US3" "US4.3" "US5" "US6.1" "US6.2" "US4.1.1" "US4.1.2" "US6.3" "US1.2" "US235" "US1.4" +# [14] "US4.2" "US4.1.3" "US4.1.4" "US1.3" + +# unique(ocsge_pdl_nouv_gen$code_cs) +# [1] "CS1.1.1.1" "CS1.1.1.2" "CS1.1.2.1" "CS1.2.1" "CS1.2.2" "CS2.1.1.1" "CS2.1.1.2" "CS2.1.1.3" "CS2.1.2" "CS2.2.1" "CS2.1.3" +# [12] "CS1.1.2.2" + +ocsge_type_nouv_gen <- dplyr::select(ocsge_pdl_nouv_gen, code_cs, code_us) %>% + dplyr::distinct() %>% + dplyr::mutate( + type_espace = dplyr::case_when( + code_cs %in% c("CS1.1.1.1", "CS1.1.1.2", "CS1.1.2.2") ~ "espace_artificialise", + code_cs == "CS1.1.2.1" & code_us == "US1.3" ~ "espace_non_artificialise", + code_cs == "CS1.1.2.1" & + code_us %in% c("US1.1", "US1.2", "US1.4", "US1.5", "US235", "US2", "US3","US5", + "US4.1.1", "US4.1.2", "US4.1.3", "US4.1.4", "US4.1.5", + "US4.2", "US4.3", "US6.1", "US6.2", "US6.3", "US6.6") ~ "espace_artificialise", + code_cs %in% c("CS1.2.1", "CS1.2.2", "CS1.2.3", + "CS2.1.1.1", "CS2.1.1.2", "CS2.1.1.3", + "CS2.1.2", "CS2.1.3") ~ "espace_non_artificialise", + code_cs %in% c("CS2.2.1", "CS2.2.2") & + code_us %in% c("US1.1", "US1.2", "US1.3", "US1.4", "US6.3", "US6.6") ~ "espace_non_artificialise", + code_cs %in% c("CS2.2.1", "CS2.2.2") & + code_us %in% c("US235", "US2", "US3","US5","US4.1.1", "US4.1.2", + "US4.1.3", "US4.1.4", "US4.1.5", + "US4.2", "US4.3", "US6.1", "US6.2") ~ "espace_artificialise", + TRUE ~ "a_revoir" + ) + ) # calcul des indicateurs communaux ----------- -source_ocsge <- ocsge_pdl %>% - dplyr::left_join(ocsge_type) %>% + +# chargement de la liste des depcom COGiter région PDL et région PDL + epci +source("R/levels_facteurs_com.R") + +source_ocsge_anc_gen <- ocsge_pdl_anc_gen %>% + dplyr::left_join(ocsge_type_anc_gen) %>% + dplyr::select( + depcom = code_insee, + date, + variable = type_espace, + valeur = surf_intersection_m2 + ) %>% + dplyr::group_by(date, depcom, variable) %>% + dplyr::summarise(valeur = sum(valeur), .groups = "drop") %>% + dplyr::mutate_if(is.character, as.factor) %>% + dplyr::mutate(depcom = forcats::fct_expand(depcom, com_reg)) %>% + tidyr::complete(depcom, date, variable, fill = list(valeur = NA), explicit = FALSE) %>% + dplyr::mutate(depcom = forcats::fct_expand(depcom, com_reg_et_vois)) %>% + tidyr::complete(depcom, date, variable, fill = list(valeur = NA)) %>% + tidyr::pivot_wider(names_from = variable,values_from = valeur) + +source_ocsge_nouv_gen <- ocsge_pdl_nouv_gen %>% + dplyr::left_join(ocsge_type_nouv_gen) %>% dplyr::select( depcom = code_insee, date, @@ -132,10 +316,15 @@ source_ocsge <- ocsge_pdl %>% dplyr::summarise(valeur = sum(valeur), .groups = "drop") %>% dplyr::mutate_if(is.character, as.factor) %>% dplyr::mutate(depcom = forcats::fct_expand(depcom, com_reg)) %>% - tidyr::complete(depcom, date, variable, fill = list(valeur = 0), explicit = FALSE) %>% + tidyr::complete(depcom, date, variable, fill = list(valeur = NA), explicit = FALSE) %>% dplyr::mutate(depcom = forcats::fct_expand(depcom, com_reg_et_vois)) %>% tidyr::complete(depcom, date, variable, fill = list(valeur = NA)) %>% tidyr::pivot_wider(names_from = variable,values_from = valeur) + +source_ocsge <- dplyr::bind_rows( + source_ocsge_nouv_gen, + source_ocsge_anc_gen +) # versement de ocsge dans le sgbd/datamart.portrait_territoires et metadonnées ------------- -- GitLab