chunk like this:
library(tidyverse)
## Warning: le package 'tidyr' a été compilé avec la version R 4.3.2
## Warning: le package 'readr' a été compilé avec la version R 4.3.2
## Warning: le package 'dplyr' a été compilé avec la version R 4.3.2
## Warning: le package 'stringr' a été compilé avec la version R 4.3.2
## Warning: le package 'forcats' a été compilé avec la version R 4.3.2
## Warning: le package 'lubridate' a été compilé avec la version R 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(datalibaba)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
# Database role used for every connection and every metadata query below.
role <- "does"

# One connection per catalogued database, stored in a list named after the
# databases so that later steps can retrieve a connection by database name.
db_list <- lapply(
  stats::setNames(nm = c("consultation", "referentiels", "datamart", "si_eau")),
  function(nom_base) datalibaba::connect_to_db(nom_base, user = role)
)
# Database-level comments, one query per database in db_list.
# Missing comments become empty strings, and display_name is built as
# "<base> : <commentaire>".
bases_avec_commentaires <- names(db_list) %>%
  purrr::map_dfr(function(nom) datalibaba::get_db_comment(db = nom, user = role)) %>%
  dplyr::rename(base = nom_base) %>%
  dplyr::mutate(
    commentaire = tidyr::replace_na(commentaire, ""),
    display_name = paste(base, commentaire, sep = " : ")
  )
# Schemas of every database: one row per schema, sorted by name within each
# database and tagged with the database it comes from.
db_schema_list0 <- purrr::map2_dfr(
  .x = db_list,
  .y = names(db_list),
  .f = function(connexion, nom_base) {
    schemas <- tibble::enframe(
      datalibaba::list_schemas(connexion),
      name = NULL,
      value = "nom_schema"
    )
    dplyr::mutate(dplyr::arrange(schemas, nom_schema), base = nom_base)
  }
) %>%
  # Discard the "technical" schemas of each database, the catalogue's own
  # datamart schema, and the archive schemas (z_ / zz_ prefixes).
  dplyr::filter(
    !(nom_schema %in% c("pg_catalog", "public", "information_schema")) &
      nom_schema != "catalogue" &
      !grepl("^zz_|^z_", nom_schema)
  )
# Schema-level comments: one call per (schema, database) pair of
# db_schema_list0, each result tagged with its database name.
commentaires_schema <- purrr::map2_dfr(
  .x = db_schema_list0$nom_schema,
  .y = db_schema_list0$base,
  .f = function(schema_courant, base_courante) {
    commentaire_schema <- datalibaba::get_schema_comment(
      schema = schema_courant,
      db = base_courante,
      user = role
    )
    dplyr::mutate(commentaire_schema, base = base_courante)
  }
)
# Final list of schemas with their comment and a display label.
# The NA -> "" replacement is done AFTER the join: a schema with no row in
# commentaires_schema would otherwise come out of the left join with an NA
# comment and a display_name ending in " : NA".
db_schema_list <- db_schema_list0 %>%
  dplyr::left_join(commentaires_schema, by = c("base", "nom_schema")) %>%
  dplyr::mutate(
    commentaire = tidyr::replace_na(commentaire, ""),
    # "<schema> : <commentaire>", with the dangling ": " removed when the
    # comment is empty.
    display_name = gsub(": $", "", paste(nom_schema, commentaire, sep = " : "))
  )
# The intermediate, comment-less list is no longer needed.
rm(db_schema_list0)
# 3 - Tables
# 3-1 List of the tables accessible to the role
#
# List the tables of one schema.
#
# @param i Index of a row of `schemas` (defaults to the first row).
# @param schemas Data frame of schemas with at least the columns `base` and
#   `nom_schema`. Defaults to the script-level `db_schema_list`, so existing
#   calls `lister_tables(i)` behave as before.
# @param connexions List of database connections named by database.
#   Defaults to the script-level `db_list`.
# @return A tibble with one row per table: a `table` column holding the
#   table name, plus every column of the selected row of `schemas`.
lister_tables <- function(i = 1, schemas = db_schema_list, connexions = db_list) {
  schema_courant <- schemas[i, ]
  datalibaba::list_tables(
    db = schema_courant$base,
    schema = schema_courant$nom_schema,
    con = connexions[[schema_courant$base]]
  ) %>%
    tibble::enframe(name = NULL, value = "table") %>%
    # Attach the schema row (database, schema, comment, ...) to every table.
    dplyr::cross_join(schema_courant)
}
# Fail early and explicitly when there is nothing to list: the steps below
# need at least one schema to produce the base / nom_schema / table columns.
if (nrow(db_schema_list) == 0) {
  stop("No schema to catalogue: db_schema_list is empty.", call. = FALSE)
}

# All tables of all schemas. seq_len() is used instead of 1:nrow(), which
# would iterate over c(1, 0) on an empty schema list.
tb_sch_base_0 <- purrr::map_dfr(seq_len(nrow(db_schema_list)), lister_tables) %>%
  dplyr::arrange(base, nom_schema, table)

# Tables whose name is not entirely lower case; reported separately at the
# end of the script.
anomalies_noms_tables <- tb_sch_base_0 %>%
  dplyr::filter(tolower(table) != table)

## For the record: this is how to add quotes where needed
# mutate(table = if_else(tolower(table) != table, paste0('"', table, '"'), table)) %>%

# Tables kept in the catalogue: lower-case names only, archive tables
# (z_ / zz_ prefixes) excluded.
tb_sch_base <- tb_sch_base_0 %>%
  dplyr::filter(tolower(table) == table, !grepl("^zz_|^z_", table)) #, nom_schema != "culture_societe_service"
# Reference table of field types, joined to the columns on `type` below.
types_ref <- read.csv2(file = "../data-raw/ref_type_champs.csv")

# Column metadata query, run against the information schema of every
# database listed in db_list.
requete_colonnes <- paste(
  "SELECT table_catalog AS base, table_schema AS nom_schema, table_name AS nom_table,",
  "column_name AS nom_col, ordinal_position AS num_ordre, udt_name AS type",
  "FROM information_schema.columns",
  sep = "\n"
)

types_champs <- purrr::map_dfr(
  db_list,
  function(connexion) DBI::dbGetQuery(connexion, requete_colonnes)
) %>%
  # Keep only the records matching the tables listed in tb_sch_base.
  # `base` is part of the key so that a schema/table pair with the same name
  # in another database is not kept by mistake (the later join with the
  # table comments is keyed on `base` as well).
  dplyr::semi_join(tb_sch_base, by = c("base", "nom_schema", "nom_table" = "table")) %>%
  dplyr::arrange(base, nom_schema, nom_table, num_ordre) %>%
  dplyr::left_join(types_ref, by = "type")
# Table and column comments for every table of tb_sch_base.
# seq_len() is used instead of 1:nrow(), which would iterate over c(1, 0)
# and query a non-existent table when tb_sch_base is empty.
commentaires_tables <- purrr::map_dfr(
  .x = seq_len(nrow(tb_sch_base)),
  .f = function(i) {
    datalibaba::get_table_comments(
      table = tb_sch_base$table[i],
      schema = tb_sch_base$nom_schema[i],
      db = tb_sch_base$base[i],
      user = role
    ) %>%
      # A missing column name becomes ""; the database name is added so the
      # result can be joined on `base`.
      dplyr::mutate(
        nom_col = dplyr::coalesce(nom_col, ""),
        base = tb_sch_base$base[i]
      )
  }
)
# Catalogue of tables: column types and comments merged on
# (base, nom_schema, nom_table, nom_col), keeping rows present on either
# side, de-duplicated and sorted.
tables_catalog <- types_champs %>%
  dplyr::full_join(
    commentaires_tables,
    by = c("base", "nom_schema", "nom_table", "nom_col")
  ) %>%
  dplyr::distinct() %>%
  dplyr::arrange(base, nom_schema, nom_table)
# Full-text search base: one row per table, whose description is the table
# name followed by one line per column ("<commentaire> : <nom_col>", or just
# the column name when there is no comment).
base_recherche_txt <- tables_catalog %>%
  dplyr::mutate(
    col_info = dplyr::if_else(
      is.na(commentaire),
      nom_col,
      paste0(commentaire, " : ", nom_col)
    )
  ) %>%
  dplyr::group_by(base, nom_schema, nom_table) %>%
  # Groups are dropped here: with the groups kept, row_number() below would
  # restart in every one-row group and give id = 1 on every line.
  dplyr::summarise(description = paste(col_info, collapse = "\n"), .groups = "drop") %>%
  dplyr::mutate(
    description = paste(nom_table, description, sep = "\n"),
    # Unique identifier for each row
    id = dplyr::row_number()
  ) %>%
  dplyr::select(id, base, nom_schema, nom_table, description)
# Build date (dd/mm/yyyy), saved alongside the data.
date_datamart <- format(Sys.Date(), "%d/%m/%Y")

# Name of the RData file
nom_datamart <- "datamart_catalogue.RData"

# Names of the objects to include in the datamart
tb_datamart <- c(
  "commentaires_schema", "tables_catalog", "base_recherche_txt",
  "bases_avec_commentaires", "commentaires_tables",
  "db_schema_list", "tb_sch_base", "date_datamart"
)

# The datamart is written twice: in the working directory and in the
# production folder.
save(list = tb_datamart, file = nom_datamart)
save(list = tb_datamart, file = paste0("T:/datalab/SCTE/CATALOGUE/PRODUCTION/", nom_datamart))

# Export the tables with a non-lower-case name, if any, to a dated CSV file.
if (nrow(anomalies_noms_tables) > 0) {
  write.csv2(
    anomalies_noms_tables,
    file = paste0(
      "T:/datalab/SCTE/CATALOGUE/DONNEES_INTERNES/anomalies_noms_tables_",
      Sys.Date(),
      ".csv"
    )
  )
}