diff --git a/README.md b/README.md index 278519f6cd3a958e1570030603a64402ae209a6e..8aad2dfdefa041ad23f786a8f631d8047f3730b5 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ la connexion est prevue pour se connecter au site "https://gitlab-forge.din.deve ## But du projet : -le but principal est de d'obtenire un tableau de bord des projets des différent dépots disponible afin d'en extraire des données afin de réaliser un tableau de bord des suivie des différent projet mener par la DREAL Pays De Le Loire Datalab. +le but principal est d'obtenir un tableau de bord des projets des différents dépôts disponibles afin d'en extraire des données et de réaliser un tableau de bord de suivi des différents projets menés par la DREAL Pays de la Loire Datalab. diff --git a/annuaire/export_pauline_2025-03.csv b/annuaire/export_pauline_2025-03.csv new file mode 100644 index 0000000000000000000000000000000000000000..c106563ee6c7b618815190afd78c079fc0490259 --- /dev/null +++ b/annuaire/export_pauline_2025-03.csv @@ -0,0 +1,13 @@ +"Civilité","Prénom","Nom","Observation","Unité","Adresse","Code postal","Ville","Messagerie","Téléphone","Mobile","Fax","Pièce","Description","Fonction hiérarchique","Fonction métier","Missions" +"Mme","Christelle","BELKACEM","Directrice de projet connaissance","DREAL Pays Loire/SCTE","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","christelle.belkacem@developpement-durable.gouv.fr","02 72 74 74 50","06 20 58 46 22","","B 316","","","","", +"M.","Denis","DOUILLARD","","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","denis.douillard@developpement-durable.gouv.fr","02 72 74 74 51","","","B 310","Chargé d'études","","","", +"Mme","Juliette","ENGELAERE-LEFEBVRE","Responsable du Centre de services de la donnée","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","Juliette.Engelaere@developpement-durable.gouv.fr","02 72 74 74 55","","","A 309","","","","", 
+"Mme","Christine","GALLAIS-JOUADET","","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","christine.gallais-jouadet@developpement-durable.gouv.fr","02 72 74 74 78","","","A311","Cheffe de projets géomatiques, spécialité données foncières","","","", +"M.","Franck","GASPARD","","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","Franck.Gaspard@developpement-durable.gouv.fr","02 72 74 74 56","","","A 307","Analyste de données","","","", +"M.","Daniel","KALIOUDJOGLOU","","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","Daniel.Kalioudjoglou@developpement-durable.gouv.fr","02 72 74 74 52","","02 74 74 74 49","A 308","Analyste de données","","","", +"M.","Matteo","LE DURAND","Apprenti","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","matteo.le-durand@developpement-durable.gouv.fr","02 72 74 74 57","","","A 309","","","","", +"M.","Edouard","MORIN","","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","edouard.morin@developpement-durable.gouv.fr","02 72 74 74 76","","","A310","","","","", +"Mme","Anne-Cécile","SIMON","","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","anne-cecile.simon@developpement-durable.gouv.fr","02 72 74 74 75","","02 72 74 74 49","A313","Géomaticienne spécialiste data","","","", +"Mme","Lisa","SMAH","Apprentie","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","lisa.smah@developpement-durable.gouv.fr","02 72 74 74 53","07 55 98 55 40","","A 300","","","","", +"M.","Philippe","TERME","Chef de projets et administrateur de données","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","Philippe.Terme@developpement-durable.gouv.fr","02 72 74 74 83","","","A 314","Chef de projets et administrateur de données","","","", +"M.","Ronan","VIGNARD","Adjoint à la 
responsable du Centre de services de la donnée","DREAL Pays Loire/SCTE/CSD","5 rue Françoise Giroud CS 16326","44263","NANTES cedex 2","ronan.vignard@developpement-durable.gouv.fr","02 72 74 74 81","06 61 15 42 17","","A312","","","","", diff --git a/dev/github_extraire.R b/dev/github_extraire.R index e8763ce2f054bb9a872951c6c46f431188c7f809..0e69eb2e60568827c0f9abfb84390e371ac4e203 100644 --- a/dev/github_extraire.R +++ b/dev/github_extraire.R @@ -3,11 +3,10 @@ library(httr) library(jsonlite) library(dplyr) library(DT) +library(stringr) # Récupérer le token GitHub depuis l'environnement -extract_before_at <- function(email) { - sub("@.*", "", email) -} + # Fonction pour récupérer les commits d'un projet GitHub avec clé API get_github_commits <- function(repo) { url <- paste0("https://api.github.com/repos/", repo, "/commits") @@ -17,12 +16,12 @@ get_github_commits <- function(repo) { } commits <- fromJSON(content(response, "text"), flatten = TRUE) commits <- commits %>% - mutate(project_name = repo, # Ajouter le nom du projet + mutate(name = repo, # Ajouter le nom du projet type = "commit", # Spécifier le type comme "commit" message = commit.message, # Utiliser le message du commit - author = extract_before_at(commit.committer.email) , # Ajout de l'auteur + author = commit.committer.email , # Ajout de l'auteur updated_at = commit.committer.date) %>% # Date de la dernière mise à jour - select(project_name, type, message, updated_at,author) # Conserver les colonnes pertinentes + select(name, type, message, updated_at,author) # Conserver les colonnes pertinentes return(commits) } @@ -35,12 +34,12 @@ get_github_issues <- function(repo) { } issues <- fromJSON(content(response, "text"), flatten = TRUE) issues <- issues %>% - mutate(project_name = repo, # Ajouter le nom du projet + mutate(name = repo, # Ajouter le nom du projet type = "issue", # Spécifier le type comme "issue" message = title, # Utiliser le titre de l'issue author = user.login , # Ajout de l'auteur 
updated_at = updated_at) %>% # Date de la dernière mise à jour - select(project_name, type, message, updated_at,author) # Conserver les colonnes pertinentes + select(name, type, message, updated_at,author) # Conserver les colonnes pertinentes return(issues) } @@ -51,7 +50,7 @@ get_github_topics <- function(repo) { stop("Failed to fetch topics from GitHub API for repo: ", repo) } topics <- fromJSON(content(response, "text"), flatten = TRUE)$names - return(data.frame(project_name = repo, topics = paste(topics, collapse = ", "))) # Combine topics en une chaîne unique + return(data.frame(name = repo, topics = paste(topics, collapse = ", "))) # Combine topics en une chaîne unique } @@ -119,6 +118,11 @@ get_github_topics <- function(repo) { commits_analyse_spatiale <- get_github_commits(repo_analyse_spatiale) issues_analyse_spatiale <- get_github_issues(repo_analyse_spatiale) + # COGiter + repo_cogiter <- "MaelTheuliere/COGiter" + commits_cogiter <- get_github_commits(repo_cogiter) + issues_cogiter <- get_github_issues(repo_cogiter) + repos <- c( "spyrales/shinygouv", "spyrales/gouvdown", "MTES-MCT/parcours_r_module_applications_shiny", @@ -130,7 +134,8 @@ get_github_topics <- function(repo) { "MTES-MCT/parcours_r_socle_preparation_des_donnees", "MTES-MCT/savoirfR", "MTES-MCT/parcours-r", "MTES-MCT/parcours_r_module_datavisualisation", - "MTES-MCT/parcours_r_module_analyse_spatiale" + "MTES-MCT/parcours_r_module_analyse_spatiale", + "MaelTheuliere/COGiter" ) topics_list <- do.call(rbind, lapply(repos, get_github_topics)) @@ -148,11 +153,34 @@ get_github_topics <- function(repo) { commits_savoirfR, issues_savoirfR, commits_parcours_r, issues_parcours_r, commits_datavisualisation, issues_datavisualisation, - commits_analyse_spatiale, issues_analyse_spatiale + commits_analyse_spatiale, issues_analyse_spatiale, + commits_cogiter,issues_cogiter ) + + rm(commits_shinygouv, issues_shinygouv, + commits_gouvdown, issues_gouvdown, + commits_app_shiny, issues_app_shiny, + 
commits_ateliers_rpackage, issues_ateliers_rpackage, + commits_multi_dim, issues_multi_dim, + commits_stat_desc, issues_stat_desc, + commits_rmarkdown, issues_rmarkdown, + commits_introduction, issues_introduction, + commits_preparation_des_donnees, issues_preparation_des_donnees, + commits_savoirfR, issues_savoirfR, + commits_parcours_r, issues_parcours_r, + commits_datavisualisation, issues_datavisualisation, + commits_analyse_spatiale, issues_analyse_spatiale, + commits_cogiter,issues_cogiter + ) + combined_data <- combined_data %>% - left_join(topics_list, by = "project_name") %>% - mutate(origine = "Github") %>% + left_join(topics_list, by = "name") %>% + mutate(origine = "Github", + groupe = case_when( + str_detect(name, "parcours_r") ~ "r-formation", + str_detect(name, "spyrales") ~ "spyrales", + TRUE ~ "autre" + )) %>% unique() # Afficher les premières lignes du tableau combiné diff --git a/dev/gitlab_classique.R b/dev/gitlab_classique.R index bfb98f42cea725322ebdadaec9ef0b17d855d3f5..780fde61e0d0cfc4c15a1f36ed26806dcf84190d 100644 --- a/dev/gitlab_classique.R +++ b/dev/gitlab_classique.R @@ -7,8 +7,34 @@ library(gitlabr) set_gitlab_connection( gitlab_url = "https://gitlab.com" , - private_token = Sys.getenv("GITLAB_PAT") + private_token = Sys.getenv("GITLAB_COM_TOKEN") ) +response <- data.frame() +# on charge les projets du groupe CSD +buildres <- function(i){ + res_proj100 <- GET( + "https://gitlab.com/api/v4/groups/6567080/projects", + add_headers(`PRIVATE-TOKEN` = Sys.getenv("GITLAB_COM_TOKEN")), + query = list( + include_subgroups = "true", + per_page = 100, + page = i + ), + timeout(120) # Timeout augmenté pour les projets + ) + res <- content(res_proj100, as = "text", encoding = "UTF-8") %>% fromJSON() + return(res) +} +#on réalise une boucle si plus de 100 projets +x <- 1 +repeat { + res <- buildres(x) + if (length(res) == 0) { + break + } + response <- bind_rows(response, res) + x <- x + 1 +} project_ids <- c(41600697, 19859695, 49118792, 21138017, 
44145525, 18441361, 17610613,18439010) get_project_name <- function(base_url, project_id, private_token) { @@ -27,12 +53,12 @@ get_project_name <- function(base_url, project_id, private_token) { # Boucle pour récupérer tous les noms de projets name_gitlab <- bind_rows(lapply(project_ids, get_project_name, base_url = "https://gitlab.com", - private_token = Sys.getenv("GITLAB_PAT"))) + private_token = Sys.getenv("GITLAB_COM_TOKEN"))) name_gitlab$id <- as.character(name_gitlab$id) # Variables pour l'API GitLab base_url <- "https://gitlab.com" -private_token <- Sys.getenv("GITLAB_PAT") # Récupération du token depuis Renviron +private_token <- Sys.getenv("GITLAB_COM_TOKEN") # Récupération du token depuis Renviron # Fonction pour gérer la pagination de l'API GitLab avec une limite stricte à 160 pages, en vérifiant la présence de pages suivantes @@ -103,10 +129,7 @@ get_all_pages <- function(url, private_token, max_retries = 3, max_pages = 159) -# Fonction pour extraire le nom du projet à partir de l'URL -extract_project_name <- function(web_url) { - str_extract(web_url, "(?<=/)[^/]+(?=/-/)") -} + # Fonction pour récupérer les issues d'un projet GitLab get_gitlab_issues <- function(base_url, project_id, private_token) { @@ -114,10 +137,10 @@ get_gitlab_issues <- function(base_url, project_id, private_token) { data <- get_all_pages(api_url, private_token) data <- data %>% mutate( - project_name = extract_project_name(web_url), + project_name = as.character(project_id), type = "issue", message = title, - author = author.username + author = paste0(author.username,"@developpement-durable.gouv.fr") ) return(data %>% select(project_name, type, message, updated_at, author)) } @@ -129,16 +152,28 @@ get_gitlab_events <- function(base_url, project_id, private_token) { data <- data %>% mutate( project_name = as.character(project_id), - type = if_else((is.null(target_type) | target_type == "") | as.character(target_type) == "Issue" | as.character(action_name) == "joined", # remplecer 
par | as.ch(target-type)=="Issue" - as.character(action_name), - as.character(target_type)), - message = note.body, + + type = case_when( + is.null(target_type) | target_type == "" ~ as.character(action_name), + as.character(action_name) == "joined" ~ as.character(action_name), + TRUE ~ as.character(target_type) + ), + + message = case_when( + # Cas spécifique : MergeRequest avec action_name == "accepted" + as.character(target_type) == "MergeRequest" & as.character(action_name) == "accepted" ~ as.character(target_title), + + # Cas général : autres MergeRequest ou WikiPage::Meta + as.character(target_type) %in% c("WikiPage::Meta") ~ as.character(target_title), + + # Si ce n'est pas un cas spécial, garder note.body + TRUE ~ as.character(note.body) + ), + updated_at = created_at, - author = author.username - ) %>% - left_join(name_gitlab, by = c("project_name" = "id")) %>% - mutate(project_name = name) %>% - select(-name) + author = paste0(author.username,"@developpement-durable.gouv.fr") + ) + return(data %>% select(project_name, type, message, updated_at, author)) } @@ -157,30 +192,6 @@ get_data_from_multiple_projects <- function(base_url, project_ids, private_token return(bind_rows(all_data)) } -# Topics -get_project_topics <- function(base_url, project_id, private_token) { - api_url <- paste0(base_url, "/api/v4/projects/", project_id) - response <- GET(api_url, add_headers("PRIVATE-TOKEN" = private_token)) - - if (status_code(response) == 200) { - project_info <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE) - return(data.frame( - id = as.character(project_info$name), - topics = paste(project_info$topics, collapse = ", "), # Combine les topics en une chaîne de caractères - stringsAsFactors = FALSE - )) - } else { - warning("Impossible de récupérer les topics pour le projet ID: ", project_id, " - Code: ", status_code(response)) - return(data.frame( - id = project_id, - topics = NA, - stringsAsFactors = FALSE - )) - } -} -extract_before_at 
<- function(email) { - sub("@.*", "", email) -} ############################################################### ----- # Fonction générique process_projects <- function(project_ids) { @@ -211,16 +222,12 @@ process_projects <- function(project_ids) { final_result <- process_projects(project_ids) final_result$project_id <- as.character(final_result$project_id) -final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>% - mutate(project_name = name, type = "commit") %>% - select(-name) -final_result <- final_result %>% mutate(author = extract_before_at(committer_email)) -final_result <- final_result %>% rename( "updated_at" = 'committed_date' ) + +final_result <- final_result %>% mutate(author = committer_email, type = "commit") +final_result <- final_result %>% rename( "updated_at" = 'committed_date',"project_name" = 'project_id' ) fg <- final_result %>% filter(is_duplicate == FALSE) %>% select(project_name,type , message ,updated_at,author) ############################################################### -# Récupérer les topics pour tous les projets -projects_topics <- bind_rows(lapply(project_ids, get_project_topics, base_url = base_url, private_token = private_token)) # Récupérer les données pour tous les projets all_data_gitlab <- get_data_from_multiple_projects(base_url, project_ids, private_token) @@ -229,6 +236,20 @@ all_data_gitlab <- all_data_gitlab %>% all_data_gitlab <- bind_rows(fg,all_data_gitlab) all_data_gitlab <- all_data_gitlab %>% mutate(origine ="Gitlab") -all_data_gitlab <- left_join(all_data_gitlab, projects_topics, by = c("project_name" = "id")) + +#data.frame avec les groupes et le nom pour left join----- +response_df <- data.frame( + id = as.character(response$id), + name = response$name, + groupe = response$namespace$name, + topics = sapply(response$topics, function(x) paste(x, collapse = ", ")) # on degroupe et regroupe ?? 
mais ca fonctionne +) +# Joindre les deux jeux de données +all_data_gitlab <- all_data_gitlab %>% + left_join(response_df, by = c("project_name" = "id")) # Faire un premier left_join() sur la colonne name +all_data_gitlab$author <- tolower(all_data_gitlab$author) + +# sauvegarde du jeu de donnée----- +all_data_gitlab <- all_data_gitlab %>% select(name , type, message , updated_at , author , topics, origine,groupe) # save.image("gitlab.RData") save(all_data_gitlab, file = "gitlab.RData") diff --git a/dev/gitlab_forge.R b/dev/gitlab_forge.R index 87158a13962e18fab81a3ccffb98583da5e7496c..bb3396efd6b8e1031ff23f32993cca34ce17a705 100644 --- a/dev/gitlab_forge.R +++ b/dev/gitlab_forge.R @@ -7,7 +7,7 @@ library(gitlabr) set_gitlab_connection( gitlab_url = "https://gitlab-forge.din.developpement-durable.gouv.fr" , - private_token = Sys.getenv("GITLAB_COM_TOKEN") + private_token = Sys.getenv("GITLAB_PAT") ) response <- data.frame() @@ -15,7 +15,7 @@ response <- data.frame() buildres <- function(i){ res_proj100 <- GET( "https://gitlab-forge.din.developpement-durable.gouv.fr/api/v4/groups/1013/projects", - add_headers(`PRIVATE-TOKEN` = Sys.getenv("GITLAB_COM_TOKEN")), + add_headers(`PRIVATE-TOKEN` = Sys.getenv("GITLAB_PAT")), query = list( include_subgroups = "true", per_page = 100, @@ -38,7 +38,7 @@ repeat { } # response <- GET( # "https://gitlab-forge.din.developpement-durable.gouv.fr/api/v4/groups/1013/projects", -# add_headers(`PRIVATE-TOKEN` = Sys.getenv("GITLAB_COM_TOKEN")), +# add_headers(`PRIVATE-TOKEN` = Sys.getenv("GITLAB_PAT")), # query = list( # include_subgroups = "true", # per_page = 1000 @@ -68,12 +68,12 @@ get_project_name <- function(base_url, project_id, private_token) { # Boucle pour récupérer tous les noms de projets name_gitlab <- bind_rows(lapply(project_ids, get_project_name, base_url = "https://gitlab-forge.din.developpement-durable.gouv.fr", - private_token = Sys.getenv("GITLAB_COM_TOKEN"))) + private_token = Sys.getenv("GITLAB_PAT"))) name_gitlab$id 
<- as.character(name_gitlab$id) # Variables pour l'API GitLab base_url <- "https://gitlab-forge.din.developpement-durable.gouv.fr" -private_token <- Sys.getenv("GITLAB_COM_TOKEN") # Récupération du token depuis Renviron +private_token <- Sys.getenv("GITLAB_PAT") # Récupération du token depuis Renviron # Fonction pour gérer la pagination de l'API GitLab avec une limite stricte à 160 pages, en vérifiant la présence de pages suivantes @@ -155,10 +155,10 @@ get_gitlab_issues <- function(base_url, project_id, private_token) { data <- get_all_pages(api_url, private_token) data <- data %>% mutate( - project_name = extract_project_name(web_url), + project_name = as.character(project_id), type = "issue", message = as.character(title), - author = author.username + author = paste0(author.username,"@developpement-durable.gouv.fr") ) return(data %>% select(project_name, type, message, updated_at, author)) } @@ -170,14 +170,28 @@ get_gitlab_events <- function(base_url, project_id, private_token) { data <- data %>% mutate( project_name = as.character(project_id), - type = if_else((is.null(target_type) | target_type == "") | as.character(target_type) == "Issue" | as.character(action_name) == "joined", # remplecer par | as.ch(target-type)=="Issue" - as.character(action_name), - as.character(target_type)), - message = ifelse(as.character(target_type) == "WikiPage::Meta",as.character(target_title),as.character(note.body)), + + type = case_when( + is.null(target_type) | target_type == "" ~ as.character(action_name), + as.character(action_name) == "joined" ~ as.character(action_name), + TRUE ~ as.character(target_type) + ), + + message = case_when( + # Cas spécifique : MergeRequest avec action_name == "accepted" + as.character(target_type) == "MergeRequest" & as.character(action_name) == "accepted" ~ as.character(target_title), + + # Cas général : autres MergeRequest ou WikiPage::Meta + as.character(target_type) %in% c("WikiPage::Meta") ~ as.character(target_title), + + # Si ce n'est 
pas un cas spécial, garder note.body + TRUE ~ as.character(note.body) + ), + updated_at = created_at, - author = author.username - ) %>% - left_join(name_gitlab, by = c("project_name" = "id")) %>% + author = paste0(author.username,"@developpement-durable.gouv.fr") + )%>% + # left_join(name_gitlab, by = c("project_name" = "id")) %>% mutate(project_name = name) %>% select(-name) return(data %>% select(project_name, type, message, updated_at, author)) @@ -198,29 +212,6 @@ get_data_from_multiple_projects <- function(base_url, project_ids, private_token return(bind_rows(all_data)) } -get_project_topics <- function(base_url, project_id, private_token) { - api_url <- paste0(base_url, "/api/v4/projects/", project_id) - response <- GET(api_url, add_headers("PRIVATE-TOKEN" = private_token)) - - if (status_code(response) == 200) { - project_info <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE) - return(data.frame( - id = as.character(project_info$name), - topics = paste(project_info$topics, collapse = ", "), # Combine les topics en une chaîne de caractères - stringsAsFactors = FALSE - )) - } else { - warning("Impossible de récupérer les topics pour le projet ID: ", project_id, " - Code: ", status_code(response)) - return(data.frame( - id = project_id, - topics = NA, - stringsAsFactors = FALSE - )) - } -} -extract_before_at <- function(email) { - sub("@.*", "", email) -} ########################################################### process_projects <- function(project_ids) { result_list <- lapply(project_ids, function(project_id) { @@ -251,23 +242,33 @@ process_projects <- function(project_ids) { final_result <- process_projects(project_ids) final_result$project_id <- as.character(final_result$project_id) -final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>% - mutate(project_name = name, type = "commit") %>% - select(-name) -final_result <- final_result %>% mutate(author = extract_before_at(committer_email)) 
-final_result <- final_result %>% rename( "updated_at" = 'committed_date' ) + +final_result <- final_result %>% mutate(author = committer_email,type = "commit") +final_result <- final_result %>% rename( "updated_at" = 'committed_date', "project_name" = 'project_id' ) ff <- final_result %>% filter(is_duplicate == FALSE) %>% select(project_name,type , message ,updated_at,author) ########################################################### -# Récupérer les topics pour tous les projets -projects_topics <- bind_rows(lapply(project_ids, get_project_topics, base_url = base_url, private_token = private_token)) - # Récupérer les données pour tous les projets all_data_forge <- get_data_from_multiple_projects(base_url, project_ids, private_token) + all_data_forge <- all_data_forge %>% filter(!(is.na(all_data_forge$type) | is.na(all_data_forge$message)) | all_data_forge$type == "joined"|all_data_forge$type == "WikiPage::Meta") all_data_forge <- bind_rows(ff,all_data_forge) all_data_forge <- all_data_forge %>% mutate(origine ="Gitlab_Forge") -all_data_forge <- left_join(all_data_forge, projects_topics, by = c("project_name" = "id")) + +#data.frame avec les groupes et le nom pour left join----- +response_df <- data.frame( + id = as.character(response$id), + name = response$name, + groupe = response$namespace$name, + topics = sapply(response$topics, function(x) paste(x, collapse = ", ")) # on degroupe et regroupe ?? 
mais ca fonctionne +) +# Joindre les deux jeux de données +all_data_forge <- all_data_forge %>% + left_join(response_df, by = c("project_name" = "id")) # Faire un premier left_join() sur la colonne name +all_data_forge$author <- tolower(all_data_forge$author) + +# sauvegarde du jeu de donnée----- +all_data_forge <- all_data_forge %>% select(name , type, message , updated_at , author , topics, origine,groupe) save(all_data_forge, file = "gitlab_forge.RData") diff --git a/dev/script_routine.R b/dev/script_routine.R index e145d0307b409c001c8e00edf2e7c998afda9bfd..637455f1636e4c564de9c144a669116f5e34ee10 100644 --- a/dev/script_routine.R +++ b/dev/script_routine.R @@ -1,10 +1,22 @@ Sys.setenv("HTTP_PROXY" = "http://pfrie-std.proxy.e2.rie.gouv.fr:8080") Sys.setenv("HTTPS_PROXY" = "http://pfrie-std.proxy.e2.rie.gouv.fr:8080") +Sys.setenv("NO_PROXY" = "127.0.0.1,localhost,.i2,.rie.gouv.fr,192.168.,10.,172.") + setwd("T:\\datalab\\SCTE_CSD\\dataviz_gitlabr") +library(stringr) +library(lubridate) # etape de mise à jour des données sur T------------- source(file = "dev/github_extraire.R") source(file = "dev/gitlab_classique.R") source(file = "dev/gitlab_forge.R") source(file = "dev/deploiment_sur_dataviz.R", verbose = TRUE) + +path_annuaire <- paste0("annuaire/export_pauline_", str_sub(today(), 1, 7), ".csv") +if (!file.exists(path_annuaire)) { + unlink("annuaire", recursive = TRUE) + dir.create("annuaire") + download.file(url = "http://annuaire.e2.rie.gouv.fr/index.php?vue=exportcsv&dn=ou=CSD,ou=SCTE,ou=DREAL%20Pays%20Loire,ou=DR,ou=melanie", + destfile = path_annuaire, method = "libcurl") +} diff --git a/global.R b/global.R index d35e2cbf44e6982971b1f5d0700edaff8302e403..0e42336652986fb3d1617b92a0ee0a06fd4342d5 100644 --- a/global.R +++ b/global.R @@ -4,7 +4,7 @@ library(stringr) library(gitlabr) library(DT) library(shiny) -library(dplyr) +library(tidyverse) library(ggplot2) library(plotly) # remotes::install_github("spyrales/shinygouv") @@ -20,25 +20,30 @@ 
library(bizdays) library(shinyWidgets) library(textclean) library(stringi) +library(readr) +library(fuzzyjoin) # chargement des données (résultat du script "dev/script_chargement_rdata.R") load("github.RData") load("gitlab_forge.RData") load("gitlab.RData") load("date_MAJ.RData") + +annuaire <- read_csv(path_annuaire,col_types = "c") +annuaire$Messagerie <- tolower(annuaire$Messagerie) +annuaire <- annuaire %>% rename( mail = Messagerie) +annuaire <- annuaire %>% filter( str_detect(Unité,"CSD" ) ) +annuaire <- annuaire %>% mutate(auteur = paste(Prénom, Nom), + mail_clean = str_remove(mail, "@.*") )%>% select(mail_clean, auteur) + all_data <- bind_rows(all_data_gitlab, all_data_forge, combined_data) all_data$updated_at <- as.POSIXct(all_data$updated_at, format = ("%Y-%m-%dT%H:%M:%S")) -all_data <- all_data %>% mutate(message = paste0(type,sep = " : ",message), - categorie = case_when( - str_detect(project_name, "parcours_r") ~ "r-formation", - str_detect(project_name, "spyrales") ~ "spyrales", - TRUE ~ "autre" - )) -# transformation author ---- -all_data$author <- all_data$author %>% - tolower() %>% - stri_trans_general("Latin-ASCII") %>% - str_replace_all("[^a-z ]", "") +all_data <- all_data %>% mutate(email_clean = str_remove(author, "@.*")) +all_data <- all_data %>% stringdist_left_join(annuaire,by = c("email_clean" = "mail_clean"), method = "jw", max_dist = 0.265) # ajout de auteur des membre du SCTE via l'annuaire, 75% des auteurs sont du csd + +all_data <- all_data %>% dplyr::mutate(etiquette = paste(groupe,topics,sep = ",")) +all_data <- all_data %>% mutate(message = paste(type,sep = " : ",message,auteur), + auteur = replace_na(auteur,"autre")) # transformation re_code ---- traduction <- c( @@ -51,10 +56,11 @@ traduction <- c( "WikiPage::Meta" = "Documentation", "joined" = "Gestion de projet" , "WorkItem" = "Gestion de projet" , - "MergeRequest" = "Code" , + "MergeRequest" = "Code" , # ajouter un "item" merge ? 
"Milestone" = "Gestion de projet" ) all_data <- all_data %>% dplyr::mutate(re_code = dplyr::recode(type , !!!traduction)) +all_data <- all_data %>% dplyr::rename(project_name = name) #transformation message all_data$message <- all_data$message %>% tolower() diff --git a/server.R b/server.R index 90e2c87d46b24bc2179e0858e4943cd631b397b2..9a7b8cc9c151f6672e25282c45b174a8ab8614f3 100644 --- a/server.R +++ b/server.R @@ -1,118 +1,258 @@ server <- function(input, output, session) { + # Stocker les valeurs réactives r <- reactiveValues( - filteredData = all_data, - filteredByProject = all_data, # Étape intermédiaire filtrée par daterange et project_name - filteredTopics = NULL, - filteredProjects = NULL, - filteredAuthors = NULL, - filteredReCodes = NULL, - filteredCategories = NULL, - filteredByDate = NULL, + filteredByDate = NULL, # Filtrage uniquement par date + filteredData = NULL # Filtrage final après application de tous les filtres + ) - # Filtrage par date et project_name uniquement + # 1️⃣ Filtrage prioritaire par date ---- observeEvent(input$daterange, { - req(input$daterange) + req(input$daterange) # Vérifier que la sélection est valide + + # Filtrage initial sur la plage de dates r$filteredByDate <- all_data %>% filter( updated_at >= as.Date(input$daterange[[1]]) & - updated_at <= as.Date(input$daterange[[2]]) & - (if (length(input$project_name)) project_name %in% input$project_name else TRUE) + updated_at <= as.Date(input$daterange[[2]]) ) - updateFilters() # Mise à jour des choix basés sur le filtrage intermédiaire + # Mise à jour dynamique des autres filtres en fonction des données filtrées par date + updateFilters() }) - # Mise à jour des autres filtres + # 2️⃣ Mise à jour dynamique des filtres ---- + updateFilters <- reactive({ + req(r$filteredByDate) # Vérifier que les données sont disponibles + isolate({ + updateSelectizeInput(session, "project_name", + choices = unique(r$filteredByDate$project_name), + selected = input$project_name) + + 
updateSelectizeInput(session, "etiquette", + choices = r$filteredByDate$etiquette %>% + strsplit(split = ",") %>% + unlist() %>% + na.omit() %>% + .[. != ""] %>% + trimws() %>% + unique(), + selected = input$etiquette) + + updateSelectizeInput(session, "auteur", + choices = unique(r$filteredByDate$auteur), + selected = input$auteur) + + updateSelectizeInput(session, "categorie", + choices = unique(r$filteredByDate$type), + selected = input$categorie) + + updateCheckboxGroupInput_dsfr( inputId = "re_code", + label = "Type d'évènement :", + choices = unique(r$filteredByDate$re_code), + selected = input$re_code, + inline = TRUE) + }) + }) + # 3️⃣ Filtrage final en fonction des autres filtres ---- observe({ - req(input$daterange) + req(r$filteredByDate) # Vérifier que les données de base sont filtrées par date + r$filteredData <- r$filteredByDate %>% filter( - (if (length(input$topics)) sapply(input$topics, function(t) grepl(t, topics)) %>% rowSums() > 0 else TRUE) & - (if (length(input$author)) author %in% input$author else TRUE) & + (if (length(input$project_name)) project_name %in% input$project_name else TRUE) & + (if (length(input$etiquette)) sapply(input$etiquette, function(t) grepl(t, etiquette)) %>% rowSums() > 0 else TRUE) & + (if (length(input$auteur)) auteur %in% input$auteur else TRUE) & (if (length(input$re_code)) re_code %in% input$re_code else TRUE) & - (if (length(input$categorie)) categorie %in% input$categorie else TRUE) + (if (length(input$categorie)) type %in% input$categorie else TRUE) ) - updateFilters() - }) - # Fonction de mise à jour des choix pour les filtres - updateFilters <- reactive({ - # Basé sur les données filtrées par date et projet - updateSelectizeInput(session, "topics", choices = unique(r$filteredByDate$topics)%>% - strsplit(split = ",") %>% # Divise les chaînes en éléments séparés - unlist() %>% # Aplatit la liste obtenue - na.omit() %>% # Supprime les NA (au cas où) - .[. 
!= ""] %>% # Supprime les chaînes vides - trimws() %>% # suppremie les espace avnt et apres - unique() , selected = input$topics) - updateSelectizeInput(session, "author", choices = unique(r$filteredByDate$author), selected = input$author) - updateSelectizeInput(session, "categorie", choices = unique(r$filteredByDate$categorie), selected = input$categorie) - - # Basé uniquement sur les données globales pour éviter que project_name soit affecté - updateSelectizeInput(session, "project_name", choices = sort(unique(r$filteredByDate$project_name)), selected = input$project_name) + # Mettre à jour les filtres restants + updateFilters() }) - # Réinitialisation des filtres + # 4️⃣ Réinitialisation des filtres ---- observeEvent(input$reset, { r$filteredByDate <- all_data %>% - filter(updated_at >= as.Date(input$daterange[[1]]) & - updated_at <= as.Date(input$daterange[[2]])) - r$filteredData <- r$filteredByDate - updateSelectizeInput(session, "topics", selected = NULL) - updateSelectizeInput(session, "author", selected = NULL) + filter( + updated_at >= as.Date(input$daterange[[1]]) & + updated_at <= as.Date(input$daterange[[2]]) + ) + r$filteredData <- r$filteredByDate # Reset total + updateSelectizeInput(session, "project_name", selected = NULL) + updateSelectizeInput(session, "etiquette", selected = NULL) + updateSelectizeInput(session, "auteur", selected = NULL) updateSelectizeInput(session, "categorie", selected = NULL) + updateCheckboxGroupInput_dsfr( "re_code", selected = NULL,inline = TRUE) updateFilters() }) - output$lien <- renderUI({ - projects_with_links <- r$filteredData %>% - filter(origine == "Gitlab_Forge" & project_name %in% input$project_name) - - if (nrow(projects_with_links) == 0) { - return(NULL) # Aucun lien à afficher - } - - urls <- paste0("https://gitlab-forge.din.developpement-durable.gouv.fr/dreal-pdl/csd/", projects_with_links$project_name) - links <- lapply(seq_along(unique(projects_with_links$project_name)), function(i) { - a(href = urls[i], 
target = "_blank", projects_with_links$project_name[i]) - }) - do.call(tagList, links) - }) - # Graphique interactif + # 5️⃣ Graphique interactif ---- output$filteredPlot2 <- plotly::renderPlotly({ - req(r$filteredData) + req(r$filteredData) # Vérifier qu'on a des données filtrées p <- ggplot2::ggplot(r$filteredData, ggplot2::aes(x = updated_at, y = as.factor(project_name))) + ggplot2::geom_point(ggplot2::aes(color = re_code, text = paste(updated_at, message, sep = "\n")), size = 3, alpha = 0.5) + ggplot2::labs(shape = "Événement", color = "Événement", x = "") + ggplot2::scale_x_datetime(timezone = "Europe/Paris") + - gouvdown::theme_gouv()+ + gouvdown::theme_gouv() + ggplot2::theme( axis.text.x = ggplot2::element_text(angle = 0, hjust = 1), axis.title.y = ggplot2::element_blank() ) - plotly::ggplotly(p, tooltip = "text") + plotly::ggplotly(p, tooltip = "text", dynamicTicks = TRUE) %>% + plotly::config(locale = "fr", displaylogo = FALSE) }) - - output$table <- renderDataTable({ + # 6️⃣ Graphique des indicateurs ---- + output$bar_chart <- renderPlotly({ req(r$filteredData) - datatable(r$filteredData , - options = list( - pageLength = 10, # Nombre de lignes affichées par page - lengthMenu = c(5, 10, 25, 50), # Choix du nombre de lignes - autoWidth = TRUE, # Ajuste automatiquement la largeur des colonnes - scrollX = TRUE, # Active le défilement horizontal - class = "display", # Ajoute du style CSS - rownames = FALSE, # Supprime les numéros de ligne, - searchHighlight = TRUE - ) + + percent_time <- as.numeric(max(r$filteredData$updated_at) - min(r$filteredData$updated_at)) / + as.numeric(max(all_data$updated_at) - min(all_data$updated_at)) * 100 + + # Création du dataset + data <- data.frame( + dataset = c("Commit", "Projet", "Heure"), + total = c(sum(r$filteredData$type == "commit"), + length(unique(r$filteredData$project_name)), + percent_time) ) + plotly::ggplotly( + ggplot(data, aes(x = dataset, y = total, fill = dataset)) + + geom_bar(stat = "identity") + + 
coord_flip() + + scale_fill_manual(values = c("#3498db", "#e74c3c", "#2ecc71")) + + labs( + x = "Comparaison", + y = "Pourcentage / Nombre", + title = "Comparaison des Commits, Projets et Temps" + ) + + theme_minimal() + + theme(legend.position = "none") + + geom_text( + aes(label = paste0(round(total, 1),"%")), + hjust = -0.2 + ) + ) %>% + plotly::config(locale = "fr", + displaylogo = FALSE) }) + output$nb_projet <- renderText({ + req(r$filteredData) + paste("Nombre de projets visualisés",length(unique(r$filteredData$project_name)),sep = " : ") + }) + output$nb_commit <- renderText({ + req(r$filteredData) + paste("Nombre de commits",sum(r$filteredData$type == "commit"),sep = " : ") + }) + output$nb_temps <- renderText({ + req(r$filteredData) + paste("Estimation du temps passé sur la période :",sep = " ",round(difftime( + max(r$filteredData$updated_at) , min(r$filteredData$updated_at),units = "hours")), " heures") + }) + + + # 7️⃣ Table des résultats ---- + output$table <- renderDT({ + req(r$filteredData) + datatable(r$filteredData, options = list( + pageLength = 10, + lengthMenu = c(5, 10, 25, 50), + autoWidth = TRUE, + scrollX = TRUE, + class = "display", + rownames = FALSE, + searchHighlight = TRUE + )) + }) + + # 8️⃣ Graphique des indicateurs ----- + + output$min_value <- renderText({ + req(input$project_name) # Vérifie que l'input existe + + # Nombre de projets sélectionnés + selected_projects <- length(unique(input$project_name)) + + # Condition : Si 1 seul projet est sélectionné + if (selected_projects == 1) { + # Filtrer les données pour ce projet spécifique + projet_data <- all_data %>% + filter(project_name == input$project_name) + + min_value <- min(projet_data$updated_at, na.rm = TRUE) + + paste("Date de création du projet : ", format(min_value, "%d %B %Y, %H:%M")) + } else { + "Sélectionnez un seul projet pour voir la date de création." 
+ } + }) + + + output$temps <- renderText({ + req(input$project_name) # Vérifie que l'input existe + + # Nombre de projets sélectionnés + selected_projects <- length(unique(input$project_name)) + # Condition : Si 1 seul projet est sélectionné + if (selected_projects == 1) { + # Filtrer les données pour ce projet spécifique + projet_data <- all_data %>% + filter(project_name == input$project_name) + + temps <- difftime(max(projet_data$updated_at), min(projet_data$updated_at), units = "secs") + + jours <- as.numeric(temps, units = "days") %/% 1 + heures <- (as.numeric(temps, units = "hours") %% 24) %/% 1 + minutes <- (as.numeric(temps, units = "mins") %% 60) %/% 1 + print(temps) + paste("Estimation du temps passé : ", jours, "jours,", heures, "heures,", minutes, "minutes") + } else { + "Sélectionnez un seul projet pour voir l'estimation." + } + }) + + output$bar <- renderPlot({ + req(input$project_name) # Vérifie que l'input existe + + selected_projects <- length(unique(input$project_name)) + if (selected_projects == 1) { + # Filtrer les données pour le project_name sélectionné + filtered_data <- all_data %>% + filter(project_name == input$project_name) %>% + mutate(month = format(as.Date(updated_at), "%Y-%m")) + # Définir la plage de mois spécifique au projet sélectionné + first_month <- floor_date(min(as.Date(filtered_data$updated_at)), "month") + last_month <- ceiling_date(max(as.Date(filtered_data$updated_at)), "month") + # Générer tous les mois entre le premier et le dernier + full_months <- seq(first_month, last_month, by = "month") %>% + tibble(month = format(., "%Y-%m")) + # Compter les entrées par mois et compléter les mois manquants + complete_data <- full_months %>% + left_join(filtered_data %>% count(month), by = "month") %>% + replace_na(list(n = 0)) + + # Créer le bar plot + ggplot(complete_data, aes(x = month, y = n)) + + geom_bar(stat = "identity", fill = "#ff7f27", width = 0.7) + + theme_minimal() + + scale_x_discrete( + name = "Mois", + labels = 
function(x) format(as.Date(paste0(x, "-01")), "%B %Y") + ) + + labs(y = "Nombre d'entrées") + + gouvdown::theme_gouv() + + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + }else{ + ggplot() + + annotate("text", x = 1, y = 1, label = "Sélectionnez un seul projet pour voir la timeline", size = 5) + + theme_void() + } + }) } diff --git a/ui.R b/ui.R index 5411578aa0bd77e044556d0ddd311d30b0ad0f28..7fc73230eb9f5734b88dc28b10d051a58fb8cf76 100644 --- a/ui.R +++ b/ui.R @@ -4,7 +4,7 @@ ui <- shinygouv::navbarPage_dsfr( id = "nav", header = shinygouv::header_dsfr( intitule = c("DREAL", "Pays de la Loire"), - nom_site_service = "Tableau de bord GitLab-Forge", + nom_site_service = "Visualisation des projets du CSD gérés avec git", baseline = "Les Projets du Centre de Services de la Donnée" ), footer = footer_dsfr( @@ -16,8 +16,7 @@ ui <- shinygouv::navbarPage_dsfr( ), # First tab Projet --------- shinygouv::navbarPanel_dsfr( - title = "Graphique des projets par durée", - + title = "Graphique par période", htmltools::tags$head(tags$style(".fr-container { max-width: calc(100% - 2rem); }")), shinygouv::fluidRow_dsfr( shinygouv::column_dsfr( @@ -33,18 +32,9 @@ ui <- shinygouv::navbarPage_dsfr( shinygouv::column_dsfr( 3, selectizeInput( - inputId = "project_name", - label = "Projet : ", - choices = NULL, - multiple = TRUE - ),extra_class = "fr-mt-6w" - ), - column_dsfr( - 3, - selectizeInput( - inputId = "topics", + inputId = "etiquette", label = "Étiquette :", - choices = all_data$topics %>% + choices = all_data$etiquette %>% strsplit(split = ",") %>% # Divise les chaînes en éléments séparés unlist() %>% # Aplatit la liste obtenue na.omit() %>% # Supprime les NA (au cas où) @@ -53,14 +43,25 @@ ui <- shinygouv::navbarPage_dsfr( unique() , # Extrait les éléments uniques multiple = TRUE - ),extra_class = "fr-mt-6w" + ), + extra_class = "fr-mt-6w" + ), + column_dsfr( + 3, + selectizeInput( + inputId = "project_name", + label = "Sélectionnez projet : ", + choices = NULL, + 
multiple = TRUE + ), + extra_class = "fr-mt-6w" ), column_dsfr( 3, selectizeInput( - inputId = "author", + inputId = "auteur", label = "Auteur :", - choices = unique(str_to_title(replace_non_ascii(all_data$author))), + choices = unique(str_to_title(replace_non_ascii(all_data$auteur))), selected = NULL, multiple = TRUE ),extra_class = "fr-mt-6w" @@ -83,12 +84,8 @@ ui <- shinygouv::navbarPage_dsfr( 4, selectizeInput( inputId = "categorie", - label = "Groupe: ", - choices = c( - "Autre" = "tous", - "Spyrales" = "spyrale", - "R Formation" = "github" - ), + label = " Évenement (précis) ", + choices = unique(all_data$type), selected = NULL, multiple = TRUE ) @@ -106,24 +103,35 @@ ui <- shinygouv::navbarPage_dsfr( plotly::plotlyOutput("filteredPlot2", height = "700px"), extra_class = "fr-my-1w" ) - ) ),# tab graphique par choix ↑ tabPanel_dsfr( id = "indicateur" , title = "Indicateurs", shinygouv::fluidRow_dsfr( - shinygouv::column_dsfr(12) + column_dsfr(3, + textOutput("nb_projet"), + textOutput("nb_commit"), + textOutput("nb_temps"), extra_class = "fr-my-1w" + ), + shinygouv::column_dsfr(9, + plotlyOutput("bar_chart"), + extra_class = "fr-my-1w"), + + column_dsfr(12,textOutput("min_value")), + column_dsfr(12,textOutput("temps")), + column_dsfr(12,plotOutput("bar"), + extra_class = "fr-my-1w") ) - ), + ),# tab indicateur par choix ↑ tabPanel_dsfr( - id = "table" , - title = "tableau", + id = "Table" , + title = "Table", shinygouv::fluidRow_dsfr( shinygouv::column_dsfr(12, - DT::dataTableOutput("table")) - ) + DTOutput('table')) ) + )# tab table par choix ↑ )#↑ tableau_proj )# ↑ Navbar )# ↑ UI