From a905a4522cec012721b27cda27cc9b254269596d Mon Sep 17 00:00:00 2001 From: LE DURAND Matteo <matteo.le-durand@developpement-durable.gouv.fr> Date: Fri, 14 Mar 2025 16:18:28 +0100 Subject: [PATCH] les groupes, les topics sont automatiques et le noms des projets aussi , attention les auteurs sont encore sous formes de mail --- dev/gitlab_classique.R | 95 ++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 45 deletions(-) diff --git a/dev/gitlab_classique.R b/dev/gitlab_classique.R index cd71b90..780fde6 100644 --- a/dev/gitlab_classique.R +++ b/dev/gitlab_classique.R @@ -9,6 +9,32 @@ set_gitlab_connection( gitlab_url = "https://gitlab.com" , private_token = Sys.getenv("GITLAB_COM_TOKEN") ) +response <- data.frame() +# on charge les projets du groupe CSD +buildres <- function(i){ + res_proj100 <- GET( + "https://gitlab.com/api/v4/groups/6567080/projects", + add_headers(`PRIVATE-TOKEN` = Sys.getenv("GITLAB_COM_TOKEN")), + query = list( + include_subgroups = "true", + per_page = 100, + page = i + ), + timeout(120) # Timeout augmenté pour les projets + ) + res <- content(res_proj100, as = "text", encoding = "UTF-8") %>% fromJSON() + return(res) +} +#on réalise une boucle si plus de 100 projets +x <- 1 +repeat { + res <- buildres(x) + if (length(res) == 0) { + break + } + response <- bind_rows(response, res) + x <- x + 1 +} project_ids <- c(41600697, 19859695, 49118792, 21138017, 44145525, 18441361, 17610613,18439010) get_project_name <- function(base_url, project_id, private_token) { @@ -103,10 +129,7 @@ get_all_pages <- function(url, private_token, max_retries = 3, max_pages = 159) -# Fonction pour extraire le nom du projet à partir de l'URL -extract_project_name <- function(web_url) { - str_extract(web_url, "(?<=/)[^/]+(?=/-/)") -} + # Fonction pour récupérer les issues d'un projet GitLab get_gitlab_issues <- function(base_url, project_id, private_token) { @@ -114,10 +137,10 @@ get_gitlab_issues <- function(base_url, project_id, private_token) { data <- get_all_pages(api_url, private_token) data <- data %>% mutate( - project_name = extract_project_name(web_url), + project_name = as.character(project_id), type = "issue", message = title, - author = author.username + author = paste0(author.username,"@developpement-durable.gouv.fr") ) return(data %>% select(project_name, type, message, updated_at, author)) } @@ -148,11 +171,9 @@ get_gitlab_events <- function(base_url, project_id, private_token) { ), updated_at = created_at, - author = author.username - ) %>% - left_join(name_gitlab, by = c("project_name" = "id")) %>% - mutate(project_name = name) %>% - select(-name) + author = paste0(author.username,"@developpement-durable.gouv.fr") + ) + return(data %>% select(project_name, type, message, updated_at, author)) } @@ -171,30 +192,6 @@ get_data_from_multiple_projects <- function(base_url, project_ids, private_token return(bind_rows(all_data)) } -# Topics -get_project_topics <- function(base_url, project_id, private_token) { - api_url <- paste0(base_url, "/api/v4/projects/", project_id) - response <- GET(api_url, add_headers("PRIVATE-TOKEN" = private_token)) - - if (status_code(response) == 200) { - project_info <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE) - return(data.frame( - id = as.character(project_info$name), - topics = paste(project_info$topics, collapse = ", "), # Combine les topics en une chaîne de caractères - stringsAsFactors = FALSE - )) - } else { - warning("Impossible de récupérer les topics pour le projet ID: ", project_id, " - Code: ", status_code(response)) - return(data.frame( - id = project_id, - topics = NA, - stringsAsFactors = FALSE - )) - } -} -extract_before_at <- function(email) { - sub("@.*", "", email) -} ############################################################### ----- # Fonction générique process_projects <- function(project_ids) { @@ -225,16 +222,12 @@ process_projects <- function(project_ids) { final_result <- process_projects(project_ids) final_result$project_id <- as.character(final_result$project_id) -final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>% - mutate(project_name = name, type = "commit") %>% - select(-name) -final_result <- final_result %>% mutate(author = extract_before_at(committer_email)) -final_result <- final_result %>% rename( "updated_at" = 'committed_date' ) + +final_result <- final_result %>% mutate(author = committer_email, type = "commit") +final_result <- final_result %>% rename( "updated_at" = 'committed_date',"project_name" = 'project_id' ) fg <- final_result %>% filter(is_duplicate == FALSE) %>% select(project_name,type , message ,updated_at,author) ############################################################### -# Récupérer les topics pour tous les projets -projects_topics <- bind_rows(lapply(project_ids, get_project_topics, base_url = base_url, private_token = private_token)) # Récupérer les données pour tous les projets all_data_gitlab <- get_data_from_multiple_projects(base_url, project_ids, private_token) @@ -242,9 +235,21 @@ all_data_gitlab <- all_data_gitlab %>% filter(!(is.na(all_data_gitlab$type) | is.na(all_data_gitlab$message)) | all_data_gitlab$type == "joined" | all_data_gitlab$type == "WikiPage::Meta") all_data_gitlab <- bind_rows(fg,all_data_gitlab) all_data_gitlab <- all_data_gitlab %>% - mutate(origine ="Gitlab", - groupe = "RDES_DREAL") -all_data_gitlab <- left_join(all_data_gitlab, projects_topics, by = c("project_name" = "id")) + mutate(origine ="Gitlab") + +#data.frame avec les groupes et le nom pour left join----- +response_df <- data.frame( + id = as.character(response$id), + name = response$name, + groupe = response$namespace$name, + topics = sapply(response$topics, function(x) paste(x, collapse = ", ")) # on degroupe et regroupe ?? mais ca fonctionne +) +# Joindre les deux jeux de données +all_data_gitlab <- all_data_gitlab %>% + left_join(response_df, by = c("project_name" = "id")) # Faire un premier left_join() sur la colonne name +all_data_gitlab$author <- tolower(all_data_gitlab$author) +# sauvegarde du jeu de donnée----- +all_data_gitlab <- all_data_gitlab %>% select(name , type, message , updated_at , author , topics, origine,groupe) # save.image("gitlab.RData") save(all_data_gitlab, file = "gitlab.RData") -- GitLab