From 0266e07ff454b2b37150094165168eaaa16d2aa9 Mon Sep 17 00:00:00 2001 From: LE DURAND Matteo <matteo.le-durand@developpement-durable.gouv.fr> Date: Thu, 13 Mar 2025 10:13:29 +0100 Subject: [PATCH] =?UTF-8?q?changement=20de=20r=C3=A9cup=C3=A9ration,=20mai?= =?UTF-8?q?ntenant=20on=20gader=20que=20les=20id=20en=20nombre=20et=20ensu?= =?UTF-8?q?ite=20on=20join=20grace=20=C3=A0=20response=20qui=20=C3=A0=20be?= =?UTF-8?q?aucoup=20d'info=20comme=20le=20nom,=20le=20groupe=20et=20le=20t?= =?UTF-8?q?opics?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dev/gitlab_forge.R | 59 ++++++++++------------------------------------ 1 file changed, 13 insertions(+), 46 deletions(-) diff --git a/dev/gitlab_forge.R b/dev/gitlab_forge.R index 3ac686a..bafacb2 100644 --- a/dev/gitlab_forge.R +++ b/dev/gitlab_forge.R @@ -155,7 +155,7 @@ get_gitlab_issues <- function(base_url, project_id, private_token) { data <- get_all_pages(api_url, private_token) data <- data %>% mutate( - project_name = extract_project_name(web_url), + project_name = as.character(project_id), type = "issue", message = as.character(title), author = author.username @@ -191,7 +191,7 @@ get_gitlab_events <- function(base_url, project_id, private_token) { updated_at = created_at, author = author.username )%>% - left_join(name_gitlab, by = c("project_name" = "id")) %>% + # left_join(name_gitlab, by = c("project_name" = "id")) %>% mutate(project_name = name) %>% select(-name) return(data %>% select(project_name, type, message, updated_at, author)) @@ -212,29 +212,6 @@ get_data_from_multiple_projects <- function(base_url, project_ids, private_token return(bind_rows(all_data)) } -get_project_topics <- function(base_url, project_id, private_token) { - api_url <- paste0(base_url, "/api/v4/projects/", project_id) - response <- GET(api_url, add_headers("PRIVATE-TOKEN" = private_token)) - - if (status_code(response) == 200) { - project_info <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE) - return(data.frame( - id = as.character(project_info$name), - topics = paste(project_info$topics, collapse = ", "), # Combine les topics en une chaîne de caractères - stringsAsFactors = FALSE - )) - } else { - warning("Impossible de récupérer les topics pour le projet ID: ", project_id, " - Code: ", status_code(response)) - return(data.frame( - id = project_id, - topics = NA, - stringsAsFactors = FALSE - )) - } -} -extract_before_at <- function(email) { - sub("@.*", "", email) -} ########################################################### process_projects <- function(project_ids) { result_list <- lapply(project_ids, function(project_id) { @@ -265,17 +242,14 @@ process_projects <- function(project_ids) { final_result <- process_projects(project_ids) final_result$project_id <- as.character(final_result$project_id) -final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>% - mutate(project_name = name, type = "commit") %>% - select(-name) -final_result <- final_result %>% mutate(author = extract_before_at(committer_email)) -final_result <- final_result %>% rename( "updated_at" = 'committed_date' ) +# final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>% +# mutate(project_name = name, type = "commit") %>% +# select(-name) +final_result <- final_result %>% mutate(author = committer_email,type = "commit") +final_result <- final_result %>% rename( "updated_at" = 'committed_date', "project_name" = 'project_id' ) ff <- final_result %>% filter(is_duplicate == FALSE) %>% select(project_name,type , message ,updated_at,author) ########################################################### -# Récupérer les topics pour tous les projets -projects_topics <- bind_rows(lapply(project_ids, get_project_topics, base_url = base_url, private_token = private_token)) - # Récupérer les données pour tous les projets all_data_forge <- get_data_from_multiple_projects(base_url, project_ids, private_token) @@ -284,26 +258,19 @@ all_data_forge <- all_data_forge %>% all_data_forge <- bind_rows(ff,all_data_forge) all_data_forge <- all_data_forge %>% mutate(origine ="Gitlab_Forge") -all_data_forge <- left_join(all_data_forge, projects_topics, by = c("project_name" = "id")) #data.frame avec les groupes et le nom pour left join----- response_df <- data.frame( + id = as.character(response$id), name = response$name, - name2 = response$path, #name2 car si le projet à changé de nom alors la jointure ne se fait plus par le nom mais par le passage d'origine ce qui corrige grandement le manque d'information - groupe = response$namespace$path + groupe = response$namespace$name, + topics = sapply(response$topics, function(x) paste(x, collapse = ", ")) # on degroupe et regroupe ?? mais ca fonctionne ) # Joindre les deux jeux de données all_data_forge <- all_data_forge %>% - left_join(response_df, by = c("project_name" = "name")) # Faire un premier left_join() sur la colonne name -all_data_forge <- all_data_forge %>% # Compléter les lignes non appariées avec un second left_join() sur la colonne name2 - left_join( - response_df %>% - select(name2, groupe), # On ne conserve que les colonnes utiles - by = c("project_name" = "name2"), - na_matches = "never" # Empêche l'association des NA - ) %>% - mutate(groupe = coalesce(groupe.x, groupe.y)) %>% # Priorité au 1er join - select(-groupe.x, -groupe.y) # Nettoyage des colonnes temporaires + left_join(response_df, by = c("project_name" = "id")) # Faire un premier left_join() sur la colonne name + # sauvegarde du jeu de donnée----- +all_data_forge <- all_data_forge %>% select(name , type, message , updated_at , author , topics, origine,groupe) save(all_data_forge, file = "gitlab_forge.RData") -- GitLab