Skip to content
Snippets Groups Projects
Commit 0266e07f authored by LE DURAND Matteo's avatar LE DURAND Matteo
Browse files

Changement de récupération : maintenant on ne garde que les id numériques et...

Changement de récupération : maintenant on ne garde que les id numériques, puis on fait la jointure grâce à response, qui contient beaucoup d'informations comme le nom, le groupe et les topics.
parent 0613c724
No related branches found
No related tags found
2 merge requests!28Ajout de l'onglet indicateur et plus,!18MAJ de main de la branche dev
......@@ -155,7 +155,7 @@ get_gitlab_issues <- function(base_url, project_id, private_token) {
data <- get_all_pages(api_url, private_token)
data <- data %>%
mutate(
project_name = extract_project_name(web_url),
project_name = as.character(project_id),
type = "issue",
message = as.character(title),
author = author.username
......@@ -191,7 +191,7 @@ get_gitlab_events <- function(base_url, project_id, private_token) {
updated_at = created_at,
author = author.username
)%>%
left_join(name_gitlab, by = c("project_name" = "id")) %>%
# left_join(name_gitlab, by = c("project_name" = "id")) %>%
mutate(project_name = name) %>%
select(-name)
return(data %>% select(project_name, type, message, updated_at, author))
......@@ -212,29 +212,6 @@ get_data_from_multiple_projects <- function(base_url, project_ids, private_token
return(bind_rows(all_data))
}
#' Fetch the topics of one GitLab project.
#'
#' Queries `<base_url>/api/v4/projects/<project_id>` with the private token
#' and returns a one-row data frame. Both branches return character columns,
#' so results for several projects can be stacked with `bind_rows()` even
#' when some requests fail and `project_id` is numeric.
#'
#' @param base_url Root URL of the GitLab instance.
#' @param project_id Identifier of the project to query.
#' @param private_token Token sent in the `PRIVATE-TOKEN` header.
#' @return A one-row data.frame with character columns `id` and `topics`.
#'   On HTTP 200, `id` is the project name and `topics` the topics joined
#'   with ", ". Otherwise a warning is raised, `id` is the project id as
#'   text and `topics` is `NA_character_`.
get_project_topics <- function(base_url, project_id, private_token) {
  api_url <- paste0(base_url, "/api/v4/projects/", project_id)
  response <- GET(api_url, add_headers("PRIVATE-TOKEN" = private_token))
  status <- status_code(response)

  if (status == 200) {
    project_info <- fromJSON(content(response, "text", encoding = "UTF-8"), flatten = TRUE)
    return(data.frame(
      id = as.character(project_info$name),
      # Collapse the topic vector into a single comma-separated string.
      topics = paste(project_info$topics, collapse = ", "),
      stringsAsFactors = FALSE
    ))
  }

  warning("Impossible de récupérer les topics pour le projet ID: ", project_id, " - Code: ", status)
  # Keep the same column types as the success branch: a numeric id or a
  # logical NA here would make the combined result inconsistent.
  data.frame(
    id = as.character(project_id),
    topics = NA_character_,
    stringsAsFactors = FALSE
  )
}
#' Keep the part of each string that precedes the first "@".
#'
#' @param email Character vector, typically e-mail addresses.
#' @return Character vector of the same length; elements without an "@"
#'   are returned unchanged.
extract_before_at <- function(email) {
  local_part <- sub(pattern = "@.*", replacement = "", x = email)
  local_part
}
###########################################################
process_projects <- function(project_ids) {
result_list <- lapply(project_ids, function(project_id) {
......@@ -265,17 +242,14 @@ process_projects <- function(project_ids) {
# NOTE(review): this span appears to come from a diff view, so earlier and
# later versions of some statements sit side by side; confirm which are live.
# Build the commit rows for every project id and store project_id as text.
final_result <- process_projects(project_ids)
final_result$project_id <- as.character(final_result$project_id)
# Look up each project's name in name_gitlab (project_id matched to id),
# copy it into project_name, tag the rows as commits and drop the helper column.
final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>%
mutate(project_name = name, type = "commit") %>%
select(-name)
# Author is derived from committer_email through extract_before_at().
final_result <- final_result %>% mutate(author = extract_before_at(committer_email))
# Rename committed_date to updated_at.
final_result <- final_result %>% rename( "updated_at" = 'committed_date' )
# final_result <- final_result %>% left_join(name_gitlab, by = c("project_id" = "id")) %>%
# mutate(project_name = name, type = "commit") %>%
# select(-name)
# Author is the raw committer_email; rows are tagged as commits.
final_result <- final_result %>% mutate(author = committer_email,type = "commit")
# Rename committed_date to updated_at and project_id to project_name.
final_result <- final_result %>% rename( "updated_at" = 'committed_date', "project_name" = 'project_id' )
# Keep only the rows not flagged as duplicates, restricted to five columns.
ff <- final_result %>% filter(is_duplicate == FALSE) %>% select(project_name,type , message ,updated_at,author)
###########################################################
# Fetch the topics of every project
projects_topics <- bind_rows(lapply(project_ids, get_project_topics, base_url = base_url, private_token = private_token))
# Fetch the data of every project
all_data_forge <- get_data_from_multiple_projects(base_url, project_ids, private_token)
......@@ -284,26 +258,19 @@ all_data_forge <- all_data_forge %>%
# Stack the commit rows (ff) on top of the other rows and record the source.
all_data_forge <- bind_rows(ff,all_data_forge)
all_data_forge <- all_data_forge %>%
mutate(origine ="Gitlab_Forge")
# Attach the topics by matching project_name to the id column of projects_topics.
all_data_forge <- left_join(all_data_forge, projects_topics, by = c("project_name" = "id"))
# data.frame holding the group and the name, used for the left join -----
response_df <- data.frame(
id = as.character(response$id),
name = response$name,
name2 = response$path, # name2: if the project was renamed, the join on name no longer matches, so the original path is used instead, which recovers much of the missing information
groupe = response$namespace$path
groupe = response$namespace$name,
topics = sapply(response$topics, function(x) paste(x, collapse = ", ")) # collapse each project's topics into one comma-separated string
)
# Join the two data sets
all_data_forge <- all_data_forge %>%
left_join(response_df, by = c("project_name" = "name")) # first left_join() on the name column
all_data_forge <- all_data_forge %>% # fill the unmatched rows with a second left_join() on the name2 column
left_join(
response_df %>%
select(name2, groupe), # keep only the columns that are needed
by = c("project_name" = "name2"),
na_matches = "never" # prevents NA values from matching each other
) %>%
mutate(groupe = coalesce(groupe.x, groupe.y)) %>% # the first join takes priority
select(-groupe.x, -groupe.y) # drop the temporary columns
left_join(response_df, by = c("project_name" = "id")) # left_join() on the id column of response_df
# save the data set -----
all_data_forge <- all_data_forge %>% select(name , type, message , updated_at , author , topics, origine,groupe)
save(all_data_forge, file = "gitlab_forge.RData")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment