From 1533b7d8e873943f0e34d14c9a3743c3badb327d Mon Sep 17 00:00:00 2001
From: LE DURAND Matteo <matteo.le-durand@developpement-durable.gouv.fr>
Date: Thu, 27 Mar 2025 14:37:13 +0100
Subject: [PATCH] =?UTF-8?q?ajout=20de=20l'annuaire=20au=20donn=C3=A9e=20+?=
 =?UTF-8?q?=20prefix=20des=20fonctions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 global.R | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/global.R b/global.R
index 0e42336..dd1e76c 100644
--- a/global.R
+++ b/global.R
@@ -27,23 +27,23 @@ load("github.RData")
 load("gitlab_forge.RData")
 load("gitlab.RData")
 load("date_MAJ.RData")
+load("path_annuaire.RData")
 
-
-annuaire <- read_csv(path_annuaire,col_types = "c")
+annuaire <- readr::read_csv(path_annuaire,col_types = "c")
 annuaire$Messagerie <- tolower(annuaire$Messagerie)
-annuaire <- annuaire %>% rename( mail = Messagerie)
-annuaire <- annuaire %>% filter( str_detect(Unité,"CSD" ) )
-annuaire <- annuaire %>% mutate(auteur = paste(Prénom, Nom),
-                                mail_clean = str_remove(mail, "@.*") )%>% select(mail_clean, auteur)
+annuaire <- annuaire %>% dplyr::rename( mail = Messagerie)
+annuaire <- annuaire %>% dplyr::filter( stringr::str_detect(Unité,"CSD" ) )
+annuaire <- annuaire %>% dplyr::mutate(auteur = paste(Prénom, Nom),
+                                mail_clean = stringr::str_remove(mail, "@.*") )%>% dplyr::select(mail_clean, auteur)
 
 
-all_data <- bind_rows(all_data_gitlab, all_data_forge, combined_data)
+all_data <- dplyr::bind_rows(all_data_gitlab, all_data_forge, combined_data)
 all_data$updated_at <- as.POSIXct(all_data$updated_at, format = ("%Y-%m-%dT%H:%M:%S"))
-all_data <- all_data %>% mutate(email_clean = str_remove(author, "@.*"))
-all_data <- all_data %>% stringdist_left_join(annuaire,by = c("email_clean" = "mail_clean"), method = "jw", max_dist = 0.265) # ajout de auteur des membre du SCTE via l'annuaire, 75% des auteurs sont du csd
+all_data <- all_data %>% dplyr::mutate(email_clean = stringr::str_remove(author, "@.*"))
+all_data <- all_data %>% fuzzyjoin::stringdist_left_join(annuaire,by = c("email_clean" = "mail_clean"), method = "jw", max_dist = 0.265) # ajout de auteur des membre du SCTE via l'annuaire, 75% des auteurs sont du csd
 all_data <- all_data %>% dplyr::mutate(etiquette = paste(groupe,topics,sep = ","))
-all_data <- all_data %>% mutate(message = paste(type,sep = " : ",message,auteur),
-                                auteur = replace_na(auteur,"autre"))
+all_data <- all_data %>% dplyr::mutate(message = paste(type,sep = " : ",message,auteur),
+                                auteur = tidyr::replace_na(auteur,"autre"))
 
 # transformation re_code ----
 traduction <- c(
@@ -64,4 +64,4 @@ all_data <- all_data %>% dplyr::rename(project_name = name)
 
 #transformation message
 all_data$message <- all_data$message %>% tolower()
-min_all_data <- all_data %>% filter(str_detect(message,'commit : initial commit'))
+min_all_data <- all_data %>% dplyr::filter(stringr::str_detect(message,'commit : initial commit'))
-- 
GitLab