# Patch commentary (free text placed before the `diff --git` header).
#
# Purpose: unified diff for global.R that namespace-qualifies package calls
# and adds one extra load() call.
#
# Hunk 1 (@@ -27,23 +27,23 @@):
#   * adds  load("path_annuaire.RData")  and removes one blank line;
#   * read_csv                                    -> readr::read_csv
#   * rename / filter / mutate / select / bind_rows -> dplyr::...
#   * str_detect / str_remove                     -> stringr::...
#   * stringdist_left_join                        -> fuzzyjoin::stringdist_left_join
#   * replace_na                                  -> tidyr::replace_na
#   Arguments, string literals and the `%>%` pipelines are the same on the
#   `-` and `+` sides; only the function qualifiers differ.
# Hunk 2 (@@ -64,4 +64,4 @@):
#   * filter(str_detect(...)) -> dplyr::filter(stringr::str_detect(...))
#     in the `min_all_data` assignment.
#
# The in-line French R comment on the stringdist_left_join line translates to:
#   "adds `auteur` for SCTE members via the directory; 75% of the authors
#    are from the CSD".
#
# NOTE(review): in the two-line mutate() that builds `message`, paste() reads
#   `auteur` before replace_na(auteur, "autre") is applied in the same call,
#   so rows without a directory match presumably get the text "NA" (not
#   "autre") inside `message`. Same on the `-` and `+` sides - confirm intended.
# NOTE(review): `path_annuaire` is presumably the object restored by the newly
#   added load("path_annuaire.RData") - the .RData contents are not visible
#   here; verify.
# NOTE(review): the fuzzy left join (method = "jw", max_dist = 0.265) may
#   return more than one directory row per `all_data` row - TODO confirm
#   whether duplicates are handled downstream.
# NOTE(review): the patch text below is stored on two physical lines, so the
#   original line breaks and indentation are not visible here.
diff --git a/global.R b/global.R index 0e42336652986fb3d1617b92a0ee0a06fd4342d5..dd1e76cc4aa1996277b89bc77e88854d16e27048 100644 --- a/global.R +++ b/global.R @@ -27,23 +27,23 @@ load("github.RData") load("gitlab_forge.RData") load("gitlab.RData") load("date_MAJ.RData") +load("path_annuaire.RData") - -annuaire <- read_csv(path_annuaire,col_types = "c") +annuaire <- readr::read_csv(path_annuaire,col_types = "c") annuaire$Messagerie <- tolower(annuaire$Messagerie) -annuaire <- annuaire %>% rename( mail = Messagerie) -annuaire <- annuaire %>% filter( str_detect(Unité,"CSD" ) ) -annuaire <- annuaire %>% mutate(auteur = paste(Prénom, Nom), - mail_clean = str_remove(mail, "@.*") )%>% select(mail_clean, auteur) +annuaire <- annuaire %>% dplyr::rename( mail = Messagerie) +annuaire <- annuaire %>% dplyr::filter( stringr::str_detect(Unité,"CSD" ) ) +annuaire <- annuaire %>% dplyr::mutate(auteur = paste(Prénom, Nom), + mail_clean = stringr::str_remove(mail, "@.*") )%>% dplyr::select(mail_clean, auteur) -all_data <- bind_rows(all_data_gitlab, all_data_forge, combined_data) +all_data <- dplyr::bind_rows(all_data_gitlab, all_data_forge, combined_data) all_data$updated_at <- as.POSIXct(all_data$updated_at, format = ("%Y-%m-%dT%H:%M:%S")) -all_data <- all_data %>% mutate(email_clean = str_remove(author, "@.*")) -all_data <- all_data %>% stringdist_left_join(annuaire,by = c("email_clean" = "mail_clean"), method = "jw", max_dist = 0.265) # ajout de auteur des membre du SCTE via l'annuaire, 75% des auteurs sont du csd +all_data <- all_data %>% dplyr::mutate(email_clean = stringr::str_remove(author, "@.*")) +all_data <- all_data %>% fuzzyjoin::stringdist_left_join(annuaire,by = c("email_clean" = "mail_clean"), method = "jw", max_dist = 0.265) # ajout de auteur des membre du SCTE via l'annuaire, 75% des auteurs sont du csd all_data <- all_data %>% dplyr::mutate(etiquette = paste(groupe,topics,sep = ",")) -all_data <- all_data %>% mutate(message = paste(type,sep = " : ",message,auteur), 
- auteur = replace_na(auteur,"autre")) +all_data <- all_data %>% dplyr::mutate(message = paste(type,sep = " : ",message,auteur), + auteur = tidyr::replace_na(auteur,"autre")) # transformation re_code ---- traduction <- c( @@ -64,4 +64,4 @@ all_data <- all_data %>% dplyr::rename(project_name = name) #transformation message all_data$message <- all_data$message %>% tolower() -min_all_data <- all_data %>% filter(str_detect(message,'commit : initial commit')) +min_all_data <- all_data %>% dplyr::filter(stringr::str_detect(message,'commit : initial commit'))