Skip to content
Snippets Groups Projects
Verified Commit 230c5591 authored by Geoffrey Arthaud's avatar Geoffrey Arthaud
Browse files

Improve obsolete files detection for Maven type registry

parent 2dad17b5
No related branches found
No related tags found
No related merge requests found
...@@ -412,6 +412,7 @@ dependencies = [ ...@@ -412,6 +412,7 @@ dependencies = [
"gitlab", "gitlab",
"human_bytes", "human_bytes",
"indicatif", "indicatif",
"lazy_static",
"regex", "regex",
"serde", "serde",
"structopt", "structopt",
......
...@@ -14,6 +14,7 @@ git2 = "0.15.0" ...@@ -14,6 +14,7 @@ git2 = "0.15.0"
gitlab = "0.1502.0" gitlab = "0.1502.0"
human_bytes = "0.3.1" human_bytes = "0.3.1"
indicatif = "0.17.0" indicatif = "0.17.0"
lazy_static = "1.4.0"
regex = "1.6.0" regex = "1.6.0"
serde = "1.0.142" serde = "1.0.142"
structopt = "0.3.26" structopt = "0.3.26"
......
...@@ -2,6 +2,8 @@ use chrono::{DateTime, Local}; ...@@ -2,6 +2,8 @@ use chrono::{DateTime, Local};
use gitlab::api::{Pagination, Query}; use gitlab::api::{Pagination, Query};
use gitlab::Gitlab; use gitlab::Gitlab;
use human_bytes::human_bytes; use human_bytes::human_bytes;
use lazy_static::lazy_static;
use regex::Regex;
use serde::Deserialize; use serde::Deserialize;
use crate::{Reportable, ReportJob, ReportPending, ReportStatus}; use crate::{Reportable, ReportJob, ReportPending, ReportStatus};
...@@ -16,7 +18,7 @@ pub struct GitlabPackage { ...@@ -16,7 +18,7 @@ pub struct GitlabPackage {
pub created_at: DateTime<Local>, pub created_at: DateTime<Local>,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize, Clone)]
pub struct GitlabPackageFile { pub struct GitlabPackageFile {
pub id: u64, pub id: u64,
pub created_at: DateTime<Local>, pub created_at: DateTime<Local>,
...@@ -40,6 +42,7 @@ pub struct PackageAnalysisReport { ...@@ -40,6 +42,7 @@ pub struct PackageAnalysisReport {
pub project: Project, pub project: Project,
pub packages: Vec<PackageWithFile>, pub packages: Vec<PackageWithFile>,
pub report_status: Vec<ReportStatus>, pub report_status: Vec<ReportStatus>,
pub obsolete_files: Vec<GitlabPackageFile>,
pub savable_files: usize, pub savable_files: usize,
pub savable_bytes: u64, pub savable_bytes: u64,
} }
...@@ -57,6 +60,7 @@ impl PackageAnalysisJob { ...@@ -57,6 +60,7 @@ impl PackageAnalysisJob {
project: self.project, project: self.project,
packages: vec![], packages: vec![],
report_status: vec![status], report_status: vec![status],
obsolete_files: vec![],
savable_files: 0, savable_files: 0,
savable_bytes: 0, savable_bytes: 0,
} }
...@@ -91,6 +95,7 @@ impl ReportJob for PackageAnalysisJob { ...@@ -91,6 +95,7 @@ impl ReportJob for PackageAnalysisJob {
let mut savable_bytes = 0; let mut savable_bytes = 0;
let mut savable_files = 0; let mut savable_files = 0;
let mut packages_with_files = vec![]; let mut packages_with_files = vec![];
let mut obsolete_files = vec![];
for package in packages.into_iter() { for package in packages.into_iter() {
let endpoint = PackageFiles::builder() let endpoint = PackageFiles::builder()
.project(self.project.id) .project(self.project.id)
...@@ -101,14 +106,16 @@ impl ReportJob for PackageAnalysisJob { ...@@ -101,14 +106,16 @@ impl ReportJob for PackageAnalysisJob {
gitlab::api::paged(endpoint, Pagination::All) gitlab::api::paged(endpoint, Pagination::All)
.query(&self.gitlab) .query(&self.gitlab)
.unwrap(); .unwrap();
files.sort_by(|a, b| a.created_at.partial_cmp(&b.created_at).unwrap()); files.sort_by(|a, b| b.created_at.partial_cmp(&a.created_at).unwrap());
let obsolete_ids = detect_obsolete_files(&package, &files); let obsolete_ids = detect_obsolete_files(&package, &files);
savable_files += obsolete_ids.len(); savable_files += obsolete_ids.len();
obsolete_ids.iter().for_each(|i| obsolete_files.push(files[*i].clone()));
savable_bytes += obsolete_ids.into_iter() savable_bytes += obsolete_ids.into_iter()
.map(|i| files[i].size) .map(|i| files[i].size)
.sum::<u64>(); .sum::<u64>();
packages_with_files.push(PackageWithFile { packages_with_files.push(PackageWithFile {
package, package,
sorted_files: files, sorted_files: files,
...@@ -125,6 +132,7 @@ impl ReportJob for PackageAnalysisJob { ...@@ -125,6 +132,7 @@ impl ReportJob for PackageAnalysisJob {
packages: packages_with_files, packages: packages_with_files,
savable_files, savable_files,
savable_bytes, savable_bytes,
obsolete_files
} }
} }
} }
...@@ -146,12 +154,22 @@ impl PackageAnalysisJob { ...@@ -146,12 +154,22 @@ impl PackageAnalysisJob {
impl PackageAnalysisReport {} impl PackageAnalysisReport {}
fn detect_obsolete_files(_: &GitlabPackage, fn detect_obsolete_files(package: &GitlabPackage,
sorted_files: &[GitlabPackageFile]) -> Vec<usize> { sorted_files: &[GitlabPackageFile]) -> Vec<usize> {
let mut ids = vec![]; let mut ids = vec![];
let mut names : Vec<&str> = vec![]; let slash_idx = package.name.rfind('/').map(|i| i + 1).unwrap_or(0);
let package_name = &package.name[slash_idx..];
let mut names: Vec<&str> = vec![];
let mut pkg_files: Vec<String> = vec![];
for (idx, file) in sorted_files.iter().enumerate() { for (idx, file) in sorted_files.iter().enumerate() {
if names.iter().any(|e| **e == file.file_name) { if file.file_name.starts_with(package_name) && package.package_type == "maven" {
let generic_name = package_name.to_string() + _get_extension(&file.file_name);
if pkg_files.contains(&generic_name) {
ids.push(idx);
} else {
pkg_files.push(generic_name);
}
} else if names.iter().any(|e| **e == file.file_name) {
ids.push(idx); ids.push(idx);
} else { } else {
names.push(&file.file_name); names.push(&file.file_name);
...@@ -160,6 +178,13 @@ fn detect_obsolete_files(_: &GitlabPackage, ...@@ -160,6 +178,13 @@ fn detect_obsolete_files(_: &GitlabPackage,
ids ids
} }
fn _get_extension(file_name: &str) -> &str {
lazy_static! {
static ref RE_EXT: Regex = Regex::new(r"(\.[a-z]\w+)+$").unwrap();
}
RE_EXT.find(file_name).map(|m| m.as_str()).unwrap_or("")
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use chrono::Duration; use chrono::Duration;
...@@ -190,17 +215,16 @@ mod tests { ...@@ -190,17 +215,16 @@ mod tests {
created_at: Local::now() - Duration::days(6), created_at: Local::now() - Duration::days(6),
file_name: "zyx.txt".to_string(), file_name: "zyx.txt".to_string(),
size: 13, size: 13,
}, }, GitlabPackageFile {
GitlabPackageFile { id: 56,
id: 56, created_at: Local::now() - Duration::days(7),
created_at: Local::now() - Duration::days(7), file_name: "abc.txt".to_string(),
file_name: "abc.txt".to_string(), size: 13,
size: 13, }, GitlabPackageFile {
}, GitlabPackageFile { id: 50,
id: 50, created_at: Local::now() - Duration::days(8),
created_at: Local::now() - Duration::days(8), file_name: "zyx.txt".to_string(),
file_name: "zyx.txt".to_string(), size: 13,
size: 13,
}]; }];
// WHEN // WHEN
...@@ -209,4 +233,118 @@ mod tests { ...@@ -209,4 +233,118 @@ mod tests {
// THEN // THEN
assert_eq!(ids, vec![1, 3, 4]); assert_eq!(ids, vec![1, 3, 4]);
} }
#[test]
fn detect_obsolete_files_empty() {
// GIVEN
let package = GitlabPackage {
id: 42,
name: "my-generic".to_string(),
package_type: "generic".to_string(),
created_at: Local::now() - Duration::days(30),
};
let files = vec![];
// WHEN
let ids = detect_obsolete_files(&package, &files);
// THEN
assert_eq!(ids, Vec::<usize>::new());
}
#[test]
fn detect_maven_obsolete_files_nominal() {
// GIVEN
let package = GitlabPackage {
id: 42,
name: "my-app".to_string(),
package_type: "maven".to_string(),
created_at: Local::now() - Duration::days(30),
};
let files = vec![GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(4),
file_name: "my-app-1.5-20181107.152550-1.jar".to_string(),
size: 13,
}, GitlabPackageFile {
id: 54,
created_at: Local::now() - Duration::days(4),
file_name: "my-app-1.5-20181107.152550-1.pom".to_string(),
size: 13,
}, GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(4),
file_name: "maven-metadata.xml".to_string(),
size: 13,
}, GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(5),
file_name: "my-app-1.5-20181007.142550-1.jar".to_string(),
size: 13,
}, GitlabPackageFile {
id: 54,
created_at: Local::now() - Duration::days(5),
file_name: "my-app-1.5-20181007.142550-1.pom".to_string(),
size: 13,
}, GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(5),
file_name: "maven-metadata.xml".to_string(),
size: 13,
}, ];
// WHEN
let ids = detect_obsolete_files(&package, &files);
// THEN
assert_eq!(ids, vec![3, 4, 5]);
}
#[test]
fn detect_generic_obsolete_files_with_mvn_traps() {
// GIVEN
let package = GitlabPackage {
id: 42,
name: "my-app".to_string(),
package_type: "generic".to_string(),
created_at: Local::now() - Duration::days(30),
};
let files = vec![GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(4),
file_name: "my-app-1.5-20181107.152550-1.jar".to_string(),
size: 13,
}, GitlabPackageFile {
id: 54,
created_at: Local::now() - Duration::days(4),
file_name: "my-app-1.5-20181107.152550-1.pom".to_string(),
size: 13,
}, GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(4),
file_name: "maven-metadata.xml".to_string(),
size: 13,
}, GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(5),
file_name: "my-app-1.5-20181007.142550-1.jar".to_string(),
size: 13,
}, GitlabPackageFile {
id: 54,
created_at: Local::now() - Duration::days(5),
file_name: "my-app-1.5-20181007.142550-1.pom".to_string(),
size: 13,
}, GitlabPackageFile {
id: 50,
created_at: Local::now() - Duration::days(5),
file_name: "maven-metadata.xml".to_string(),
size: 13,
}, ];
// WHEN
let ids = detect_obsolete_files(&package, &files);
// THEN
assert_eq!(ids, vec![5]);
}
} }
\ No newline at end of file
...@@ -48,5 +48,6 @@ fn main() { ...@@ -48,5 +48,6 @@ fn main() {
// Analysis of packages // Analysis of packages
let report_pending = PackageAnalysisJob::from(&connection_data).diagnose(); let report_pending = PackageAnalysisJob::from(&connection_data).diagnose();
let _ = cli::display_report_pending(report_pending); let report = cli::display_report_pending(report_pending);
eprintln!("{:?}", report.obsolete_files);
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment