From 0841907de0f71124538a304ecdd4e81dd0fb5ee7 Mon Sep 17 00:00:00 2001
From: Geoffrey Arthaud <geoffrey.arthaud@developpement-durable.gouv.fr>
Date: Wed, 21 Sep 2022 14:41:30 +0200
Subject: [PATCH] Add analysis of container registry (#8)

---
 i18n/en-GB/gitlab_project_doctor.ftl |   5 +-
 i18n/fr-FR/gitlab_project_doctor.ftl |   5 +-
 src/api.rs                           |   1 +
 src/api/registry.rs                  |  14 +++
 src/api/registry/repositories.rs     |  53 ++++++++
 src/api/registry/tag.rs              |  42 +++++++
 src/diagnosis.rs                     |   6 +-
 src/diagnosis/conf_analysis.rs       |   7 +-
 src/diagnosis/container_analysis.rs  | 182 +++++++++++++++++++++++++++
 src/diagnosis/gitlab_connection.rs   |   2 +-
 src/main.rs                          |   8 ++
 11 files changed, 320 insertions(+), 5 deletions(-)
 create mode 100644 src/api/registry.rs
 create mode 100644 src/api/registry/repositories.rs
 create mode 100644 src/api/registry/tag.rs
 create mode 100644 src/diagnosis/container_analysis.rs

diff --git a/i18n/en-GB/gitlab_project_doctor.ftl b/i18n/en-GB/gitlab_project_doctor.ftl
index 31d77eb..bac24ce 100644
--- a/i18n/en-GB/gitlab_project_doctor.ftl
+++ b/i18n/en-GB/gitlab_project_doctor.ftl
@@ -36,4 +36,7 @@ conf-analysing = Analysis of package configuration
 duplicate-assets-option-onepackage = The number of duplicate assets to keep is 1
 duplicate-assets-option-warn = The number of duplicate assets to keep is NOT 1
 duplicate-assets-option-error = Cannot get the number of duplicate assets to keep option
-conf-fix = Fix this : {$url}
\ No newline at end of file
+conf-fix = Fix this : {$url}
+container-analysing = Analysis of container registry
+container-report = {$image_count} images in container registry. {$old_image_count} are older than {$nb_days} days
+container-summary = Container registry size: {$registry_size}
\ No newline at end of file
diff --git a/i18n/fr-FR/gitlab_project_doctor.ftl b/i18n/fr-FR/gitlab_project_doctor.ftl
index 12c13cd..98d4720 100644
--- a/i18n/fr-FR/gitlab_project_doctor.ftl
+++ b/i18n/fr-FR/gitlab_project_doctor.ftl
@@ -36,4 +36,7 @@ conf-analysing = Analyse de la configuration des packages
 duplicate-assets-option-onepackage = L'option "The number of duplicate assets to keep" vaut 1
 duplicate-assets-option-warn = L'option "The number of duplicate assets to keep" ne vaut PAS 1
 duplicate-assets-option-error = Cannot get the number of duplicate assets to keep option
-conf-fix = Pour corriger : {$url}
\ No newline at end of file
+conf-fix = Pour corriger : {$url}
+container-analysing = Analyse du container registry
+container-report = {$image_count} images dans le container registry. {$old_image_count} datent de plus de {$nb_days} jours
+container-summary = Taille du container registry : {$registry_size}
\ No newline at end of file
diff --git a/src/api.rs b/src/api.rs
index 3265f50..c02a403 100644
--- a/src/api.rs
+++ b/src/api.rs
@@ -1,3 +1,4 @@
 // Add specific endpoint for gitlab client, not covered by the crate gitlab 0.15.x
 
 pub mod packages;
+pub mod registry;
diff --git a/src/api/registry.rs b/src/api/registry.rs
new file mode 100644
index 0000000..c9dcfba
--- /dev/null
+++ b/src/api/registry.rs
@@ -0,0 +1,14 @@
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Container registry API endpoints.
+//!
+//! These endpoints are used for querying Gitlab container registry.
+pub use self::repositories::Repositories;
+pub use self::tag::Tag;
+
+mod repositories;
+mod tag;
diff --git a/src/api/registry/repositories.rs b/src/api/registry/repositories.rs
new file mode 100644
index 0000000..fddc8dd
--- /dev/null
+++ b/src/api/registry/repositories.rs
@@ -0,0 +1,53 @@
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use derive_builder::Builder;
+use gitlab::api::common::NameOrId;
+use gitlab::api::endpoint_prelude::*;
+
+/// Query for registry repositories within a project.
+#[derive(Debug, Builder)]
+#[builder(setter(strip_option))]
+pub struct Repositories<'a> {
+    /// The project to query for repositories.
+    #[builder(setter(into))]
+    project: NameOrId<'a>,
+    /// Whether to include the array of tags in the response.
+    #[builder(default)]
+    tags: Option<bool>,
+    /// Whether to include the tags count in the response.
+    #[builder(default)]
+    tags_count: Option<bool>,
+}
+
+impl<'a> Repositories<'a> {
+    /// Create a builder for the endpoint; `project` is the only mandatory field.
+    pub fn builder() -> RepositoriesBuilder<'a> {
+        RepositoriesBuilder::default()
+    }
+}
+
+impl<'a> Endpoint for Repositories<'a> {
+    /// Repositories are listed with a `GET` request.
+    fn method(&self) -> Method {
+        Method::GET
+    }
+
+    /// Path of the project's registry repositories listing.
+    fn endpoint(&self) -> Cow<'static, str> {
+        Cow::Owned(format!("projects/{}/registry/repositories", self.project))
+    }
+
+    /// Forwards the optional `tags` and `tags_count` flags as query parameters.
+    fn parameters(&self) -> QueryParams {
+        let mut query = QueryParams::default();
+        query.push_opt("tags", self.tags);
+        query.push_opt("tags_count", self.tags_count);
+        query
+    }
+}
+
+impl<'a> Pageable for Repositories<'a> {}
diff --git a/src/api/registry/tag.rs b/src/api/registry/tag.rs
new file mode 100644
index 0000000..59aeaa1
--- /dev/null
+++ b/src/api/registry/tag.rs
@@ -0,0 +1,42 @@
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use derive_builder::Builder;
+use gitlab::api::common::NameOrId;
+use gitlab::api::endpoint_prelude::*;
+
+/// Query a single tag on a repository from the container registry.
+#[derive(Debug, Builder)]
+pub struct Tag<'a> {
+    /// The project that owns the registry repository.
+    #[builder(setter(into))]
+    project: NameOrId<'a>,
+    /// The ID of the repository.
+    repository: u64,
+    /// The name of the tag.
+    tag_name: String,
+}
+
+impl<'a> Tag<'a> {
+    /// Create a builder for the endpoint; every field is mandatory.
+    pub fn builder() -> TagBuilder<'a> {
+        TagBuilder::default()
+    }
+}
+
+impl<'a> Endpoint for Tag<'a> {
+    /// Tag details are read with a `GET` request.
+    fn method(&self) -> Method {
+        Method::GET
+    }
+
+    /// Path of one tag inside one registry repository of the project.
+    fn endpoint(&self) -> Cow<'static, str> {
+        let (project, repo, tag) = (&self.project, self.repository, &self.tag_name);
+        let path = format!("projects/{}/registry/repositories/{}/tags/{}", project, repo, tag);
+        Cow::Owned(path)
+    }
+}
diff --git a/src/diagnosis.rs b/src/diagnosis.rs
index 1f3d88d..10b93ad 100644
--- a/src/diagnosis.rs
+++ b/src/diagnosis.rs
@@ -1,6 +1,7 @@
 use std::sync::mpsc::Receiver;
 use std::thread::JoinHandle;
 pub mod conf_analysis;
+pub mod container_analysis;
 pub mod gitlab_connection;
 pub mod job_analysis;
 pub mod package_analysis;
@@ -13,7 +14,10 @@ pub const REPO_LIMIT: u64 = 100_000_000;
 pub const ARTIFACT_JOBS_LIMIT: u64 = 500_000_000;
 pub const ARTIFACT_JOBS_NB_LIMIT: usize = 1_000;
 pub const PACKAGE_REGISTRY_LIMIT: u64 = 1_000_000_000;
-pub const DOCKER_REGISTRY_LIMIT: u64 = 5_000_000_000;
+pub const CONTAINER_REGISTRY_LIMIT: u64 = 5_000_000_000;
+pub const CONTAINER_NB_TAGS_LIMIT: u64 = 7;
+pub const CONTAINER_NB_IMAGES_LIMIT: u64 = 20;
+pub const CONTAINER_DAYS_LIMIT: u64 = 90;
 
 pub const GITLAB_403_ERROR: &str = "403 Forbidden";
 pub const GITLAB_SCOPE_ERROR: &str = "insufficient_scope";
diff --git a/src/diagnosis/conf_analysis.rs b/src/diagnosis/conf_analysis.rs
index b59377b..f5b9034 100644
--- a/src/diagnosis/conf_analysis.rs
+++ b/src/diagnosis/conf_analysis.rs
@@ -71,7 +71,12 @@ impl ConfAnalysisJob {
 
     fn _report_container_policy(&self) -> ReportStatus {
         if !self.project.container_registry_enabled
-            || self.project.container_expiration_policy.enabled
+            || self
+                .project
+                .container_expiration_policy
+                .as_ref()
+                .map(|c| c.enabled)
+                .unwrap_or(false)
         {
             ReportStatus::OK(fl!("container-policy-enabled"))
         } else {
diff --git a/src/diagnosis/container_analysis.rs b/src/diagnosis/container_analysis.rs
new file mode 100644
index 0000000..13f14a8
--- /dev/null
+++ b/src/diagnosis/container_analysis.rs
@@ -0,0 +1,182 @@
+use chrono::{DateTime, Duration, Local};
+use gitlab::api::{Pagination, Query};
+use gitlab::Gitlab;
+use human_bytes::human_bytes;
+use serde::Deserialize;
+
+use crate::diagnosis::gitlab_connection::{GitlabRepository, Project};
+use crate::diagnosis::{warning_if, CONTAINER_REGISTRY_LIMIT};
+use crate::{api, fl, ReportJob, ReportPending, ReportStatus, Reportable};
+
+#[derive(Debug, Deserialize)]
+pub struct GitlabRawContainerRepository {
+    pub id: u64,
+    pub created_at: DateTime<Local>,
+    pub tags: Vec<GitlabContainerTagSummary>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct GitlabContainerRepository {
+    pub id: u64,
+    pub created_at: DateTime<Local>,
+    pub tags: Vec<GitlabContainerTag>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct GitlabContainerTagSummary {
+    pub name: String,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct GitlabContainerTag {
+    pub name: String,
+    pub created_at: DateTime<Local>,
+    pub total_size: u64,
+}
+
+pub struct ContainerAnalysisJob {
+    pub gitlab: Gitlab,
+    pub project: Project,
+    pub days: usize,
+}
+
+pub struct ContainerAnalysisReport {
+    pub gitlab: Gitlab,
+    pub project: Project,
+    pub containers: Vec<GitlabContainerRepository>,
+    pub report_status: Vec<ReportStatus>,
+}
+
+impl Reportable for ContainerAnalysisReport {
+    fn report(&self) -> Vec<ReportStatus> {
+        self.report_status.clone()
+    }
+}
+
+impl ContainerAnalysisJob {
+    /// Consumes the job and packages the findings into the final report.
+    fn to_report(
+        self,
+        report_status: Vec<ReportStatus>,
+        containers: Vec<GitlabContainerRepository>,
+    ) -> ContainerAnalysisReport {
+        ContainerAnalysisReport {
+            gitlab: self.gitlab,
+            project: self.project,
+            containers,
+            report_status,
+        }
+    }
+
+    /// Loads the details of every tag of every listed repository, stopping at
+    /// the first failure so the caller can report it instead of panicking.
+    fn get_detailed_repo(
+        &self,
+        containers: &[GitlabRawContainerRepository],
+    ) -> Result<Vec<GitlabContainerRepository>, String> {
+        containers
+            .iter()
+            .map(|cr| {
+                let tags: Result<Vec<GitlabContainerTag>, String> =
+                    cr.tags.iter().map(|t| self.get_detailed_tag(t, cr.id)).collect();
+                tags.map(|tags| GitlabContainerRepository {
+                    id: cr.id,
+                    created_at: cr.created_at,
+                    tags,
+                })
+            })
+            .collect()
+    }
+
+    /// Queries the details of one tag of repository `repo_id`; any failure is
+    /// returned as its display text.
+    fn get_detailed_tag(
+        &self,
+        tag: &GitlabContainerTagSummary,
+        repo_id: u64,
+    ) -> Result<GitlabContainerTag, String> {
+        let endpoint = api::registry::Tag::builder()
+            .project(self.project.id)
+            .repository(repo_id)
+            .tag_name(tag.name.clone())
+            .build()
+            .map_err(|e| e.to_string())?;
+        endpoint.query(&self.gitlab).map_err(|e| e.to_string())
+    }
+}
+
+impl ReportJob for ContainerAnalysisJob {
+    type Diagnosis = ContainerAnalysisReport;
+
+    /// Spawns the analysis thread: lists the registry repositories, loads every
+    /// tag's details, then reports the registry size and the image counts.
+    fn diagnose(self) -> ReportPending<Self::Diagnosis> {
+        ReportPending::<Self::Diagnosis> {
+            pending_msg: fl!("container-analysing"),
+            job: std::thread::spawn(move || {
+                // Gate on the registry's own project flag rather than on CI/CD.
+                // NOTE(review): "no-cicd" is reused as the N/A text; a dedicated
+                // message key for a disabled registry would be more accurate.
+                if !self.project.container_registry_enabled {
+                    return self.to_report(vec![ReportStatus::NA(fl!("no-cicd"))], vec![]);
+                }
+
+                let endpoint = api::registry::Repositories::builder()
+                    .project(self.project.id)
+                    .tags(true)
+                    .build()
+                    .unwrap();
+                let listing: Result<Vec<GitlabRawContainerRepository>, _> =
+                    gitlab::api::paged(endpoint, Pagination::All).query(&self.gitlab);
+                // Listing and per-tag failures end up in the same error report.
+                let detailed = listing
+                    .map_err(|e| e.to_string())
+                    .and_then(|raw| self.get_detailed_repo(&raw));
+                match detailed {
+                    Err(e) => self.to_report(
+                        vec![ReportStatus::ERROR(format!("{} {}", fl!("error"), e))],
+                        vec![],
+                    ),
+                    Ok(container_repos) => {
+                        let days = self.days;
+                        let ref_date = Local::now() - Duration::days(days as i64);
+                        let tags = || container_repos.iter().flat_map(|cr| cr.tags.iter());
+                        let image_count = tags().count();
+                        let registry_size: u64 = tags().map(|t| t.total_size).sum();
+                        let old_image_count = tags().filter(|t| t.created_at < ref_date).count();
+                        self.to_report(
+                            vec![
+                                warning_if(
+                                    registry_size > CONTAINER_REGISTRY_LIMIT,
+                                    fl!(
+                                        "container-summary",
+                                        registry_size = human_bytes(registry_size as f64)
+                                    ),
+                                ),
+                                ReportStatus::NA(fl!(
+                                    "container-report",
+                                    image_count = image_count,
+                                    old_image_count = old_image_count,
+                                    nb_days = days
+                                )),
+                            ],
+                            container_repos,
+                        )
+                    }
+                }
+            }),
+            progress: None,
+            total: None,
+        }
+    }
+}
+
+impl ContainerAnalysisJob {
+    pub fn from(gitlab: &GitlabRepository, days: usize) -> ContainerAnalysisJob {
+        ContainerAnalysisJob {
+            gitlab: gitlab.gitlab.clone(),
+            project: gitlab.project.clone(),
+            days,
+        }
+    }
+}
diff --git a/src/diagnosis/gitlab_connection.rs b/src/diagnosis/gitlab_connection.rs
index c1fdbc7..06affba 100644
--- a/src/diagnosis/gitlab_connection.rs
+++ b/src/diagnosis/gitlab_connection.rs
@@ -36,7 +36,7 @@ pub struct Project {
     pub statistics: Statistics,
     pub jobs_enabled: bool,
     pub container_registry_enabled: bool,
-    pub container_expiration_policy: ContainerExpirationPolicy,
+    pub container_expiration_policy: Option<ContainerExpirationPolicy>,
     pub web_url: String,
     pub path_with_namespace: String,
 }
diff --git a/src/main.rs b/src/main.rs
index fdfb882..7c182ec 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,6 +3,7 @@ use structopt::StructOpt;
 use cli::Args;
 
 use crate::diagnosis::conf_analysis::{ConfAnalysisJob, ConfAnalysisReport};
+use crate::diagnosis::container_analysis::{ContainerAnalysisJob, ContainerAnalysisReport};
 use crate::diagnosis::gitlab_connection::{ConnectionJob, GitlabRepository, Statistics};
 use crate::diagnosis::job_analysis::{JobAnalysisJob, JobAnalysisReport};
 use crate::diagnosis::package_analysis::{PackageAnalysisJob, PackageAnalysisReport};
@@ -158,6 +159,12 @@ fn _analyze_configuration(connection_data: &GitlabRepository) -> ConfAnalysisRep
     let report_pending = ConfAnalysisJob::from(connection_data).diagnose();
     cli::display_report_pending(report_pending)
 }
+
+/// Starts the container registry diagnosis and hands it to `cli::display_report_pending`.
+fn _analyze_registry(days: usize, connection_data: &GitlabRepository) -> ContainerAnalysisReport {
+    cli::display_report_pending(ContainerAnalysisJob::from(connection_data, days).diagnose())
+}
+
 fn _clean_packages(report: PackageAnalysisReport) {
     if !report.obsolete_files.is_empty() {
         return;
@@ -171,6 +178,7 @@ fn main() {
     eprintln!("Gitlab Project Doctor v{}", env!("CARGO_PKG_VERSION"));
     let connection_data = _connect_to_gitlab(&args);
     let _ = _analyze_configuration(&connection_data);
+    let _ = _analyze_registry(args.days, &connection_data);
     if args.analysis_mode {
         // Analysis mode
 
-- 
GitLab