Skip to content
Snippets Groups Projects
Commit b7bc68fa authored by CANÉVET Cindy's avatar CANÉVET Cindy Committed by Martins, Laurent
Browse files

fix #390 : Analyse du sitemap

parent c9fc7a94
No related branches found
No related tags found
No related merge requests found
Showing
with 369 additions and 34 deletions
......@@ -2,6 +2,10 @@ package fr.numeco.analyser.service;
import fr.numeco.analyser.AnalyzerConfig;
import fr.numeco.misis.analyzer.service.AnalyserService;
import fr.numeco.misis.enums.AnalyseStatut;
import fr.numeco.misis.sitemap.service.SitemapService;
import fr.numeco.misis.suividesite.domain.Page;
import io.quarkus.logging.Log;
import io.quarkus.scheduler.Scheduled;
import static io.quarkus.scheduler.Scheduled.ConcurrentExecution.SKIP;
import io.quarkus.scheduler.ScheduledExecution;
......@@ -12,12 +16,35 @@ import lombok.RequiredArgsConstructor;
@RequiredArgsConstructor
public class AnalysisScheduler {
private final SitemapService sitemapService;
private final AnalyserService analyserService;
private final AnalyzerConfig analyzerConfig;
/**
 * Scheduled entry point: refreshes the pages of sitemap-driven groups, then launches the analysis.
 *
 * <p>The trigger is skipped when {@link AnalyserService} (used as skip predicate) reports {@code true}.
 * The analysis is started exactly once per trigger, after the sitemap flux has terminated. A sitemap
 * failure is logged and does not prevent the analysis of the pages that are already known.
 *
 * @param execution the scheduler execution metadata (not used by this method)
 */
@Scheduled(identity = "AnalysisScheduler", cron = "{analyser.refresh.schedule.cron.expression}", concurrentExecution = SKIP, skipExecutionIf = AnalyserService.class)
void scheduleGroupesDePagesAnalysis(final ScheduledExecution execution) {
    sitemapService.process()
            .subscribe().with(
                    this::log,
                    failure -> {
                        Log.error("Sitemap error", failure);
                        // A broken sitemap must not cancel the analysis run for this trigger.
                        runAnalysis();
                    },
                    () -> {
                        runAnalysis();
                        Log.debug("Flux completed");
                    }
            );
}

/** Launches the analysis on the first window of {@code paginationSize} entries (indexes 0 to size-1). */
private void runAnalysis() {
    analyserService.process(0, analyzerConfig.paginationSize() - 1);
}
/**
 * Traces, at debug level only, what the sitemap synchronisation did with a page.
 *
 * <p>A page whose status is {@code NOUVEAU} is reported as created (with its URL); any other page is
 * reported as updated (with its id).
 *
 * @param page the page emitted by the sitemap flux
 */
private void log(Page page) {
    if (!Log.isDebugEnabled()) {
        return;
    }
    final boolean justCreated = page.getAnalyseStatut() == AnalyseStatut.NOUVEAU;
    if (justCreated) {
        Log.debugf("Page %s créée par le sitemap.", page.getUrl());
        return;
    }
    Log.debugf("Page %s mise à jour par le sitemap.", page.getId());
}
}
package fr.numeco.misis.analyzer.resource;
import org.eclipse.microprofile.openapi.annotations.Operation;
import org.eclipse.microprofile.openapi.annotations.responses.APIResponse;
import fr.numeco.misis.analyzer.service.AnalyserService;
import io.quarkus.panache.common.Page;
import jakarta.ws.rs.GET;
......@@ -9,8 +12,6 @@ import jakarta.ws.rs.QueryParam;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import lombok.RequiredArgsConstructor;
import org.eclipse.microprofile.openapi.annotations.Operation;
import org.eclipse.microprofile.openapi.annotations.responses.APIResponse;
@Path("api/analyser")
@RequiredArgsConstructor
......
package fr.numeco.misis.analyzer.service;
import java.util.Map;
import fr.numeco.misis.analyzer.dto.PageDto;
import fr.numeco.misis.analyzer.dto.RessourceDto;
import fr.numeco.misis.suividesite.domain.Page;
import fr.numeco.misis.suividesite.domain.Ressource;
import io.quarkus.scheduler.Scheduled.SkipPredicate;
import java.util.Map;
public interface AnalyserService extends SkipPredicate {
void process(io.quarkus.panache.common.Page pagination);
void process(int startIndex, int lastIndex);
......
......@@ -153,7 +153,7 @@ public class DefaultAnalyserService implements AnalyserService {
/**
 * Skip predicate consulted by the scheduler before each trigger.
 *
 * <p>Returning {@code true} makes the scheduler skip the execution, so the running flag is returned
 * as-is instead of a hard-coded {@code false} that would switch the guard off.
 * NOTE(review): confirm that {@code isRunning} is reset when a run ends, including on failure.
 *
 * @param execution the execution about to be triggered (not inspected)
 * @return the current value of {@code isRunning}
 */
@Override
public boolean test(ScheduledExecution execution) {
    return isRunning;
}
}
package fr.numeco.misis.enums;
/**
 * How the pages of a group are provided.
 */
public enum MethodeDeCreationDeGroupe {
    /** Pages are supplied explicitly with the group. */
    MANUELLE,
    /** Pages are derived from the sitemap attached to the group. */
    AUTOMATIQUE
}
package fr.numeco.misis.error;
import fr.numeco.misis.sitemap.exception.SitemapUrlException;
import io.quarkus.logging.Log;
import jakarta.ws.rs.NotFoundException;
import jakarta.ws.rs.core.MediaType;
......@@ -34,6 +35,10 @@ public class MisisExceptionHandler implements ExceptionMapper<Exception> {
.entity(errorResponse)
.type(MediaType.APPLICATION_JSON)
.build();
} else if (e instanceof SitemapUrlException) {
return Response
.status(Response.Status.CONFLICT)
.build();
}
Log.error("unhandled exception", e);
......
package fr.numeco.misis.sitemap;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import jakarta.enterprise.context.ApplicationScoped;
/**
 * Provides the XML parser used to read sitemap documents.
 */
public class SitemapConfiguration {

    /**
     * Builds a {@link DocumentBuilder} hardened against XML external entity (XXE) attacks.
     *
     * <p>Sitemap locations come from callers, so the parser must not resolve DTDs, external
     * entities or XInclude directives found in the fetched document.
     * NOTE(review): {@link DocumentBuilder} is not documented as thread-safe while this instance is
     * application scoped — confirm it is never used concurrently.
     *
     * @return a parser that rejects DOCTYPE declarations
     * @throws ParserConfigurationException if the underlying parser does not support a required feature
     */
    @ApplicationScoped
    public DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
        // Sitemaps never need a DTD; refusing DOCTYPE blocks entity-based attacks altogether.
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
        return factory.newDocumentBuilder();
    }
}
package fr.numeco.misis.sitemap.exception;
/**
 * Signals that a sitemap location is missing, is not an XML resource, or could not be read.
 */
public class SitemapUrlException extends RuntimeException {

    /**
     * @param message description of the problem
     */
    public SitemapUrlException(String message) {
        super(message);
    }

    /**
     * Keeps the underlying failure so that its stack trace is not lost.
     *
     * @param message description of the problem
     * @param cause   the exception that triggered this one
     */
    public SitemapUrlException(String message, Throwable cause) {
        super(message, cause);
    }
}
package fr.numeco.misis.sitemap.resource;
import java.util.Set;
import fr.numeco.misis.sitemap.service.SitemapService;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.QueryParam;
import lombok.RequiredArgsConstructor;
/**
 * REST resource exposing the URLs listed by a sitemap.
 */
@Path("/sitemap")
@RequiredArgsConstructor
public class SitemapResource {

    private final SitemapService sitemapService;

    /**
     * Returns the URLs read from the given sitemap.
     *
     * @param sitemap     location of the sitemap, taken from the {@code q} query parameter
     * @param isRecursive taken from the {@code isrecursive} query parameter; accepted but currently
     *                    not forwarded to the service
     * @return the URLs returned by {@link SitemapService#readSitemap(String)}
     */
    @GET
    public Set<String> getSitemapUrls(@QueryParam("q") String sitemap, @QueryParam("isrecursive") boolean isRecursive) {
        final Set<String> extractedUrls = sitemapService.readSitemap(sitemap);
        return extractedUrls;
    }
}
package fr.numeco.misis.sitemap.service;
import java.util.Set;
import fr.numeco.misis.suividesite.domain.Page;
import io.smallrye.mutiny.Multi;
/**
 * Operations related to sitemaps.
 */
public interface SitemapService {

    /**
     * Reads a sitemap and returns the URLs it contains.
     *
     * @param sitemapUrl location of the sitemap
     * @return the URLs found in the sitemap
     */
    Set<String> readSitemap(String sitemapUrl);

    /**
     * Runs the sitemap processing.
     *
     * @return a stream of the pages handled by the processing
     */
    Multi<Page> process();
}
package fr.numeco.misis.sitemap.service.impl;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.LocalDateTime;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import fr.numeco.misis.enums.AnalyseStatut;
import fr.numeco.misis.sitemap.exception.SitemapUrlException;
import fr.numeco.misis.sitemap.service.SitemapService;
import fr.numeco.misis.suividesite.domain.GroupeDePages;
import fr.numeco.misis.suividesite.domain.Page;
import fr.numeco.misis.suividesite.dto.PageFormDto;
import fr.numeco.misis.suividesite.repository.GroupeDePagesRepository;
import fr.numeco.misis.suividesite.repository.PageRepository;
import io.quarkus.hibernate.orm.panache.PanacheQuery;
import io.smallrye.mutiny.Multi;
import io.smallrye.mutiny.infrastructure.Infrastructure;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import jakarta.transaction.Transactional;
/**
 * Default {@link SitemapService}: reads sitemap XML documents and keeps the pages of the
 * sitemap-driven groups in line with the URLs those documents list.
 */
@ApplicationScoped
@Transactional
public class DefaultSitemapService implements SitemapService {

    /** Name of the {@link Page} attribute used to look a page up by its URL. */
    public static final String PAGE_ATTRIBUT_URL = "url";

    private static final String SITEMAP_URL_REGEX = ".*\\.xml$";
    private static final Pattern SITEMAP_URL_PATTERN = Pattern.compile(SITEMAP_URL_REGEX, Pattern.CASE_INSENSITIVE);

    @Inject
    private DocumentBuilder dBuilder;

    @Inject
    private PageRepository pageRepository;

    @Inject
    private GroupeDePagesRepository groupeDePagesRepository;

    /** Maximum number of URLs kept from a single sitemap. */
    @ConfigProperty(name = "sitemap.size.limit")
    private Integer sizeLimit;

    public void setGroupeDePagesRepository(final GroupeDePagesRepository groupeDePagesRepository) {
        this.groupeDePagesRepository = groupeDePagesRepository;
    }

    public void setPageRepository(final PageRepository pageRepository) {
        this.pageRepository = pageRepository;
    }

    /**
     * Parses the sitemap at {@code sitemapUrl} and collects the content of its {@code loc} elements.
     *
     * <p>Only values that form a valid absolute URL are kept, in document order, and collection
     * stops once {@code sitemap.size.limit} URLs have been gathered. Whitespace surrounding a
     * {@code loc} value is ignored.
     *
     * @param sitemapUrl location of the sitemap; must end with {@code .xml} (case-insensitive)
     * @return the URLs found, possibly empty
     * @throws SitemapUrlException if the location is {@code null}, does not end with {@code .xml},
     *                             or the document cannot be fetched or parsed
     */
    @Override
    public Set<String> readSitemap(final String sitemapUrl) {
        if (sitemapUrl == null) {
            throw new SitemapUrlException("XML file is missing.");
        }
        if (!SITEMAP_URL_PATTERN.matcher(sitemapUrl).find()) {
            throw new SitemapUrlException("Not a XML file.");
        }

        final Set<String> urlsExtractedFromSitemap = new LinkedHashSet<>();
        try {
            final Document parsedSitemap = this.dBuilder.parse(sitemapUrl);
            parsedSitemap.getDocumentElement().normalize();
            final NodeList urlNodeList = parsedSitemap.getElementsByTagName("loc");
            for (int i = 0; i < urlNodeList.getLength(); i++) {
                if (urlsExtractedFromSitemap.size() >= this.sizeLimit) {
                    break;
                }
                final Node node = urlNodeList.item(i);
                if (node.getNodeType() == Node.ELEMENT_NODE && node instanceof Element urlElement) {
                    final String rawUrl = urlElement.getTextContent();
                    // Pretty-printed sitemaps put line breaks and indentation around the URL.
                    final String url = rawUrl == null ? null : rawUrl.strip();
                    if (this.isValidUrl(url)) {
                        urlsExtractedFromSitemap.add(url);
                    }
                }
            }
        } catch (SAXException | IOException | URISyntaxException e) {
            // Keep the original failure attached so the root cause stays visible in the logs.
            final SitemapUrlException failure = new SitemapUrlException(e.getMessage());
            failure.initCause(e);
            throw failure;
        }
        return urlsExtractedFromSitemap;
    }

    /**
     * Synchronises the pages of every group returned by
     * {@link GroupeDePagesRepository#findSitemapGroups()} with its sitemap.
     *
     * @return a stream, emitted on the default executor, of the pages found or created for each
     *         sitemap URL
     */
    @Override
    public Multi<Page> process() {
        final List<GroupeDePages> groupeDePagesSitemap = groupeDePagesRepository.findSitemapGroups();
        return Multi.createFrom().items(groupeDePagesSitemap.stream())
                .emitOn(Infrastructure.getDefaultExecutor())
                .runSubscriptionOn(Infrastructure.getDefaultExecutor())
                .onItem().transformToMultiAndMerge(this::createPagesFromSitemap);
    }

    /** Removes the group's pages that left the sitemap, then emits one page per sitemap URL. */
    private Multi<Page> createPagesFromSitemap(final GroupeDePages groupeDePages) {
        final List<String> sitemapUrls = this.readSitemap(groupeDePages.getSitemap()).stream().toList();
        pageRepository.filterOutdatedUrls(sitemapUrls, groupeDePages);
        return Multi.createFrom().items(sitemapUrls.stream())
                .onItem().transform(value -> new PageFormDto(null, value))
                .onItem().transform(value -> this.createPageIfNotExist(value, groupeDePages));
    }

    /** Returns the page already stored for the URL, or persists a new one attached to the group. */
    private Page createPageIfNotExist(final PageFormDto pageFormDto, final GroupeDePages groupeDePages) {
        final PanacheQuery<Page> query = pageRepository.find(PAGE_ATTRIBUT_URL, pageFormDto.getUrl());
        final Optional<Page> existingPage = query.singleResultOptional();
        return existingPage.orElseGet(() -> {
            final Page created = new Page();
            created.setUrl(pageFormDto.getUrl());
            created.setGroupeDePages(groupeDePages);
            created.setAnalyseStatut(AnalyseStatut.NOUVEAU);
            created.setModifiedAt(LocalDateTime.now());
            pageRepository.persist(created);
            return created;
        });
    }

    /**
     * Tells whether {@code url} can be turned into an absolute {@link java.net.URL}.
     *
     * <p>The checked exceptions stay in the signature for compatibility with existing callers and
     * overriders; this implementation handles them itself and reports {@code false} instead.
     *
     * @param url candidate value, may be {@code null}
     * @return {@code true} if the value is a well-formed absolute URL, {@code false} otherwise
     */
    protected boolean isValidUrl(final String url) throws MalformedURLException, URISyntaxException {
        if (url == null || url.isBlank()) {
            return false;
        }
        try {
            new URI(url).toURL();
            return true;
        } catch (IllegalArgumentException | URISyntaxException | IOException e) {
            return false;
        }
    }
}
package fr.numeco.misis.suividesite.constraints;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import fr.numeco.misis.suividesite.constraints.validators.MethodeDeCreationDeGroupeValidator;
import jakarta.validation.Constraint;
import jakarta.validation.Payload;
/**
 * Type-level constraint checked by {@link MethodeDeCreationDeGroupeValidator}.
 */
@Target({ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
@Constraint(validatedBy = MethodeDeCreationDeGroupeValidator.class)
public @interface MethodeDeCreationDeGroupeConstraint {

    /** Message reported when the constraint is violated. */
    String message() default "Les champs ne sont pas valides";

    /** Validation groups this constraint belongs to. */
    Class<?>[] groups() default {};

    /** Payload attached to the constraint. */
    Class<? extends Payload>[] payload() default {};
}
/*
* Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license
* Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template
*/
package fr.numeco.misis.suividesite.constraints.validators;
import java.util.Objects;
import fr.numeco.misis.suividesite.constraints.MethodeDeCreationDeGroupeConstraint;
import fr.numeco.misis.suividesite.dto.GroupOfPagesFormDto;
import jakarta.validation.ConstraintValidator;
import jakarta.validation.ConstraintValidatorContext;
/**
 * Checks that a {@link GroupOfPagesFormDto} carries the data required by its creation method:
 * a non-blank sitemap for {@code AUTOMATIQUE}, a non-empty page list for {@code MANUELLE}.
 *
 * @author laurent-martins
 */
public class MethodeDeCreationDeGroupeValidator implements ConstraintValidator<MethodeDeCreationDeGroupeConstraint, GroupOfPagesFormDto> {

    /**
     * @param formDto the form to validate, may be {@code null}
     * @param context validation context (not used)
     * @return {@code true} when the form is {@code null} or consistent with its creation method;
     *         {@code false} when the creation method is missing or its required data is absent
     */
    @Override
    public boolean isValid(GroupOfPagesFormDto formDto, ConstraintValidatorContext context) {
        if (Objects.isNull(formDto)) {
            // Nothing to cross-check on an absent form.
            return true;
        }
        if (Objects.isNull(formDto.getMethodeDeCreationDeGroupe())) {
            // Switching on null would throw; report the form as invalid instead.
            return false;
        }
        return switch (formDto.getMethodeDeCreationDeGroupe()) {
            case AUTOMATIQUE -> Objects.nonNull(formDto.getSitemap()) && !formDto.getSitemap().isBlank();
            case MANUELLE -> Objects.nonNull(formDto.getPages()) && !formDto.getPages().isEmpty();
            default -> false;
        };
    }
}
package fr.numeco.misis.suividesite.domain;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import fr.numeco.misis.enums.MethodeDeCreationDeGroupe;
import fr.numeco.misis.enums.PeriodiciteDuSuivi;
import fr.numeco.misis.permission.domain.Auditable;
import fr.numeco.misis.suividesite.dto.GroupOfPagesFormDto;
import fr.numeco.misis.suividesite.dto.PageFormDto;
import jakarta.persistence.*;
import static io.quarkus.hibernate.orm.panache.Panache.getEntityManager;
import jakarta.persistence.CascadeType;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.OneToMany;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import static io.quarkus.hibernate.orm.panache.Panache.getEntityManager;
@Entity
@Getter
@Setter
......@@ -44,6 +49,8 @@ public class GroupeDePages extends Auditable {
@ManyToOne(cascade = CascadeType.MERGE)
private SuiviDeSite suiviDeSite;
private String sitemap;
public void addPage(Page page) {
if (Objects.isNull(pages)) {
pages = new ArrayList<>();
......@@ -92,7 +99,11 @@ public class GroupeDePages extends Auditable {
groupeDePages.setName(groupOfPagesFormDto.getName());
groupeDePages.setPeriodiciteDuSuivi(groupOfPagesFormDto.getPeriodiciteDuSuivi());
groupeDePages.setMethodeDeCreationDeGroupe(groupOfPagesFormDto.getMethodeDeCreationDeGroupe());
manageOrphan(Page.createOrUpdate(suiviDeSite.getUrl(), groupOfPagesFormDto.getPages(), groupeDePages), groupeDePages);
if(groupOfPagesFormDto.getMethodeDeCreationDeGroupe() == MethodeDeCreationDeGroupe.MANUELLE) {
manageOrphan(Page.createOrUpdate(suiviDeSite.getUrl(), groupOfPagesFormDto.getPages(), groupeDePages), groupeDePages);
}
if (Objects.isNull(groupeDePages.getCreatedBy())) {
groupeDePages.setCreatedBy(suiviDeSite.getCreatedBy());
}
......@@ -102,7 +113,8 @@ public class GroupeDePages extends Auditable {
}
groupeDePages.setModifiedDate(LocalDate.now());
groupeDePages.setSuiviDeSite(suiviDeSite);
groupeDePages.setSitemap(groupOfPagesFormDto.getSitemap());
return groupeDePages;
}
......
package fr.numeco.misis.suividesite.dto;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import fr.numeco.misis.enums.MethodeDeCreationDeGroupe;
import fr.numeco.misis.enums.PeriodiciteDuSuivi;
import fr.numeco.misis.suividesite.constraints.MethodeDeCreationDeGroupeConstraint;
import io.quarkus.logging.Log;
import jakarta.validation.Valid;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Size;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@NoArgsConstructor
@AllArgsConstructor
@Getter
@Setter
@MethodeDeCreationDeGroupeConstraint
public class GroupOfPagesFormDto {
private Long id; //NOSONAR
......@@ -33,12 +34,11 @@ public class GroupOfPagesFormDto {
private PeriodiciteDuSuivi periodiciteDuSuivi; //NOSONAR
@NotNull
private MethodeDeCreationDeGroupe methodeDeCreationDeGroupe; //NOSONAR
@Valid
@Size
private List<PageFormDto> pages; //NOSONAR
private String sitemap; //NOSONAR
public GroupOfPagesFormDto(Long id, final String name, final PeriodiciteDuSuivi periodiciteDuSuivi, final MethodeDeCreationDeGroupe methodeDeCreationDeGroupe, final Object pages) {
this(id, name, periodiciteDuSuivi, methodeDeCreationDeGroupe, map(pages));
this(id, name, periodiciteDuSuivi, methodeDeCreationDeGroupe, map(pages), null);
}
private static List<PageFormDto> map(Object object) {
......
package fr.numeco.misis.suividesite.repository;
import java.util.List;
import fr.numeco.misis.suividesite.domain.GroupeDePages;
import fr.numeco.misis.suividesite.dto.GroupOfPagesFormDto;
import io.quarkus.hibernate.orm.panache.PanacheRepository;
import jakarta.enterprise.context.ApplicationScoped;
import java.util.List;
@ApplicationScoped
public class GroupeDePagesRepository implements PanacheRepository<GroupeDePages> {
......@@ -30,6 +30,11 @@ public class GroupeDePagesRepository implements PanacheRepository<GroupeDePages>
GROUP BY g.id
""";
public static final String QUERY_SITEMAP_GROUPS = """
SELECT g FROM GroupeDePages AS g
WHERE g.sitemap IS NOT NULL AND g.methodeDeCreationDeGroupe = 'AUTOMATIQUE'
""";
public List<GroupeDePages> findGroupsAnalysedForHistorization() {
return list(QUERY_GROUPS_ANALYZED_FOR_HISTORISATION);
}
......@@ -39,4 +44,8 @@ public class GroupeDePagesRepository implements PanacheRepository<GroupeDePages>
.project(GroupOfPagesFormDto.class)
.list();
}
/**
 * Lists the groups selected by {@code QUERY_SITEMAP_GROUPS}: those with a non-null sitemap
 * whose creation method is {@code AUTOMATIQUE}.
 *
 * @return the matching groups
 */
public List<GroupeDePages> findSitemapGroups() {
return list(QUERY_SITEMAP_GROUPS);
}
}
package fr.numeco.misis.suividesite.repository;
import java.util.List;
import fr.numeco.misis.suividesite.domain.GroupeDePages;
import fr.numeco.misis.suividesite.domain.Page;
import io.quarkus.hibernate.orm.panache.PanacheQuery;
import io.quarkus.hibernate.orm.panache.PanacheRepository;
......@@ -22,8 +25,15 @@ public class PageRepository implements PanacheRepository<Page> {
ORDER BY p.modifiedAt ASC
""";
private static final String DELETE_OUTDATED_URLS = """
DELETE FROM Page WHERE url NOT IN (?1) AND groupeDePages = ?2
""";
public PanacheQuery<Page> findAvailableByPeriodicity() {
return find(QUERY_ANALYZABLE_PAGES);
}
/**
 * Deletes the pages of {@code groupeDePages} whose URL is not in {@code sitemapUrls}
 * (see {@code DELETE_OUTDATED_URLS}).
 * NOTE(review): the outcome for an empty {@code sitemapUrls} depends on how the persistence
 * provider renders an empty {@code NOT IN} list — confirm the intended behaviour.
 *
 * @param sitemapUrls   URLs that must be kept
 * @param groupeDePages group whose pages are filtered
 */
public void filterOutdatedUrls(List<String> sitemapUrls, GroupeDePages groupeDePages) {
delete(DELETE_OUTDATED_URLS, sitemapUrls, groupeDePages);
}
}
......@@ -20,10 +20,10 @@ public class WebsiteRepository implements PanacheRepository<SuiviDeSite> {
private static final String QUERY_ALL_WEBSITE_SUMMARY_VISIBLE_BY_USER = """
SELECT s
FROM PermissionSurLeSuiviDeSite AS p
FROM PermissionSurLeSuiviDeSite AS p
JOIN p.domain AS s
JOIN s.groupesDePages AS g
JOIN g.pages AS gp
LEFT JOIN g.pages AS gp
JOIN p.allowed AS pt
JOIN p.user AS u
JOIN s.createdBy AS owner
......
......@@ -17,9 +17,13 @@ quarkus.smallrye-openapi.info-license-url=https://www.apache.org/licenses/LICENS
analyser.refresh.schedule.cron.expression=0 0 0 * * ?
# stats refresh - NOTE: the expression below fires once a day at midnight (same as the analyser), not every 15 minutes
stats.refresh.schedule.cron.expression=0 0 0 * * ?
# Sitemap
sitemap.size.limit=10
# Logging
quarkus.log.category."fr.numeco.analyser.service.impl".min-level=TRACE
quarkus.log.category."fr.numeco.analyser.service.impl".level=TRACE
quarkus.log.category."fr.numeco.analyser.service.impl".min-level=INFO
quarkus.log.category."fr.numeco.analyser.service.impl".level=INFO
quarkus.log.category."org.openqa.selenium.devtools".level=WARNING
quarkus.log.category."io.quarkus.oidc.runtime.OidcProviderClient".level=DEBUG
quarkus.log.category."fr.numeco.misis.security.JwtDecodingUserInfoCache".level=DEBUG
......
......@@ -8,5 +8,9 @@ quarkus.scheduler.enabled=false
# Cron for tests
analyser.refresh.schedule.cron.expression=0 0 0 * * ?
stats.refresh.schedule.cron.expression=0 0 0 * * ?
# Sitemap
sitemap.size.limit=3
# OIDC
quarkus.oidc.enabled=true
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment