Skip to content
Snippets Groups Projects
Commit 7cecaec2 authored by Martins, Laurent's avatar Martins, Laurent
Browse files

fix #390 : quelques ajustements

parent bc9d8f4a
Branches
Tags
No related merge requests found
Showing
with 73 additions and 42 deletions
package fr.numeco.analyser.service;
import java.util.List;
import fr.numeco.misis.statistic.service.StatisticService;
import fr.numeco.misis.suividesite.domain.GroupeDePages;
import fr.numeco.misis.suividesite.repository.GroupeDePagesRepository;
......@@ -9,8 +11,6 @@ import io.quarkus.scheduler.ScheduledExecution;
import jakarta.enterprise.context.ApplicationScoped;
import lombok.RequiredArgsConstructor;
import java.util.List;
@ApplicationScoped
@RequiredArgsConstructor
public class StatsScheduler {
......@@ -24,10 +24,11 @@ public class StatsScheduler {
@Scheduled(cron = "{stats.refresh.schedule.cron.expression}")
void scheduleCreationOfPageGroupStatistics(ScheduledExecution execution) {
Log.info(SCHEDULER_START);
final List<GroupeDePages> groupesDePages = groupeDePagesRepository.findGroupsAnalysedForHistorization();
if (!groupesDePages.isEmpty()) {
Log.info(SCHEDULER_START);
}
for (GroupeDePages groupeDePages : groupesDePages) {
statisticService.createStatistics(groupeDePages);
......@@ -40,7 +41,9 @@ public class StatsScheduler {
);
}
Log.info(SCHEDULER_END);
if (!groupesDePages.isEmpty()) {
Log.info(SCHEDULER_END);
}
}
}
......@@ -72,7 +72,9 @@ public class DefaultAnalyserService implements AnalyserService {
} else {
Log.infof("Processing %s page(s)", pageDtos.size());
}
pageRepository.update("analyseStatut=?1 WHERE id IN (?2)", AnalyseStatut.EN_COURS, pagesIds);
if (!pagesIds.isEmpty()) {
pageRepository.update("analyseStatut=?1 WHERE id IN (?2)", AnalyseStatut.EN_COURS, pagesIds);
}
isRunning = Boolean.TRUE;
networkAnalyser.analyse(pageDtos.stream())
.onTermination().invoke(() -> isRunning = Boolean.FALSE)
......@@ -153,7 +155,7 @@ public class DefaultAnalyserService implements AnalyserService {
/**
 * Reports the current value of the {@code isRunning} flag for a scheduled execution.
 *
 * NOTE(review): presumably this is the scheduler's skip predicate, so a {@code true}
 * result would skip the execution while an analysis is in progress — confirm against
 * the enclosing class declaration, which is not visible here.
 *
 * @param execution the scheduled execution being evaluated; not inspected by this method
 * @return the value of {@code isRunning}
 */
@Override
public boolean test(ScheduledExecution execution) {
    // The former hard-coded "return false" stub made this statement unreachable,
    // so the running flag was never taken into account.
    return isRunning;
}
}
package fr.numeco.misis.sitemap;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
......@@ -10,7 +11,10 @@ public class SitemapConfiguration {
/**
 * Creates the {@link DocumentBuilder} with access to external DTDs and external
 * schemas disabled.
 *
 * @return a new {@link DocumentBuilder} obtained from the restricted factory
 * @throws ParserConfigurationException if a builder satisfying the configuration cannot be created
 */
@ApplicationScoped
public DocumentBuilder getDocumentBuilder() throws ParserConfigurationException {
    final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
    // An empty protocol list denies every protocol, so the parser cannot fetch
    // external DTDs or schemas referenced by the parsed document.
    documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
    documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
    return documentBuilderFactory.newDocumentBuilder();
}
}
package fr.numeco.misis.sitemap.dto;
import java.util.Set;
import fr.numeco.misis.enums.SitemapMessage;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import java.util.Set;
/**
 * Result of reading a sitemap: the extracted URLs and the messages raised while reading it.
 *
 * Lombok generates the all-arguments constructor, the getters and the setters.
 */
@AllArgsConstructor
@Getter @Setter
public class SitemapDto {
    /** URLs extracted from the sitemap. */
    private Set<String> urls; //NOSONAR
    /** Messages collected while reading the sitemap. */
    private Set<SitemapMessage> messages; //NOSONAR
}
......@@ -17,6 +17,6 @@ public class SitemapResource {
/**
 * Reads the sitemap designated by the {@code q} query parameter and returns its content.
 *
 * @param sitemap     sitemap URL taken from the {@code q} query parameter
 * @param isRecursive value of the {@code isrecursive} query parameter; not used by this method
 * @return the DTO produced by {@code sitemapService.readSitemap(sitemap, true)}
 */
@GET
public SitemapDto getSitemapUrls(@QueryParam("q") String sitemap, @QueryParam("isrecursive") boolean isRecursive) {
    // Always asks the service for the full result (second argument set to true);
    // the single-argument call that used to precede this line made it unreachable.
    return sitemapService.readSitemap(sitemap, true);
}
}
......@@ -8,6 +8,8 @@ public interface SitemapService {
public SitemapDto readSitemap(String sitemapUrl);
public SitemapDto readSitemap(String sitemapUrl, boolean requireFullResult);
public Multi<Page> process();
}
......@@ -3,6 +3,7 @@ package fr.numeco.misis.sitemap.service.impl;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.LinkedHashSet;
import java.util.List;
......@@ -30,7 +31,7 @@ import fr.numeco.misis.suividesite.domain.Page;
import fr.numeco.misis.suividesite.dto.PageFormDto;
import fr.numeco.misis.suividesite.repository.GroupeDePagesRepository;
import fr.numeco.misis.suividesite.repository.PageRepository;
import io.quarkus.hibernate.orm.panache.PanacheQuery;
import io.quarkus.logging.Log;
import io.smallrye.mutiny.Multi;
import io.smallrye.mutiny.infrastructure.Infrastructure;
import jakarta.enterprise.context.ApplicationScoped;
......@@ -64,32 +65,42 @@ public class DefaultSitemapService implements SitemapService {
/**
 * Reads a sitemap without requiring the full result.
 *
 * Delegates to {@link #readSitemap(String, boolean)} with {@code requireFullResult}
 * set to {@code false}.
 *
 * @param sitemapUrl URL of the sitemap to read
 * @return the DTO returned by the two-argument overload
 */
@Override
public SitemapDto readSitemap(final String sitemapUrl) {
    final boolean requireFullResult = false;
    return readSitemap(sitemapUrl, requireFullResult);
}
@Override
public SitemapDto readSitemap(final String sitemapUrl, final boolean requireFullResult) {
final Set<String> urlsExtractedFromSitemap = new LinkedHashSet<>();
Set<SitemapMessage> messages = new LinkedHashSet<>();
if (sitemapUrl != null) {
final Matcher XMLMatcher = SITEMAP_URL_PARTTERN.matcher(sitemapUrl);
if (!XMLMatcher.find()) {
throw new SitemapUrlException("Not a XML file.");
}
} else {
if (sitemapUrl == null) {
throw new SitemapUrlException("XML file is missing.");
}
final Matcher xmlMatcher = SITEMAP_URL_PARTTERN.matcher(sitemapUrl);
if (!xmlMatcher.find()) {
throw new SitemapUrlException("Not a XML file.");
}
try {
final Document parsedSitemap = this.dBuilder.parse(sitemapUrl);
parsedSitemap.getDocumentElement().normalize();
final NodeList urlNodeList = parsedSitemap.getElementsByTagName("loc");
if (urlNodeList.getLength() == 0) {
final int nodeLength = urlNodeList.getLength();
if (nodeLength == 0) {
messages.add(SitemapMessage.EMPTY_SITEMAP);
}
for (int i = 0; i < urlNodeList.getLength(); i++) {
if (urlsExtractedFromSitemap.size() >= this.sizeLimit) {
Log.infof("%s URLs in sitemap %s", nodeLength, sitemapUrl);
for (int i = 0; i < nodeLength; i++) {
if (urlsExtractedFromSitemap.size() == this.sizeLimit) {
messages.add(SitemapMessage.NUMBER_OF_ITEMS_LIMIT_REACHED);
return new SitemapDto(urlsExtractedFromSitemap, messages);
if (!requireFullResult) {
return new SitemapDto(urlsExtractedFromSitemap, messages);
}
}
final Node node = urlNodeList.item(i);
......@@ -98,6 +109,8 @@ public class DefaultSitemapService implements SitemapService {
if (this.isValidUrl(url)) {
urlsExtractedFromSitemap.add(url);
} else {
Log.warnf("invalide url in sitemap %s", url);
}
}
}
......@@ -123,6 +136,11 @@ public class DefaultSitemapService implements SitemapService {
private Multi<Page> createPagesFromSitemap(final GroupeDePages groupeDePages) {
final List<String> sitemapUrls = this.readSitemap(groupeDePages.getSitemap()).getUrls().stream().toList();
if (sitemapUrls.isEmpty()) {
return Multi.createFrom().empty();
} else {
groupeDePages.setModifiedDate(LocalDate.now());
}
pageRepository.filterOutdatedUrls(sitemapUrls, groupeDePages);
return Multi.createFrom().items(sitemapUrls.stream())
......@@ -131,8 +149,7 @@ public class DefaultSitemapService implements SitemapService {
}
private Page createPageIfNotExist(final PageFormDto pageFormDto, final GroupeDePages groupeDePages) {
final PanacheQuery<Page> query = pageRepository.find(PAGE_ATTRIBUT_URL, pageFormDto.getUrl());
final Optional<Page> optionalPage = query.singleResultOptional();
final Optional<Page> optionalPage = pageRepository.findByUrlAndGroupeDePages(pageFormDto.getUrl(), groupeDePages);
Page page;
if (optionalPage.isPresent()) {
......
......@@ -37,8 +37,8 @@ public class GroupOfPagesFormDto {
private List<PageFormDto> pages; //NOSONAR
private String sitemap; //NOSONAR
public GroupOfPagesFormDto(Long id, final String name, final PeriodiciteDuSuivi periodiciteDuSuivi, final MethodeDeCreationDeGroupe methodeDeCreationDeGroupe, final Object pages) {
this(id, name, periodiciteDuSuivi, methodeDeCreationDeGroupe, map(pages), null);
public GroupOfPagesFormDto(Long id, final String name, final PeriodiciteDuSuivi periodiciteDuSuivi, final MethodeDeCreationDeGroupe methodeDeCreationDeGroupe, final Object pages, String sitemap) {
this(id, name, periodiciteDuSuivi, methodeDeCreationDeGroupe, map(pages), sitemap);
}
private static List<PageFormDto> map(Object object) {
......
......@@ -22,7 +22,7 @@ public class GroupeDePagesRepository implements PanacheRepository<GroupeDePages>
GROUP BY g
""";
public static final String FIND_GROUP_FOR_UPDATE = """
SELECT g.id, g.name, g.periodiciteDuSuivi, g.methodeDeCreationDeGroupe, JSON_AGG(DISTINCT JSONB_BUILD_OBJECT(p.id, p.url))
SELECT g.id, g.name, g.periodiciteDuSuivi, g.methodeDeCreationDeGroupe, JSON_AGG(DISTINCT JSONB_BUILD_OBJECT(p.id, p.url)), g.sitemap
FROM GroupeDePages AS g
JOIN g.pages AS p
JOIN g.suiviDeSite AS s
......@@ -33,6 +33,10 @@ public class GroupeDePagesRepository implements PanacheRepository<GroupeDePages>
public static final String QUERY_SITEMAP_GROUPS = """
SELECT g FROM GroupeDePages AS g
WHERE g.sitemap IS NOT NULL AND g.methodeDeCreationDeGroupe = 'AUTOMATIQUE'
AND (g.pages IS EMPTY
OR ( g.periodiciteDuSuivi = 'QUOTIDIEN' AND g.modifiedDate < CURRENT_DATE )
OR ( g.periodiciteDuSuivi = 'HEBDOMADAIRE' AND g.modifiedDate < DATEADD(week, -1, CURRENT_DATE) )
OR ( g.periodiciteDuSuivi = 'MENSUEL' AND g.modifiedDate < DATEADD(month, -1, CURRENT_DATE) ))
""";
public List<GroupeDePages> findGroupsAnalysedForHistorization() {
......
package fr.numeco.misis.suividesite.repository;
import java.util.List;
import java.util.Optional;
import fr.numeco.misis.suividesite.domain.GroupeDePages;
import fr.numeco.misis.suividesite.domain.Page;
......@@ -36,4 +37,8 @@ public class PageRepository implements PanacheRepository<Page> {
/**
 * Runs the {@code DELETE_OUTDATED_URLS} delete query with the given URLs and group.
 *
 * NOTE(review): presumably removes the pages of the group whose URL is no longer
 * listed in the sitemap — confirm against the query text, which is not visible here.
 *
 * @param sitemapUrls   first positional parameter of the delete query
 * @param groupeDePages second positional parameter of the delete query
 */
public void filterOutdatedUrls(List<String> sitemapUrls, GroupeDePages groupeDePages) {
    this.delete(
            DELETE_OUTDATED_URLS,
            sitemapUrls,
            groupeDePages
    );
}
/**
 * Looks up a page by its URL within a given group of pages.
 *
 * @param url           URL the page must have
 * @param groupeDePages group of pages the page must belong to
 * @return the first matching page, or an empty {@link Optional} when none matches
 */
public Optional<Page> findByUrlAndGroupeDePages(String url, GroupeDePages groupeDePages) {
    final String query = "url = ?1 AND groupeDePages = ?2";
    return find(query, url, groupeDePages).firstResultOptional();
}
}
......@@ -21,6 +21,7 @@ import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.eq;
import org.mockito.Mock;
import org.mockito.Mockito;
import static org.mockito.Mockito.mock;
......@@ -34,13 +35,11 @@ import org.xml.sax.SAXException;
import fr.numeco.misis.enums.SitemapMessage;
import fr.numeco.misis.sitemap.dto.SitemapDto;
import fr.numeco.misis.sitemap.exception.SitemapUrlException;
import static fr.numeco.misis.sitemap.service.impl.DefaultSitemapService.PAGE_ATTRIBUT_URL;
import fr.numeco.misis.suividesite.domain.GroupeDePages;
import fr.numeco.misis.suividesite.domain.Page;
import fr.numeco.misis.suividesite.domain.SuiviDeSite;
import fr.numeco.misis.suividesite.repository.GroupeDePagesRepository;
import fr.numeco.misis.suividesite.repository.PageRepository;
import io.quarkus.hibernate.orm.panache.PanacheQuery;
import io.quarkus.test.InjectMock;
import io.quarkus.test.junit.QuarkusTest;
import io.smallrye.mutiny.helpers.test.AssertSubscriber;
......@@ -94,9 +93,7 @@ public class SitemapServiceUnitTest {
private void mockDatabaseResponse(Map<String, List<String>> sitemapUrlWithPages) {
final List<GroupeDePages> mockedGroupesDePages = new ArrayList<>();
final SuiviDeSite suiviDeSite = mock(SuiviDeSite.class);
PanacheQuery<?> panacheQuery = mock(PanacheQuery.class);
Mockito.doReturn(panacheQuery).when(pageRepository).find(PAGE_ATTRIBUT_URL, "https://sample.com/not-existing");
Mockito.doReturn(Optional.empty()).when(panacheQuery).singleResultOptional();
Mockito.doReturn(Optional.empty()).when(pageRepository).findByUrlAndGroupeDePages(eq("https://sample.com/not-existing"), any(GroupeDePages.class));
for (Map.Entry<String, List<String>> entry : sitemapUrlWithPages.entrySet()) {
final GroupeDePages groupeDePages = mock(GroupeDePages.class);
......@@ -107,10 +104,7 @@ public class SitemapServiceUnitTest {
for (String url : entry.getValue()) {
final Page page = mock(Page.class);
panacheQuery = mock(PanacheQuery.class);
Mockito.doReturn(panacheQuery).when(pageRepository).find(PAGE_ATTRIBUT_URL, url);
Mockito.doReturn(Optional.of(page)).when(panacheQuery).singleResultOptional();
Mockito.doReturn(Optional.of(page)).when(pageRepository).findByUrlAndGroupeDePages(url, groupeDePages);
}
mockedGroupesDePages.add(groupeDePages);
......@@ -139,7 +133,7 @@ public class SitemapServiceUnitTest {
Set<String> actual = defaultSitemapService.readSitemap(provided).getUrls();
Mockito.verify(parsedSitemap, Mockito.times(1)).getElementsByTagName(anyString());
Mockito.verify(urlNodeList, Mockito.times(2)).getLength();
Mockito.verify(urlNodeList, Mockito.times(1)).getLength();
Mockito.verify(urlNodeList, Mockito.never()).item(anyInt());
Assertions.assertIterableEquals(expected, actual);
......@@ -175,7 +169,7 @@ public class SitemapServiceUnitTest {
Set<String> actual = defaultSitemapService.readSitemap(provided).getUrls();
Mockito.verify(parsedSitemap, Mockito.times(1)).getElementsByTagName(anyString());
Mockito.verify(urlNodeList, Mockito.times(3)).getLength();
Mockito.verify(urlNodeList, Mockito.times(1)).getLength();
Mockito.verify(urlNodeList, Mockito.times(1)).item(anyInt());
Assertions.assertIterableEquals(expected, actual);
......@@ -189,7 +183,7 @@ public class SitemapServiceUnitTest {
Set<String> actual = defaultSitemapService.readSitemap(provided).getUrls();
Mockito.verify(parsedSitemap, Mockito.times(1)).getElementsByTagName(anyString());
Mockito.verify(urlNodeList, Mockito.times(3)).getLength();
Mockito.verify(urlNodeList, Mockito.times(1)).getLength();
Mockito.verify(urlNodeList, Mockito.times(1)).item(anyInt());
Assertions.assertIterableEquals(expected, actual);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment