import {JSDOM} from 'jsdom';

// Site origin (no trailing slash): prefixed to the listing paths requested by
// this module and to the relative links extracted from those pages.
const BASE_URL = 'https://www.ecologie.gouv.fr';

/**
 * French month names in calendar order (lower case, NFC form); the array
 * index plus one is the month number.
 */
const FRENCH_MONTHS = Object.freeze([
  'janvier',
  'février',
  'mars',
  'avril',
  'mai',
  'juin',
  'juillet',
  'août',
  'septembre',
  'octobre',
  'novembre',
  'décembre',
]);

/**
 * Convert a French long date such as "12 mars 2024" or "1er août 2023" into
 * "DD/MM/YYYY".
 *
 * The date is assembled directly from the matched parts instead of going
 * through `Date.parse`, whose handling of non-ISO strings is
 * implementation-dependent.
 *
 * @param {string} input Raw date text scraped from the page.
 * @return {string} The "DD/MM/YYYY" form, or the whitespace-normalized input
 *     when no "day month year" group with a known French month is found.
 */
function parseFrenchDate(input) {
  const cleaned = String(input ?? '')
      .normalize('NFC')
      .replace(/\s+/g, ' ')
      .trim();
  const match = /\b(\d{1,2})(?:er)? ([^\s\d]+) (\d{4})/.exec(cleaned);
  if (!match) {
    return cleaned;
  }
  const [, day, monthName, year] = match;
  const monthIndex = FRENCH_MONTHS.indexOf(monthName.toLowerCase());
  if (monthIndex === -1) {
    return cleaned;
  }
  const month = String(monthIndex + 1).padStart(2, '0');
  return `${day.padStart(2, '0')}/${month}/${year}`;
}

/**
 * Get Actualités links.
 *
 * Walks the paginated `/actualites` listing (page 0, 1, 2, ...) until
 * `nbActu` entries have been collected or a page yields nothing new.
 *
 * @param {number} nbActu Maximum number of entries to return.
 * @return {Promise<Array<{url: string, title: string, date: string}>>}
 *     At most `nbActu` entries, in page order; `date` is "DD/MM/YYYY" when
 *     the scraped text could be parsed, otherwise the cleaned raw text.
 */
export async function getActu(nbActu) {
  const urls = [];
  for (let page = 0; urls.length < nbActu; page++) {
    if (page > 0) {
      // Throttle: pause between successive requests only, never after the
      // last page that was needed.
      await new Promise((resolve) => setTimeout(resolve, 200));
    }
    const remaining = nbActu - urls.length;
    const newUrls = await getArticles({
      xpathNode: '//article[@role="article"]',
      xpathUrl: 'string(descendant::h2/a/@href)',
      xpathTitle: 'string(descendant::h2/a/span/text())',
      xpathDate:
      'string(descendant::span[@class="entity-date"]/text())',
      url: `${BASE_URL}/actualites?page=${page}`,
      nb: remaining,
      dateParser: parseFrenchDate,
    });
    const batch = newUrls.slice(0, remaining);
    if (batch.length === 0) {
      // Either the listing is exhausted or no progress can be made; stop
      // instead of requesting further pages.
      break;
    }
    urls.push(...batch);
  }
  return urls;
}

/**
 * Get Rendez-vous links.
 *
 * Scrapes the `/rendez-vous` listing through `getArticles`, rewriting the
 * first "DD MM YYYY" group found in each entry's date text as "DD/MM/YYYY".
 *
 * @param {number} nbLinks Maximum number of entries to return.
 * @return {Promise<Array<{url: string, title: string, date: string}>>}
 *     Whatever `getArticles` resolves to for the rendez-vous page.
 */
export async function getRendezvous(nbLinks) {
  // Collapse every whitespace run to one space, then turn the first
  // "DD MM YYYY" occurrence into "DD/MM/YYYY"; other text is left as is.
  const normalizeDate = (rawText) => {
    const singleSpaced = rawText.replace(/\s+/g, ' ').trim();
    return singleSpaced.replace(/(\d{2}) (\d{2}) (\d{4})/, '$1/$2/$3');
  };

  const options = {
    url: `${BASE_URL}/rendez-vous`,
    nb: nbLinks,
    xpathNode:
      '//div[@class="taxonomy-term vocabulary-project-eco-edition rdv-list"]',
    xpathUrl: 'string(descendant::h3/a/@href)',
    xpathTitle: 'string(descendant::h3/a/text())',
    xpathDate:
      'string(descendant::div[contains(@class,"rdv-list-dates-first")])',
    dateParser: normalizeDate,
  };

  const links = await getArticles(options);
  return links;
}

/**
 * Generic function to get links.
 *
 * Fetches `url`, parses the response body with JSDOM and walks every node
 * matched by `xpathNode` in document order. For each node the three string
 * XPath expressions are evaluated relative to it to build one entry.
 *
 * @param {Object} options
 * @param {string} options.xpathNode XPath selecting one node per entry.
 * @param {string} options.xpathUrl String XPath, relative to an entry node,
 *     yielding its link; the result is passed through `toAbsoluteUrl`.
 * @param {string} options.xpathTitle String XPath yielding the entry title.
 * @param {string} options.xpathDate String XPath yielding the entry date.
 * @param {function(string): string=} options.dateParser Optional transform
 *     applied to the raw date string.
 * @param {string} options.url Page to download.
 * @param {number} options.nb Collection stops as soon as exactly this many
 *     entries have been gathered; a value <= 0 returns an empty array
 *     without issuing any request.
 * @return {Promise<Array<{url: string, title: string, date: string}>>}
 */
async function getArticles({
  xpathNode,
  xpathUrl,
  xpathTitle,
  xpathDate,
  dateParser,
  url,
  nb,
}) {
  if (nb <= 0) {
    return [];
  }

  // Progress message goes through console.error rather than console.log.
  console.error(`Fetching ${url} ...`);
  const html = await (await fetch(url)).text();

  const dom = new JSDOM(html);
  const {document, XPathResult} = dom.window;

  // Evaluate a string-valued XPath expression relative to `contextNode`.
  const readString = (expression, contextNode) => document.evaluate(
      expression,
      contextNode,
      null,
      XPathResult.STRING_TYPE,
      null,
  ).stringValue;

  const matches = document.evaluate(
      xpathNode,
      document,
      null,
      XPathResult.ORDERED_NODE_ITERATOR_TYPE,
      null,
  );

  const entries = [];
  for (
    let current = matches.iterateNext();
    current;
    current = matches.iterateNext()
  ) {
    const rawDate = readString(xpathDate, current);
    const date = dateParser ? dateParser(rawDate) : rawDate;
    const link = toAbsoluteUrl(readString(xpathUrl, current));
    const title = readString(xpathTitle, current);
    entries.push({url: link, title, date});
    if (entries.length === nb) {
      // Quota reached: stop before pulling another node from the iterator.
      break;
    }
  }
  return entries;
}

/**
 * Convert a captured URL into an absolute URL.
 *
 * - A value that already carries a scheme ("https:", "HTTP:", "mailto:", ...)
 *   is returned unchanged.
 * - A protocol-relative value ("//host/path") receives the scheme of
 *   `BASE_URL`.
 * - Anything else is treated as a path on `BASE_URL`, with a "/" separator
 *   inserted when the value does not start with one.
 *
 * @param {string} value Link as found in the page (e.g. an `href` value).
 * @return {string} The absolute form of `value`.
 */
function toAbsoluteUrl(value) {
  // Match a real URI scheme rather than the bare prefix "http", so that a
  // relative path such as "httpd-guide" is not mistaken for an absolute URL.
  if (/^[a-z][a-z\d+.-]*:/i.test(value)) {
    return value;
  }
  if (value.startsWith('//')) {
    return `${new URL(BASE_URL).protocol}${value}`;
  }
  if (value.startsWith('/')) {
    return `${BASE_URL}${value}`;
  }
  return `${BASE_URL}/${value}`;
}
