import axios from 'axios';
import cheerio from 'cheerio';
import web from '../../../media_dsd/website-icon.png'


/**
 * Display metadata for a web page, as returned by `scrapeWebsite`.
 */
export interface Metadata {
  /** Text of the page's `<title>`; `scrapeWebsite` substitutes the requested URL when it is missing or the fetch fails. */
  title: string;
  /** Content of the page's `<meta name="description">`, or the placeholder `'No description available'`. */
  description: string;
  /**
   * URL of the page's icon, or the bundled placeholder image when the page declares none.
   * Typed as nullable, although `scrapeWebsite` in this file never assigns `null`.
   */
  faviconUrl: string | null;
  /**
   * Capitalised short site name produced by `getSiteNameFromUrl`.
   * Typed as nullable, although `scrapeWebsite` in this file never assigns `null`.
   */
  siteName: string | null;
}

/**
 * Fetches `url` and extracts the metadata needed to render a link preview.
 *
 * This is deliberately best-effort: it never rejects. If the request or the
 * HTML parsing fails, a fallback record is returned that uses the URL as the
 * title, a placeholder description and the bundled placeholder icon.
 *
 * @param url - Absolute URL of the page to inspect.
 * @returns The extracted metadata, with fallbacks for every field that could
 *   not be determined.
 */
export async function scrapeWebsite(url: string): Promise<Metadata> {
  const fallbackDescription = 'No description available';
  // The site name always describes the requested page, never the host that
  // happens to serve its favicon (which is frequently a CDN).
  const siteName = getSiteNameFromUrl(url);

  try {
    const response = await axios.get(url);
    const $ = cheerio.load(response.data);

    // Only the first <title> is the document title; inline SVGs may carry
    // their own <title> elements further down the document.
    const title = $('title').first().text().trim() || url;

    // `||` rather than `??` so an empty content="" also gets the placeholder.
    const description =
      $('meta[name="description"]').attr('content')?.trim() || fallbackDescription;

    let faviconUrl: string | null = web;
    const iconLinks = $('link[rel="icon"], link[rel="shortcut icon"]').toArray();
    // When several icons are declared, the last one with a usable href wins.
    for (const element of iconLinks) {
      const href = $(element).attr('href');
      if (!href) {
        continue;
      }
      try {
        // Resolve relative hrefs against the page URL.
        faviconUrl = new URL(href, url).toString();
      } catch {
        // A malformed href must not discard the title/description already
        // extracted; skip it and keep the previous candidate.
      }
    }

    return { title, description, faviconUrl, siteName };
  } catch {
    // Network or parse failure: degrade to a URL-only preview.
    return {
      title: url,
      description: fallbackDescription,
      faviconUrl: web,
      siteName,
    };
  }
}

/**
 * Derives a short, capitalised site name from a URL or bare host name.
 *
 * The name is the first dot-separated label of the host after removing the
 * scheme, any userinfo, port, path, query, fragment and a leading `www.`.
 * For example `https://www.example.com/about` yields `Example`.
 *
 * @param url - A URL (with or without scheme) or a bare host name.
 * @returns The first host label with its first character upper-cased, or an
 *   empty string when no host can be extracted.
 */
function getSiteNameFromUrl(url: string): string {
  const host = url
    // Scheme ("https://") or protocol-relative prefix ("//").
    .replace(/^(\w+:)?\/\//, '')
    // Everything from the first path, query or fragment delimiter onwards.
    .replace(/[/?#][\s\S]*$/, '')
    // Userinfo ("user:password@").
    .replace(/^[^@]*@/, '')
    // Port (":8080").
    .replace(/:\d*$/, '')
    // Only a *leading* "www." is a prefix; the same text elsewhere in the host
    // is part of the name.
    .replace(/^www\./i, '');

  const label = host.split('.')[0] ?? '';

  return label.charAt(0).toUpperCase() + label.slice(1);
}

