import { ScraperParams } from '@/types/augment';
import * as cheerio from 'cheerio';
import { z } from 'zod';
import { fetchWithRewrites } from './fetchWithRewrites';
import { createTimeoutSignal } from './signalConfig';

/**
 * Build a Zod schema from a string containing Zod source code.
 *
 * The string is evaluated as a JavaScript expression with `z` bound to the
 * Zod namespace imported by this module, e.g.
 * `parseSchema("z.object({ title: z.string() })")`.
 *
 * SECURITY: evaluation goes through `new Function`, so `schemaString` is
 * executed as arbitrary JavaScript. Only pass strings from a trusted source;
 * never pass unvalidated user input.
 *
 * @param schemaString - JavaScript expression that evaluates to a Zod schema.
 * @returns The schema produced by evaluating `schemaString`.
 * @throws TypeError if `schemaString` is blank or does not evaluate to a Zod schema.
 * @throws SyntaxError if `schemaString` is not valid JavaScript.
 */
export function parseSchema(schemaString: string): z.ZodObject<any> {
  if (typeof schemaString !== 'string' || schemaString.trim() === '') {
    throw new TypeError('parseSchema: schemaString must be a non-empty string');
  }

  // Trim before interpolating: a leading newline would otherwise end up right
  // after `return`, and automatic semicolon insertion would make the generated
  // function return `undefined` instead of the schema.
  const buildSchema = new Function('z', `return ${schemaString.trim()};`);
  const schema: unknown = buildSchema(z);

  // Checked against the base schema class rather than ZodObject so that object
  // schemas wrapped by Zod (for example via `.refine()`) are still accepted.
  if (!(schema instanceof z.ZodType)) {
    throw new TypeError(
      'parseSchema: schemaString did not evaluate to a Zod schema'
    );
  }
  return schema as z.ZodObject<any>;
}

/**
 * Log rough size comparisons between the whole-document text, the raw body
 * text and the whitespace-normalised body text. Debug aid only.
 *
 * "Words" are counted by splitting on single spaces, so the figures are
 * approximations rather than real token counts.
 */
function logTextSavings(
  rootText: string,
  bodyText: string,
  cleanText: string
): void {
  const countWords = (value: string): number => value.split(' ').length;
  const rootWords = countWords(rootText);
  const bodyWords = countWords(bodyText);
  const cleanWords = countWords(cleanText);

  // An empty document would otherwise report NaN (0 / 0).
  const charsSavedPercent =
    rootText.length === 0
      ? 0
      : ((rootText.length - cleanText.length) / rootText.length) * 100;

  console.log('root length - body length = ', rootWords - bodyWords);
  console.log('root length - cleaned body length = ', rootWords - cleanWords);
  console.log('Approx Percentage characters saved = ', charsSavedPercent);
  console.log(
    'Approx Percentage token savings = ',
    ((rootWords - cleanWords) / rootWords) * 100
  );
}

/**
 * Fetch a page and return the text content of its `<body>` with every run of
 * whitespace collapsed to a single space and the ends trimmed.
 *
 * The HTTP status is passed through untouched and is not inspected here, so
 * the body of an error response is parsed and returned like any other page;
 * callers should check `statusCode` themselves.
 *
 * @param params - Scraper parameters; only `pageUrl` is read.
 * @param debug - When true, logs approximate size savings to the console.
 * @param requestTimeout - Forwarded to `createTimeoutSignal` to build the
 *   request's abort signal.
 * @returns `data` (the cleaned body text) plus the response's `statusText`
 *   and `statusCode`.
 */
export const scrapePage = async (
  params: ScraperParams,
  debug: boolean = false,
  requestTimeout?: number
) => {
  const { pageUrl } = params;
  const response = await fetchWithRewrites(pageUrl, {
    // NOTE(review): `Access-Control-Allow-Origin` is a CORS *response* header
    // and `Content-Type` describes a request body, which is not passed here;
    // `Accept: '*/*'` was probably intended. Left unchanged because
    // `fetchWithRewrites` is not visible from this file — confirm before
    // changing what is sent.
    headers: {
      'Access-Control-Allow-Origin': '*',
      'Content-Type': '*/*',
    },
    signal: createTimeoutSignal(requestTimeout),
  });

  const { status: statusCode, statusText } = response;
  const html = await response.text();

  const $ = cheerio.load(html);
  const bodyText = $('body').text();
  const cleanText = bodyText.replace(/\s+/g, ' ').trim();

  if (debug) {
    // The whole-document text is only needed for the statistics, so it is
    // extracted here rather than on every call.
    logTextSavings($.text(), bodyText, cleanText);
  }

  return {
    data: cleanText,
    statusText,
    statusCode,
  };
};
