import { useQueries, useQuery } from '@tanstack/react-query';
import { markerPositivityColumnPrefix } from 'components/Procedure/SlidesViewer/DeckGLViewer/layers/parquetLayers/helpers';
import { FileMetaData, parquetMetadataAsync, parquetRead } from 'hyparquet';
import { compact, filter, fromPairs, keyBy, map, uniq } from 'lodash';
import { useMemo } from 'react';

/**
 * A single parquet row as a positional tuple: X, Y, a string label, then any further column values.
 */
export type ParquetRow = [X: number, Y: number, label: string, ...columns: any[]];

/**
 * The result of reading one parquet file: its rows plus the file-level metadata.
 */
export interface ParquetFile {
  /** Row data, one positional tuple per row. */
  rows: ParquetRow[];
  /** File metadata, typed as hyparquet's FileMetaData. */
  metadata: FileMetaData;
}

/**
 * Get the byte length of a URL using a GET request.
 *
 * This is a workaround for pre-signed URLs, which are signed for a single HTTP verb, so a URL
 * signed for GET cannot be used with HEAD:
 * https://stackoverflow.com/questions/15717230/pre-signing-amazon-s3-urls-for-both-head-and-get-verbs
 *
 * Only the response headers are used; the body stream is cancelled as soon as the headers arrive
 * so the file content is not downloaded just to learn its size.
 *
 * @param url URL to measure.
 * @returns The value of the `Content-Length` response header, in bytes.
 * @throws Error if the response is not OK, or if `Content-Length` is missing or not a
 *   non-negative integer.
 */
export async function byteLengthFromUrlWithGet(url: string): Promise<number> {
  // TODO: find a way to get the content length with a ranged request (e.g. `Range: bytes=0-0`)
  // so the server does not even start sending the full body.
  const res = await fetch(url, { method: 'GET' });

  // The body is never read here. Cancel it (best effort) so the transfer stops early.
  void res.body?.cancel().catch(() => undefined);

  if (!res.ok) throw new Error(`fetch failed ${res.status}`);

  const length = res.headers.get('Content-Length');
  if (!length) throw new Error(`missing content length: url=${url}, status=${res.status}`);

  const byteLength = parseInt(length, 10);
  if (!Number.isFinite(byteLength) || byteLength < 0) {
    throw new Error(`invalid content length: url=${url}, content-length=${length}`);
  }
  return byteLength;
}
/**
 * Build an AsyncBuffer-shaped object (`byteLength` plus an async `slice`) for a URL, where every
 * slice is fetched with a ranged GET request.
 *
 * @param url URL of the remote file.
 * @param byteLength Total size of the file in bytes. When omitted (or falsy) it is looked up with
 *   `byteLengthFromUrlWithGet`.
 * @returns An object exposing `byteLength` and `slice(start, end)`; `slice` resolves to the
 *   ArrayBuffer of the response and throws `fetch failed <status>` on a non-OK or body-less response.
 */
export async function asyncBufferFromUrlWithGet(url: string, byteLength?: number) {
  // Use the caller's length when it is truthy; otherwise determine it with a GET request.
  const totalBytes = byteLength || (await byteLengthFromUrlWithGet(url));

  const slice = async (start: number, end: number | undefined) => {
    // `end` is exclusive while the Range header is inclusive, hence `end - 1`.
    // An undefined `end` produces an open-ended range (`bytes=<start>-`).
    const lastByte = end === undefined ? '' : end - 1;
    const rangeHeaders = new Headers();
    rangeHeaders.set('Range', `bytes=${start}-${lastByte}`);

    const response = await fetch(url, { headers: rangeHeaders });
    if (!(response.ok && response.body)) throw new Error(`fetch failed ${response.status}`);
    return response.arrayBuffer();
  };

  return { byteLength: totalBytes, slice };
}

/**
 * Download and decode a parquet file from a URL.
 *
 * The file metadata is read first on a best-effort basis: a failure there is logged rather than
 * thrown, in which case the returned `metadata` is undefined at runtime. When the metadata was
 * read successfully it is handed to `parquetRead` so the footer is not fetched and parsed twice.
 *
 * @param parquetFileUrl URL of the parquet file (fetched with ranged GET requests).
 * @param columns Optional list of column names to read; passed through to `parquetRead`.
 * @returns The decoded rows together with the file metadata.
 * @throws Re-throws (after logging) any error raised while reading the row data.
 */
const readParquetFromUrl = async (parquetFileUrl: string, columns?: string[]): Promise<ParquetFile> => {
  const file = await asyncBufferFromUrlWithGet(parquetFileUrl);

  let metadata: FileMetaData | undefined;
  try {
    metadata = await parquetMetadataAsync(file);
  } catch (error) {
    // Deliberately non-fatal: the row read below is still attempted without pre-parsed metadata.
    console.error('Error reading parquet metadata', error);
  }

  try {
    let rows: ParquetRow[] = [];
    // parquetRead reports its result through onComplete; awaiting the returned promise both
    // surfaces errors and guarantees this function settles even if onComplete is never invoked.
    await parquetRead({
      file,
      columns,
      metadata,
      onComplete: (data) => {
        rows = data as ParquetRow[];
      },
    });
    return { rows, metadata };
  } catch (error) {
    console.error('Error reading parquet file', error);
    throw error;
  }
};

/**
 * React Query hook that loads a single parquet file (all columns) from a URL.
 *
 * The query is disabled while the URL is empty or uses the `s3://` scheme.
 *
 * @param parquetFileUrl URL of the parquet file.
 * @returns The `useQuery` result whose data is the parsed parquet file.
 */
export const useParquetFile = (parquetFileUrl: string) => {
  const isBlockedUrl = !parquetFileUrl || parquetFileUrl.startsWith('s3://');
  const loadFile = async () => readParquetFromUrl(parquetFileUrl);

  return useQuery(['parquet', parquetFileUrl], loadFile, { enabled: !isBlockedUrl });
};

/**
 * One parquet request: a file URL plus an optional subset of columns to read from it.
 */
export interface ParquetUrlAndColumns {
  /** URL of the parquet file. */
  url: string;
  /** Column names to read; when omitted no column restriction is passed to the reader. */
  columns?: string[];
}

/**
 * Whether a parquet URL can be fetched: it must be non-empty and must not use the `s3://` scheme.
 */
const canQueryParquet = (url: string) => {
  if (!url) return false;
  return !url.startsWith('s3://');
};

/**
 * Normalize one decoded parquet cell: bigint values become numbers, and values in columns whose
 * name starts with the marker-positivity prefix become booleans. Everything else is unchanged.
 */
const normalizeParquetValue = (value: unknown, columnName?: string) => {
  if (typeof value === 'bigint') return Number(value);
  if (columnName?.startsWith(markerPositivityColumnPrefix)) return Boolean(value);
  return value;
};

/**
 * Fetch multiple parquet files and columns in parallel, and return the results as a hierarchical
 * dictionary of parquet files keyed first by URL and then by the requested columns.
 *
 * The second-level key is the stringified `columns` array (i.e. the column names joined with
 * `,`), or `"undefined"` when no columns were requested.
 *
 * Each leaf entry contains:
 *  - `rows`: the normalized rows (empty until data has loaded),
 *  - `metadata`: the parquet file metadata, if available,
 *  - `isLoading`: true only while a fetchable query has not produced data or an error yet,
 *  - `updatedAt`: the query's `dataUpdatedAt` timestamp,
 *  - `url` / `columns`: the originating request.
 *
 * @param parquetQueries The files (and optional column subsets) to load.
 * @returns `{ parquetFiles }`, the dictionary described above.
 */
export const useParquetFiles = (parquetQueries: ParquetUrlAndColumns[]) => {
  const parquetFileQueries = useQueries({
    queries: map(parquetQueries, ({ url, columns }) => ({
      queryKey: ['parquet', url, columns],
      queryFn: async () => url && readParquetFromUrl(url, columns),
      enabled: canQueryParquet(url),
    })),
  });

  // `useQueries` returns a new array on every render, so the memo is keyed on a stable signature
  // of the fields it actually reads. Tracking the timestamp (not just "has data") refreshes rows
  // after a refetch, and tracking `isLoading` clears the loading flag when a query fails.
  const queryStateSignature = JSON.stringify(
    map(parquetFileQueries, (query) => [query.dataUpdatedAt, query.isLoading])
  );

  const parquetFiles = useMemo(() => {
    const parsedResults = map(parquetFileQueries, (query, index) => {
      const request = parquetQueries[index];

      return {
        rows: map(query?.data?.rows, (row) => {
          return map(row, (value, columnIndex) =>
            // Column names are only known when the caller requested an explicit column list.
            normalizeParquetValue(value, request?.columns?.[columnIndex])
          ) as ParquetRow;
        }),
        metadata: query?.data?.metadata,
        // Disabled queries also report `isLoading`, so only count queries that can be fetched.
        isLoading: query?.isLoading && canQueryParquet(request?.url),
        updatedAt: query?.dataUpdatedAt,
        ...(request || {}),
      };
    });

    const parquetUrls = compact(uniq(map(parquetQueries, 'url')));

    return fromPairs(
      map(parquetUrls, (url) => {
        const resultsWithUrl = filter(parsedResults, (result) => result.url === url);
        // keyBy stringifies the `columns` array, yielding the comma-joined column names.
        return [url, keyBy(resultsWithUrl, 'columns')];
      })
    );
  }, [parquetQueries, queryStateSignature]);

  return { parquetFiles };
};
