/*
First we download the pricing HTML page as text, using the fetch API, from:

   https://cloud.google.com/compute/all-pricing,

and much more.
*/

import cheerio from "cheerio";
import { fetchGpuData, parseGpuData } from "./gpu-pricing";
import { getDisks } from "./disk-pricing";
import { getZones, ZoneData } from "./zones";

/**
 * Downloads the Google Cloud "all pricing" page and returns its HTML as text.
 *
 * The request asks for `en-US` content; the parsing code in this file matches
 * on English heading words such as "price" and "machine".
 *
 * @returns the response body of the pricing page
 * @throws Error when the server answers with a non-OK HTTP status
 */
async function fetchPricingData(): Promise<string> {
  const gcloudUrl = "https://cloud.google.com/compute/all-pricing";
  const response = await fetch(gcloudUrl, {
    headers: {
      "Accept-Language": "en-US",
    },
  });
  if (!response.ok) {
    // Without this check an error page (e.g. 429 or 503) would be handed to
    // the parser and silently produce empty pricing data.
    throw new Error(
      `failed to fetch ${gcloudUrl}: ${response.status} ${response.statusText}`,
    );
  }
  return await response.text();
}

/**
 * Scrapes the Google Cloud pricing page and gathers the raw data from which
 * price tables are built.
 *
 * - Every `cloudx-pricing-table` element carries a `layout` attribute; it is
 *   converted to JSON (by rewriting `True`/`False` and single quotes) and its
 *   `rows` are collected. Tables whose preceding element id mentions "image",
 *   "disk" or "localssd" are skipped.
 * - GPU pricing is loaded from the `src` of the page's iframe via
 *   `fetchGpuData` / `parseGpuData`.
 * - Disk and zone data come from `getDisks()` and `getZones()`.
 *
 * @returns an object `{ tables, gpus, disks, zones }`
 * @throws whatever `JSON.parse` throws if a `layout` attribute cannot be
 *   converted to valid JSON, plus any error raised by the fetch helpers
 */
export async function parsePricingData() {
  const body = await fetchPricingData();
  // Use cheerio to load the HTML
  const $ = cheerio.load(body);
  const tables: any[] = [];
  // Ids of the tables collected so far; used only to warn about duplicates.
  // (`tables` is an array, so it cannot be used for lookups by id.)
  const seenTypes = new Set<string>();

  for (const x of $("cloudx-pricing-table")) {
    const data = $(x);
    const tableLayout = data.attr("layout");
    if (!tableLayout) {
      continue;
    }
    // The attribute is not valid JSON as written; normalize the boolean
    // literals and the quoting before parsing.
    const json = tableLayout
      .replace(/True/g, "true")
      .replace(/False/g, "false")
      .replace(/'/g, '"');
    const layout = JSON.parse(json);
    if (layout?.rows) {
      // The id of the element just before the table identifies the table.
      const type = data.prev().attr("id") ?? "";
      if (
        type.includes("image") ||
        type.includes("disk") ||
        type.includes("localssd")
      ) {
        // These are templated in some mysterious way so the data isn't available
        // to parse out of the raw html, so we just skip them.
        continue;
      }
      if (type) {
        // Tables without an id are not tracked, otherwise every one of them
        // would be reported as a duplicate of the empty id.
        if (seenTypes.has(type)) {
          console.warn("warning -- duplicate pricing table id", type);
        }
        seenTypes.add(type);
      }
      tables.push(layout.rows);
    }
  }

  // The gpu data is stored in a separate iframe in a different format.
  // This gets fixed later.
  const gpuUrl = $("iframe").attr("src");
  let gpus;
  if (!gpuUrl) {
    console.warn("GPU data is missing");
    gpus = {};
  } else {
    gpus = await parseGpuData(await fetchGpuData(gpuUrl));
  }

  const disks = await getDisks();
  const zones = await getZones();

  return {
    tables,
    gpus,
    disks,
    zones,
  };
}

// Leading components of region names that toRegion() knows how to split off.
const PREFIX = [
  "us",
  "europe",
  "asia",
  "northamerica",
  "southamerica",
  "australia",
  "me",
  "africa",
];

/**
 * Turns a region key written without a dash (e.g. "uscentral1") into the
 * dashed form (e.g. "us-central1").
 *
 * Keys that already contain a dash are returned untouched. Otherwise the
 * first entry of PREFIX that the key starts with is separated from the rest
 * by a dash.
 *
 * @param key region key, with or without a dash
 * @returns the dashed region name
 * @throws Error if the key has no dash and starts with none of the prefixes
 */
function toRegion(key: string): string {
  if (key.indexOf("-") !== -1) {
    return key;
  }
  const matched = PREFIX.find((candidate) => key.startsWith(candidate));
  if (matched === undefined) {
    throw Error(`unknown region: "${key}"`);
  }
  const remainder = key.slice(matched.length);
  return matched + "-" + remainder;
}

/**
 * Converts a map of region key -> price string into a map of dashed region
 * name -> numeric price.
 *
 * Each key is passed through toRegion() and each value through parseFloat().
 * A nullish input is handed back unchanged.
 *
 * @param costMap prices as strings, keyed by region
 * @returns prices as numbers keyed by dashed region name, or the nullish input
 * @throws Error (from toRegion) when a key is not a recognizable region
 */
function formatCostMap(costMap?: {
  [region: string]: string;
}): { [region: string]: number } | undefined {
  if (costMap == null) {
    return costMap;
  }
  const normalized: { [region: string]: number } = {};
  for (const rawRegion in costMap) {
    const region = toRegion(rawRegion);
    normalized[region] = parseFloat(costMap[rawRegion]);
  }
  return normalized;
}

/**
 * Pricing information for one machine type or one accelerator type.
 * Every field is optional.
 */
export interface PriceData {
  /** Price keyed by region name. */
  prices?: Record<string, number>;
  /** Spot price keyed by region name. */
  spot?: Record<string, number>;
  /** Number of vCPUs (may be fractional, e.g. for shared-core types). */
  vcpu?: number;
  /** Amount of memory as parsed from the pricing table. */
  memory?: number;
  /** For GPUs only. */
  count?: number;
  /** For GPUs only. */
  max?: number;
}

/**
 * Parses the integer at the start of a string such as "16 vCPUs".
 *
 * Only the text before the first space is considered, and it is always read
 * as a decimal number.
 *
 * @param s text to parse; a nullish value is passed through unchanged
 * @returns the parsed integer, NaN when the first word is not numeric, or the
 *   nullish input
 */
function toInteger(s?: string): number | undefined {
  if (s == null) return s;
  const [firstWord] = s.split(" ");
  // Pin the radix: without it a "0x"-prefixed word would be read as hex.
  return parseInt(firstWord, 10);
}

/**
 * Converts the raw scraped data (as returned by parsePricingData) into price
 * data for machine types, accelerators and disks.
 *
 * - `tables`: each table is a list of rows with a `cells` array; the first row
 *   holds the headings. Only tables whose first heading normalizes to
 *   "machine" are used, and rows mentioning "custom-machine-type" are skipped.
 * - `gpus`: each entry is copied, renamed via toApiAcceleratorType(), and its
 *   `prices` / `spot` maps are run through formatCostMap().
 * - `disks`: each entry is wrapped as `{ prices: ... }`. The wrapping happens
 *   on a copy, so the `disks` object passed in is left untouched.
 * - `zones`: returned as is.
 *
 * @returns `{ machineTypes, accelerators, disks, zones }`
 */
export function machineTypeToPriceData({ tables, gpus, disks, zones }): {
  machineTypes: { [machineType: string]: PriceData };
  disks: {
    "pd-standard": { prices: { [zone: string]: number } };
    "pd-ssd": { prices: { [zone: string]: number } };
    "pd-balanced": { prices: { [zone: string]: number } };
  };
  accelerators: { [acceleratorType: string]: PriceData };
  zones: { [zone: string]: ZoneData };
} {
  const machineTypes: { [name: string]: PriceData } = {};

  for (const rows of tables) {
    if (!rows?.length) {
      // A table without any rows has no heading row to read.
      continue;
    }
    let foundOnDemand = false;
    const headings = rows[0].cells.map((heading) => {
      if (!foundOnDemand && heading.toLowerCase().includes("price")) {
        // For some reason there are a dozen choices for the column headings for non-spot instances.
        // It's always the first one that contains "price", I think.
        foundOnDemand = true;
        return "on-demand";
      }
      // Otherwise keep only the first word, lowercased, minus any "(...)" suffix.
      return heading.split(" ")[0].toLowerCase().split("(")[0];
    });
    if (headings[0] !== "machine") {
      // this is part of table given how to make a custom machine type
      continue;
    }
    for (let i = 1; i < rows.length; i++) {
      const { cells } = rows[i];
      if (cells[0].includes("custom-machine-type")) {
        continue;
      }
      // Key each cell by its normalized column heading.
      const row: any = {};
      for (let j = 0; j < headings.length; j++) {
        row[headings[j]] = cells[j];
      }
      const machineType = row.machine.split(" ")[0];
      let vcpu = toInteger(row.virtual ?? row.vcpu ?? row.vcpus ?? row.cores);

      // There's a special case with pricing for the shared cpu cases, which are
      // e2-micro, e2-small, e2-medium, where the vcpu that is the input for
      // computing the spot price is 0.25, 0.5, 1, respectively.
      if (machineType === "e2-micro") {
        vcpu = 0.25;
      } else if (machineType === "e2-small") {
        vcpu = 0.5;
      } else if (machineType === "e2-medium") {
        vcpu = 1;
      }

      // VERY important to get the actual amount of memory, and not just round to an integer
      // since we will use this later when recomputing all the prices based on the SKU data!
      const memory = parseFloat(row["memory"]);
      const prices = formatCostMap(
        (row.price ?? row["on-demand"])?.priceByRegion,
      );
      machineTypes[machineType] = {
        prices,
        spot: {}, // gets filled in based on data/pricing.csv; assume initially no discount
        vcpu,
        memory,
      };
    }
  }

  const accelerators: { [acceleratorType: string]: PriceData } = {};
  for (const name in gpus) {
    const d = gpus[name];
    accelerators[toApiAcceleratorType(name)] = {
      ...d,
      prices: formatCostMap(d.prices),
      spot: formatCostMap(d.spot),
    };
  }

  // Wrap on a shallow copy rather than on the argument itself: mutating the
  // caller's object would double-wrap the prices if this function were called
  // a second time with the same data.
  const wrappedDisks = { ...disks };
  for (const name in wrappedDisks) {
    // makes the format consistent with the PriceData interface,
    // and also makes it easy to add more data about each disk
    // later if we need to (e.g., about speed?)
    wrappedDisks[name] = { prices: wrappedDisks[name] };
  }

  return { machineTypes, accelerators, disks: wrappedDisks, zones };
}

/**
 * Maps an accelerator name as it appears in the scraped data sources to the
 * name used by the GCP API.
 *
 * The scraped names look like
 *   'NVIDIA T4', 'NVIDIA P4', 'NVIDIA V100', 'NVIDIA P100', 'NVIDIA K80',
 * and the second word, lowercased, is appended to "nvidia-tesla-".
 *
 * @param name scraped accelerator name; must contain at least two
 *   space-separated words, otherwise a TypeError is raised
 * @returns the API-style accelerator type, e.g. "nvidia-tesla-t4"
 */
function toApiAcceleratorType(name: string): string {
  const [, model] = name.split(" ");
  return "nvidia-tesla-" + model.toLowerCase();
}