import { AtUriString } from '@atproto/syntax'

/**
 * The literal string prefix `site.standard.` (note the trailing dot).
 * Presumably the shared leading segment of the Standard Site lexicon NSIDs —
 * NOTE(review): confirm against the call sites, which are not visible here.
 */
export const SITE_STANDARD_NSID_PREFIX = 'site.standard.'

/**
 * Builds the composite `uri@cid` string used as a map key for one exact
 * version of an SS record.
 *
 * The cid is part of the key because a single hydration batch may contain
 * several versions of the same record URI (different posts pinning different
 * cids); keying on the pair keeps version-exact lookups O(1).
 *
 * @param uri - Record URI.
 * @param cid - Content id of the specific record version.
 * @returns `uri`, an `@` separator, then `cid`.
 */
export const siteStandardRecordKey = (uri: string, cid: string) => {
  const separator = '@'
  return uri + separator + cid
}

/**
 * Splits a composite key produced by `siteStandardRecordKey` back into its
 * `(uri, cid)` parts.
 *
 * The split happens at the LAST `@`, so any `@` occurring earlier in the key
 * stays part of the URI.
 *
 * A key with no `@` at all is not a valid composite key. Rather than letting
 * `lastIndexOf`'s `-1` leak into `slice` (which would chop the final
 * character off the URI and return the whole key as the cid), such a key is
 * returned as the URI with an empty cid.
 *
 * @param key - Composite `uri@cid` key.
 * @returns The URI (asserted, not validated, as `AtUriString`) and the cid.
 */
export const parseSiteStandardRecordKey = (
  key: string,
): { uri: AtUriString; cid: string } => {
  const at = key.lastIndexOf('@')
  // No separator: treat the whole input as the URI instead of slicing with -1.
  if (at === -1) {
    return { uri: key as AtUriString, cid: '' }
  }
  return {
    uri: key.slice(0, at) as AtUriString,
    cid: key.slice(at + 1),
  }
}

/**
 * Reduces an HTTP(S) URL to a canonical `protocol://host/path` string that
 * can be compared with `===`.
 *
 * Only `protocol`, `host` and `pathname` of the parsed URL are kept, so the
 * query string and fragment are dropped. The host is whatever the `URL`
 * parser produced (it lowercases the hostname), and a single trailing slash
 * is removed from the path — which turns the root path `/` into an empty
 * path.
 *
 * @param url - Candidate URL string.
 * @returns The canonical form, or `null` when `url` does not parse or its
 *   scheme is neither `http:` nor `https:`.
 */
const canonicalizeHttpUrl = (url: string): string | null => {
  try {
    const { protocol, host, pathname } = new URL(url)
    const isHttp = protocol === 'http:' || protocol === 'https:'
    if (!isHttp) return null
    // Dropping one trailing slash also maps the bare root path '/' to ''.
    const trimmedPath = pathname.replace(/\/$/, '')
    return `${protocol}//${host}${trimmedPath}`
  } catch {
    // `new URL` throws on unparseable input.
    return null
  }
}

/**
 * Concatenates `base` and `path` with a `/` between them, after removing at
 * most one trailing slash from `base` and at most one leading slash from
 * `path`. An empty `path` returns `base` untouched (including any trailing
 * slash it already had).
 *
 * This is plain string concatenation, not URL resolution: unlike
 * `new URL(path, base)`, a leading slash on `path` does NOT replace `base`'s
 * pathname. `joinPath('https://x.com/blog', '/foo')` is
 * `https://x.com/blog/foo`, not `https://x.com/foo`.
 *
 * @param base - Left-hand side, typically an absolute URL.
 * @param path - Right-hand side to append; may be empty.
 * @returns The joined string.
 */
const joinPath = (base: string, path: string): string => {
  if (!path) return base
  const head = base.replace(/\/$/, '')
  const tail = path.replace(/^\//, '')
  return [head, tail].join('/')
}

/**
 * Checks whether the given SS records actually back `assumedUrl`.
 *
 * A record-side URL is derived from the records and compared, after
 * canonicalization on both sides (lowercase host, query/fragment dropped,
 * trailing slash stripped), with `assumedUrl`:
 *
 * - Document + publication: `publication.url` joined with `document.path`.
 * - Loose document (web-URL `site`, no publication): `document.site` joined
 *   with `document.path`. (A doc whose `site` is an at-uri but whose
 *   publication is missing can't reach this function — the lookups reject
 *   it.)
 * - Publication only: `publication.url` on its own.
 * - Neither: vacuously valid; the caller short-circuits before getting here.
 *
 * Joining uses `base + '/' + path` semantics: a leading `/` on `path` does
 * NOT swallow the base's pathname the way `new URL(path, base)` would, so
 * `'https://atproto.com/blog'` plus `'/indexing-standard-site'` yields
 * `https://atproto.com/blog/indexing-standard-site` no matter which side
 * carries the slash. A missing `path` is treated as empty.
 *
 * Structural validation of the doc/pub pair (matching `site` ↔ pub URI, no
 * orphan docs that claim a missing publication) happens upstream in
 * `getSiteStandardRecordsFromHydrationMapsByRefs` / `…ByDocumentUri` (see
 * `hydration/external.ts`). By the time this runs the pair is already known
 * to be structurally consistent, so only the URL match is checked here.
 *
 * NOTE(review): because the join is plain string concatenation, a base URL
 * that carries a query string or fragment would absorb `path` into it, and
 * canonicalization would then discard it — confirm record URLs never carry
 * one.
 *
 * @param document - Document record, if any.
 * @param publication - Publication record, if any.
 * @param assumedUrl - URL the records are claimed to back.
 * @returns `false` when `assumedUrl` is not a valid HTTP(S) URL or the
 *   record-side URL does not canonicalize to it; `true` on a match or when
 *   neither record is supplied.
 */
export const validateStandardSiteForUrl = (
  document:
    | {
        ref: { uri: string }
        info: { record: { site: string; path?: string } }
      }
    | undefined,
  publication:
    | { ref: { uri: string }; info: { record: { url: string } } }
    | undefined,
  assumedUrl: string,
): boolean => {
  const expected = canonicalizeHttpUrl(assumedUrl)
  if (expected === null) return false

  // The publication URL, when present, is the base; otherwise a loose
  // document supplies its own `site`.
  let base: string
  if (publication) {
    base = publication.info.record.url
  } else if (document) {
    base = document.info.record.site
  } else {
    return true
  }

  // With no document there is no path, and `joinPath` hands the base back
  // unchanged for an empty path.
  const docPath = document ? (document.info.record.path ?? '') : ''
  const recordUrl = joinPath(base, docPath)
  return canonicalizeHttpUrl(recordUrl) === expected
}

/** Reading speed, in words per minute, used by the estimate below. */
const WORDS_PER_MINUTE = 200

/**
 * Gives a rough reading-time estimate, in whole minutes, for a plaintext
 * document body.
 *
 * Words are counted as maximal runs of non-whitespace characters and divided
 * by the coarse `WORDS_PER_MINUTE` heuristic, rounding up. Swap in a more
 * accurate library here if needed (e.g. `reading-time`).
 *
 * @param text - Plaintext body.
 * @returns Minutes (at least 1), or `undefined` when `text` contains no
 *   countable words.
 */
export const estimateReadingTimeMinutes = (
  text: string,
): number | undefined => {
  const wordCount = text.match(/\S+/g)?.length ?? 0
  if (wordCount === 0) return undefined
  const minutes = Math.ceil(wordCount / WORDS_PER_MINUTE)
  return Math.max(1, minutes)
}