import { flow, pipe } from "fp-ts/lib/function"
import * as E from "fp-ts/lib/Either"
import * as R from "fp-ts/lib/ReadonlyArray"
import * as O from "fp-ts/lib/Option"
import { cvMean, varMedian, pqMode, mean, median, mode, std } from "../stats"
import { CategoricalConstraint, Constraint, ContinuousConstraint } from "./types"
import { Reliability } from "./types"

/**
 * `Pick` that distributes over unions: when `T` is a union, `K` is picked from
 * each member separately (via the distributive conditional `T extends unknown`),
 * so the result is a union of picked members rather than one merged object type.
 */
type DistributivePick<T, K extends keyof T> = T extends unknown ? Pick<T, K> : never

/** Qualitative variability level returned by the `variability*` helpers in this file. */
type Variability = "high" | "medium" | "low"

/** Fields shared by every `DataCollection` variant. */
type BaseData = {
  // Reliabilities attached to the collection. The smart constructors in this file
  // store the caller's list as-is for a single observation, and the first element
  // of the `mode(rels)` result when there are several observations.
  readonly reliability: Reliability[]
}

/**
 * Numeric observations governed by a `ContinuousConstraint`.
 * Instances are built by `mkNumerical`.
 */
export type Numerical = BaseData & {
  // Representative value: the observation itself when there is exactly one,
  // otherwise the median or mean depending on `constraint.ctMeasure`.
  readonly value: number
  // The parsed observations.
  readonly data: readonly number[]
  // Discriminant of the `DataCollection` union.
  readonly type: "numerical"
  // Summary statistics; `mkNumerical` omits them for a single observation.
  readonly stats?: {
    readonly mean: number
    readonly median: number
    readonly std: number
    readonly variability: Variability
  }
  readonly constraint: ContinuousConstraint
}

/**
 * Categorical observations governed by a `CategoricalConstraint`.
 * Instances are built by `mkNominal`.
 */
export type Nominal = BaseData & {
  // Representative value: the observation itself when there is exactly one,
  // otherwise the left-most mode of the observations.
  readonly value: string
  // The observations (category labels).
  readonly data: readonly string[]
  // Discriminant of the `DataCollection` union.
  readonly type: "nominal"
  // Summary statistics; `mkNominal` omits them for a single observation.
  readonly stats?: {
    // First element of the `mode(data)` result (there may be several modes).
    readonly mode: string[]
    // Second element of the `mode(data)` result divided by the number of
    // observations (presumably the mode's count; confirm in ../stats).
    readonly relFreq: number
    readonly variability: Variability
  }
  readonly constraint: CategoricalConstraint
}

/**
 * Categorical observations whose labels start with an integer, governed by a
 * `CategoricalConstraint`. Instances are built by `mkOrdinal`.
 */
export type Ordinal = BaseData & {
  // Representative category label, mapped back from a numerical representation.
  readonly value: string
  // The observations (category labels), kept as strings.
  readonly data: readonly string[]
  // Discriminant of the `DataCollection` union.
  readonly type: "ordinal"
  // Summary statistics computed by `mkOrdinal` over the integers parsed from the
  // labels; omitted for a single observation.
  readonly stats?: {
    readonly mean: number
    readonly median: number
    readonly std: number
    readonly variability: Variability
  }
  readonly constraint: CategoricalConstraint
}

/**
 * Parsed collection of data from attribute-service
 * where the data has been narrowed and tagged with the appropriate type.
 * The `type` field is the discriminant: "numerical", "nominal" or "ordinal".
 */
export type DataCollection = Numerical | Nominal | Ordinal

/**
 * Intermediate result of `infer`: only the `data`, `reliability` and `type` of
 * each `DataCollection` variant, before values and statistics are computed.
 */
export type PartialDataCollection = DistributivePick<DataCollection, "data" | "reliability" | "type">

/**
 * Infer the type of a collection of raw observations from its constraint.
 *
 * - `continuous`: every raw item must convert to a number via `Number`. Blank
 *   (empty or whitespace-only) items and items converting to `NaN` make the
 *   whole collection invalid.
 * - `categorical`: the collection is tagged "ordinal" when every category of
 *   the constraint starts with an integer (as read by `parseInt`), otherwise
 *   "nominal". The raw items themselves are passed through unvalidated here.
 *
 * @param raw raw data
 * @param rels reliabilities of the data, passed through unchanged
 * @param constraint the constraint associated with the data
 * @return Right with the parsed `data`, its `reliability` and its `type`; Left
 * with an Error when continuous data contains an item that is not a number
 */
const infer = (
  raw: readonly string[],
  rels: Reliability[],
  constraint: Constraint,
): E.Either<Error, PartialDataCollection> => {
  /** Convert one raw item to Some(number), or None if it is blank or not numeric. */
  const toNumber = (item: string): O.Option<number> =>
    item.trim() === ""
      ? // `Number("")` and `Number("  ")` are 0: without this guard a blank item
        // would silently be counted as a zero-valued observation.
        O.none
      : pipe(
          Number(item), // attempt to create a number
          O.fromPredicate((x: number) => !isNaN(x)), // Some(<number>), or None for NaN
        )

  switch (constraint.tag) {
    case "continuous": {
      const dataAsNumerical = pipe(
        raw,
        R.map(toNumber),
        O.sequenceArray, // Produce an array only if there are no None/s
      )

      if (O.isSome(dataAsNumerical)) {
        return E.right({ data: dataAsNumerical.value, reliability: rels, type: "numerical" })
      }
      return E.left(new Error("Data of " + constraint.name + " has incorrect type (expected Numerical)"))
    }
    case "categorical": {
      // Ordinal only if every category label starts with an integer.
      // The explicit radix makes labels always read as decimal.
      const isOrdinal = constraint.categories.every((category) => !isNaN(parseInt(category, 10)))

      if (isOrdinal) {
        return E.right({ data: raw, reliability: rels, type: "ordinal" })
      }
      return E.right({ data: raw, reliability: rels, type: "nominal" })
    }
  }
}

/**
 * Classify the variability of numerical data from the statistic computed by
 * `cvMean` (CTM mean).
 * @param data numerical observations
 * @returns "low" for a statistic up to 0.3, "medium" above 0.3 and up to 0.7,
 * "high" otherwise
 */
const variabilityMean = (data: readonly number[]): Variability => {
  const spread = cvMean(data)

  // Guards run in ascending order, so each one only needs its upper bound.
  if (spread <= 0.3) return "low"
  if (spread <= 0.7) return "medium"
  return "high"
}

/**
 * Classify the variability of numerical data from the statistic computed by
 * `varMedian` (CTM median).
 * @param data numerical observations
 * @returns "low" for a statistic up to 1, "medium" above 1 and up to 2,
 * "high" otherwise
 */
const variabilityMedian = (data: readonly number[]): Variability => {
  const spread = varMedian(data)

  // Guards run in ascending order, so each one only needs its upper bound.
  if (spread <= 1) return "low"
  if (spread <= 2) return "medium"
  return "high"
}

/**
 * Classify the variability of categorical data from the statistic computed by
 * `pqMode` (CTM mode).
 * @param data categorical observations
 * @returns "low" for a statistic up to 0.09, "medium" above 0.09 and up to 0.16,
 * "high" otherwise
 */
const variabilityMode = (data: readonly string[]): Variability => {
  const spread = pqMode(data)

  // Guards run in ascending order, so each one only needs its upper bound.
  if (spread <= 0.09) return "low"
  if (spread <= 0.16) return "medium"
  return "high"
}

/**
 * Smart constructor for Numerical DataCollections.
 *
 * A single observation is returned as the value, without statistics and with
 * the reliabilities untouched. For any other number of observations the value
 * is the median when `constraint.ctMeasure` is "median" and the mean otherwise,
 * statistics are attached, and the reliability is the first element of the
 * `mode(rels)` result.
 *
 * @param data numerical observations
 * @param rels reliabilities of the observations
 * @param constraint the associated ContinuousConstraint of the observations
 */
const mkNumerical = (data: readonly number[], rels: Reliability[], constraint: ContinuousConstraint): Numerical => {
  // A lone data point needs no summarising.
  if (data.length === 1) {
    return { value: data[0], type: "numerical", reliability: rels, data, constraint }
  }

  const summary = {
    mean: mean(data),
    median: median(data),
    std: std(data),
  }

  const [dominantRels] = mode(rels) ?? []

  // Only "median" selects the median; "mean" and anything else fall back to the mean.
  const useMedian = constraint.ctMeasure === "median"
  const variability = useMedian ? variabilityMedian(data) : variabilityMean(data)

  return {
    value: useMedian ? summary.median : summary.mean,
    type: "numerical",
    reliability: dominantRels,
    stats: { ...summary, variability },
    data,
    constraint,
  }
}

/**
 * Smart constructor for Ordinal DataCollections.
 *
 * Every observation must be one of `constraint.categories` and start with an
 * integer (its numerical representation, read with `parseInt`). Statistics are
 * computed over those integers, and the representative value is mapped back to
 * the first category that has the same numerical representation.
 *
 * @param data ordinal observations (category labels)
 * @param rels reliabilities of the observations
 * @param constraint the associated CategoricalConstraint of the observations
 * @returns Right with the Ordinal collection; Left with an Error when an
 * observation is not a valid category or when the representative value cannot
 * be mapped back to a category
 */
const mkOrdinal = (
  data: readonly string[],
  rels: Reliability[],
  constraint: CategoricalConstraint,
): E.Either<Error, Ordinal> => {
  /**
   * Attempt to convert a category to its numerical representation
   * @param category category label
   * @returns Some(integer), or None when the label does not start with an integer
   */
  const toNumericalRepr = (category: string): O.Option<number> =>
    pipe(
      // Explicit radix: labels are always read as decimal integers
      parseInt(category, 10),
      // Ensure it is valid number
      O.fromPredicate((categoryAsInt: number) => !isNaN(categoryAsInt)),
    )

  /**
   * Attempt to convert a numerical representation to a category
   * @param repr numerical representation to look up
   * @returns the first category whose numerical representation equals `repr`, if any
   */
  const fromNumericalRepr = (repr: number): O.Option<string> =>
    pipe(
      constraint.categories,
      // Compare parsed integers, not string prefixes: a prefix match would let
      // 1 select "10 ..." or "12 ..." when such a category is listed first.
      R.filter((category) => parseInt(category, 10) === repr),
      R.head,
    )

  const maybeData = pipe(
    // for each item
    data,
    R.map(
      flow(
        // ensure it shows up as an item in constraint.categories, else map it to None
        O.fromPredicate((item: string) => constraint.categories.includes(item)),
        // convert the item to its numerical representation
        O.map(toNumericalRepr),
        O.flatten,
      ),
    ),
    // only produce an array if all items are valid categories with a numerical representation, else None
    O.sequenceArray,
  )

  if (O.isNone(maybeData)) {
    return E.left(new Error(`Data for ${constraint.name} contains invalid values. Please contact support.`))
  }

  const parsedData = maybeData.value

  if (parsedData.length === 1) {
    // A single data point
    const value = fromNumericalRepr(parsedData[0])

    if (O.isNone(value)) {
      return E.left(new Error(`Could not map number to category within ${constraint.name}.`))
    }

    return E.right({
      value: value.value,
      type: "ordinal",
      data,
      reliability: rels,
      constraint,
    })
  }

  // Multiple data points
  const dataMean = mean(parsedData)
  const dataMedian = median(parsedData)
  const dataStd = std(parsedData)
  const dataVariability = variabilityMedian(parsedData)

  const [relMode] = mode(rels)

  // Only "mean" selects the mean; "median" and anything else fall back to the median.
  const central = constraint.ctMeasure === "mean" ? dataMean : dataMedian

  // Categories only have integer representations. The mean is generally
  // fractional, and the median may be too (e.g. if `median` averages the two
  // middle observations of an even-sized sample), so round before the lookup.
  const value = fromNumericalRepr(Math.round(central))

  if (O.isNone(value)) {
    return E.left(new Error(`Could not map number to category within ${constraint.name}.`))
  }

  return E.right({
    value: value.value,
    type: "ordinal",
    reliability: relMode,
    stats: {
      mean: dataMean,
      median: dataMedian,
      std: dataStd,
      variability: dataVariability,
    },
    data,
    constraint,
  })
}

/**
 * Smart constructor for Nominal DataCollections.
 *
 * Every observation must be one of `constraint.categories`. A single
 * observation is returned as the value without statistics; otherwise the value
 * is the left-most mode and statistics are attached.
 *
 * @param data nominal observations (category labels)
 * @param rels reliabilities of the observations
 * @param constraint the associated CategoricalConstraint of the observations
 * @returns Right with the Nominal collection; Left with an Error when an
 * observation is not one of the constraint's categories
 */
const mkNominal = (
  data: readonly string[],
  rels: Reliability[],
  constraint: CategoricalConstraint,
): E.Either<Error, Nominal> => {
  // Reject the whole collection as soon as one item is not a known category.
  const allKnown = data.every((item) => constraint.categories.includes(item))
  if (!allKnown) {
    return E.left(new Error(`Data for ${constraint.name} contains invalid values. Please contact support.`))
  }

  // A lone data point is its own representative value.
  if (data.length === 1) {
    return E.right({
      value: data[0],
      type: "nominal",
      reliability: rels,
      data,
      constraint,
    })
  }

  // Several data points: summarise them through their mode.
  const [modes, modeCount] = mode(data)
  const variability = variabilityMode(data)
  const [dominantRels] = mode(rels)

  return E.right({
    // Note that for multi-modal data, this chooses the left-most value.
    value: modes[0],
    type: "nominal",
    reliability: dominantRels,
    stats: {
      mode: modes,
      relFreq: modeCount / data.length,
      variability,
    },
    data,
    constraint,
  })
}

/**
 * Smart constructor. Creates a DataCollection with the appropriate type from
 * an array of string values, usually unvalidated input from attribute-service.
 *
 * The raw values are first typed by `infer`; the result is then handed to the
 * sub-constructor matching the inferred type.
 *
 * @param raw observations
 * @param reliabilities reliability of the data.
 * @param constraint the constraint associated with the observations
 * @returns Right with the DataCollection, or Left with the Error reported by
 * `infer` or by the sub-constructor
 */
export const mkDataCollection = (
  raw: readonly string[],
  reliabilities: Reliability[],
  constraint: Constraint,
): E.Either<Error, DataCollection> =>
  pipe(
    // Typing the raw data can fail; the failure is carried as a Left and skips the chain below.
    infer(raw, reliabilities, constraint),
    E.chain((inferred): E.Either<Error, DataCollection> => {
      // The constraint casts mirror the tag that `infer` used to choose the type.
      switch (inferred.type) {
        case "numerical":
          return E.right(mkNumerical(inferred.data, reliabilities, constraint as ContinuousConstraint))
        case "ordinal":
          return mkOrdinal(inferred.data, reliabilities, constraint as CategoricalConstraint)
        case "nominal":
          return mkNominal(inferred.data, reliabilities, constraint as CategoricalConstraint)
      }
    }),
  )

/** Internal functions of this module, bundled so unit tests can reach them. */
export const testables = { infer, mkNumerical, mkOrdinal, mkNominal, mkDataCollection }
