//$ nobt
//$ nocpp

/**
 * @file CDSPFracInterpolator.h
 *
 * @brief Fractional delay interpolator and filter bank classes.
 *
 * This file includes fractional delay interpolator class.
 *
 * r8brain-free-src Copyright (c) 2013-2025 Aleksey Vaneev
 *
 * See the "LICENSE" file for license.
 */

#ifndef R8B_CDSPFRACINTERPOLATOR_INCLUDED
#define R8B_CDSPFRACINTERPOLATOR_INCLUDED

#include "CDSPProcessor.h"
#include "CDSPSincFilterGen.h"

namespace r8b {

#if R8B_FLTTEST
extern int InterpFilterFracs; ///< Force this number of fractional filter
///< positions. -1 - use default.
#endif // R8B_FLTTEST

/**
 * @brief Sinc function-based fractional delay filter bank class.
 *
 * Class implements storage and initialization of a bank of sinc-based
 * fractional delay filters, expressed as 0th, 1st, 2nd or 3rd order
 * polynomial interpolation coefficients. The filters are windowed by the
 * "Kaiser" power-raised window function.
 */

class CDSPFracDelayFilterBank : public R8B_BASECLASS {
  R8BNOCTOR(CDSPFracDelayFilterBank)

  friend class CDSPFracDelayFilterBankCache;

 public:
  /**
	 * @brief Initializes the filter bank object.
	 *
	 * @param aFilterFracs The number of fractional delay positions to sample,
	 * -1 - use default.
	 * @param aElementSize The size of each filter's tap, in "double" values.
	 * This parameter corresponds to the complexity of interpolation. 4 should
	 * be set for 3rd order, 3 for 2nd order, 2 for linear interpolation, 1
	 * for whole-numbered stepping.
	 * @param aInterpPoints The number of points the interpolation is based
	 * on. This value should not be confused with the `ElementSize`. Set to 2
	 * for linear or no interpolation.
	 * @param aReqAtten Required filter attentuation.
	 * @param aIsThird `true` if one-third filter is required.
	 */

  CDSPFracDelayFilterBank(
      const int aFilterFracs,
      const int aElementSize,
      const int aInterpPoints,
      const double aReqAtten,
      const bool aIsThird)
      : InitFilterFracs(aFilterFracs),
        ElementSize(aElementSize),
        InterpPoints(aInterpPoints),
        ReqAtten(aReqAtten),
        IsThird(aIsThird),
        Next(R8B_NULL),
        RefCount(1) {
    R8BASSERT(ElementSize >= 1 && ElementSize <= 4);

    // Kaiser window function Params, for half and third-band.

    const double *const Params = getWinParams(ReqAtten, IsThird, FilterLen);

    FilterSize = FilterLen * ElementSize;

    if (InitFilterFracs == -1) {
      FilterFracs = (int)ceil(pow(6.4, ReqAtten / 50.0));

#if R8B_FLTTEST

      if (InterpFilterFracs != -1) {
        FilterFracs = InterpFilterFracs;
      }

#endif // R8B_FLTTEST
    } else {
      FilterFracs = InitFilterFracs;
    }

    Table.alloc(FilterSize * (FilterFracs + InterpPoints));

    CDSPSincFilterGen sinc;
    sinc.Len2 = FilterLen / 2;

    double *p = Table;
    const int pc2 = InterpPoints / 2;
    int i;

    for (i = -pc2 + 1; i <= FilterFracs + pc2; i++) {
      sinc.FracDelay = (double)(FilterFracs - i) / FilterFracs;
      sinc.initFrac(CDSPSincFilterGen ::wftKaiser, Params, true);
      sinc.generateFrac(p, &CDSPSincFilterGen ::calcWindowKaiser, ElementSize);

      normalizeFIRFilter(p, FilterLen, 1.0, ElementSize);
      p += FilterSize;
    }

    const int TablePos2 = FilterSize;
    const int TablePos3 = FilterSize * 2;
    const int TablePos4 = FilterSize * 3;
    const int TablePos5 = FilterSize * 4;
    const int TablePos6 = FilterSize * 5;
    const int TablePos7 = FilterSize * 6;
    const int TablePos8 = FilterSize * 7;
    double *const TableEnd = Table + (FilterFracs + 1) * FilterSize;
    p = Table;

    if (InterpPoints == 8) {
      if (ElementSize == 3) {
        // Calculate 2nd order spline (polynomial) interpolation
        // coefficients using 8 points.

        while (p < TableEnd) {
          calcSpline2p8Coeffs(
              p,
              p[0],
              p[TablePos2],
              p[TablePos3],
              p[TablePos4],
              p[TablePos5],
              p[TablePos6],
              p[TablePos7],
              p[TablePos8]);

          p += ElementSize;
        }

#if defined(R8B_SIMD_ISH)
        shuffle2_3(Table, TableEnd);
#endif // SIMD
      } else if (ElementSize == 4) {
        // Calculate 3rd order spline (polynomial) interpolation
        // coefficients using 8 points.

        while (p < TableEnd) {
          calcSpline3p8Coeffs(
              p,
              p[0],
              p[TablePos2],
              p[TablePos3],
              p[TablePos4],
              p[TablePos5],
              p[TablePos6],
              p[TablePos7],
              p[TablePos8]);

          p += ElementSize;
        }

#if defined(R8B_SIMD_ISH)
        shuffle2_4(Table, TableEnd);
#endif // SIMD
      }
    } else {
      if (ElementSize == 2) {
        // Calculate linear interpolation coefficients.

        while (p < TableEnd) {
          p[1] = p[TablePos2] - p[0];
          p += ElementSize;
        }

#if defined(R8B_SIMD_ISH)
        shuffle2_2(Table, TableEnd);
#endif // SIMD
      }
    }

    R8BCONSOLE(
        "CDSPFracDelayFilterBank: fracs=%i order=%i taps=%i "
        "att=%.1f third=%i\n",
        FilterFracs,
        ElementSize - 1,
        FilterLen,
        ReqAtten,
        (int)IsThird);
  }

  ~CDSPFracDelayFilterBank() {
    delete Next;
  }

  /**
	 * @brief Rounds the specified attenuation to the nearest effective value.
	 *
	 * @param[in,out] att Required filter attentuation. Will be rounded to the
	 * nearest value.
	 * @param aIsThird `true` if one-third filter is required.
	 */

  static void roundReqAtten(double &att, const bool aIsThird) {
    int tmp;
    getWinParams(att, aIsThird, tmp);
  }

  /**
	 * @brief Returns the length of the filter, in samples (taps). Always
	 * an even number, not less than 6.
	 */

  int getFilterLen() const {
    return (FilterLen);
  }

  /**
	 * @brief Returns the number of fractional positions sampled by the bank.
	 */

  int getFilterFracs() const {
    return (FilterFracs);
  }

  /**
	 * @brief Returns reference to the filter.
	 *
	 * @param i Filter index, in the range 0 to `FilterFracs`, inclusive.
	 */

  const double &operator[](const int i) const {
    R8BASSERT(i >= 0 && i <= FilterFracs);

    return (Table[i * FilterSize]);
  }

  /**
	 * @brief Reduces reference count to *this* object.
	 *
	 * This function should be called when the filter bank obtained via the
	 * filter bank cache is no longer needed.
	 */

  void unref();

 private:
  int FilterLen;                 ///< Filter length. Always an even number, not less than 6.
  int FilterFracs;               ///< Fractional position count.
  int InitFilterFracs;           ///< Fractional position count as supplied to the
                                 ///< constructor, may equal -1.
  int ElementSize;               ///< Filter element size.
  int InterpPoints;              ///< Interpolation points to use.
  double ReqAtten;               ///< Filter's attentuation.
  bool IsThird;                  ///< `true` if one-third filter is in use.
  int FilterSize;                ///< This constant specifies the "size" of a single filter
                                 ///< in "double" elements.
  CFixedBuffer<double> Table;    ///< The table of fractional delay filters
                                 ///< for all discrete fractional x = 0..1 sample positions, and
                                 ///< interpolation coefficients.
  CDSPFracDelayFilterBank *Next; ///< Next filter bank in cache's list.
  int RefCount;                  ///< The number of references made to *this* filter bank.
                                 ///< Not considered for "static" filter bank objects.

  /**
	 * @brief Returns windowing function parameters for the specified
	 * attenuation and filter type.
	 *
	 * @param[in,out] att Required filter attentuation. Will be rounded to the
	 * nearest value.
	 * @param aIsThird `true` if one-third filter is required.
	 * @param[out] fltlen Resulting filter length.
	 */

  static const double *getWinParams(double &att, const bool aIsThird, int &fltlen) {
    static const int Coeffs2Base = 8;
    static const int Coeffs2Count = 12;
    static const double Coeffs2[Coeffs2Count][3] = {
        {4.1308468534586913, 1.1752580009977263, 55.5446},   // 0.0256
        {4.4241520324148826, 1.8004881791443044, 81.4191},   // 0.0886
        {5.2615232289173663, 1.8133318236025469, 96.3392},   // 0.0481
        {5.9433751227216174, 1.8730186391986436, 111.1315},  // 0.0264
        {6.8308658290513815, 1.8549555110340281, 125.4653},  // 0.0146
        {7.6648458290312904, 1.8565766090828464, 139.7379},  // 0.0081
        {8.2038728664307605, 1.9269521820570166, 154.0532},  // 0.0045
        {8.7865150946655142, 1.9775307667441668, 168.2101},  // 0.0025
        {9.5945017884101773, 1.9718456992078597, 182.1076},  // 0.0014
        {10.5163141145985240, 1.9504067820201083, 195.5668}, // 0.0008
        {10.2382465206362470, 2.1608923446870087, 209.0610}, // 0.0004
        {10.9976060250714000, 2.1536533525688935, 222.5010}, // 0.0003
    };

    static const int Coeffs3Base = 6;
    static const int Coeffs3Count = 10;
    static const double Coeffs3[Coeffs3Count][3] = {
        {3.9888564562781847, 1.5869927184268915, 66.5701},   // 0.0467
        {4.6986694038145007, 1.8086068597928262, 86.4715},   // 0.0136
        {5.5995071329337822, 1.8930163360942349, 106.1195},  // 0.0040
        {6.3627287800257228, 1.9945748322093975, 125.2307},  // 0.0012
        {7.4299550711428308, 1.9893400572347544, 144.3469},  // 0.0004
        {8.0667715944075642, 2.0928201458699909, 163.4099},  // 0.0001
        {8.7469970226288822, 2.1640279784268355, 181.0694},  // 0.0000
        {10.0823430069835230, 2.0896678025321922, 199.2880}, // 0.0000
        {10.9222206090489510, 2.1221681162186004, 216.6865}, // 0.0000
        {21.2017743894772010, 1.1856768080118900, 233.9188}, // 0.0000
    };

    const double *Params;
    int i = 0;

    if (aIsThird) {
      while (i != Coeffs3Count - 1 && Coeffs3[i][2] < att) {
        i++;
      }

      Params = &Coeffs3[i][0];
      att = Coeffs3[i][2];
      fltlen = Coeffs3Base + i * 2;
    } else {
      while (i != Coeffs2Count - 1 && Coeffs2[i][2] < att) {
        i++;
      }

      Params = &Coeffs2[i][0];
      att = Coeffs2[i][2];
      fltlen = Coeffs2Base + i * 2;
    }

    return (Params);
  }

  /**
	 * @brief Shuffles 2 order-2 filter points for SIMD operation.
	 *
	 * @param p Filter table start pointer.
	 * @param pe Filter table end pointer.
	 */

  static void shuffle2_2(double *p, double *const pe) {
    while (p != pe) {
      const double t = p[2];
      p[2] = p[1];
      p[1] = t;

      p += 4;
    }
  }

  /**
	 * @brief Shuffles 2 order-3 filter points for SIMD operation.
	 *
	 * @param p Filter table start pointer.
	 * @param pe Filter table end pointer.
	 */

  static void shuffle2_3(double *p, double *const pe) {
    while (p != pe) {
      const double t1 = p[1];
      const double t2 = p[2];
      const double t3 = p[3];
      const double t4 = p[4];
      p[1] = t3;
      p[2] = t1;
      p[3] = t4;
      p[4] = t2;

      p += 6;
    }
  }

  /**
	 * @brief Shuffles 2 order-4 filter points for SIMD operation.
	 *
	 * @param p Filter table start pointer.
	 * @param pe Filter table end pointer.
	 */

  static void shuffle2_4(double *p, double *const pe) {
    while (p != pe) {
      const double t1 = p[1];
      const double t2 = p[2];
      const double t3 = p[3];
      const double t4 = p[4];
      const double t5 = p[5];
      const double t6 = p[6];
      p[1] = t4;
      p[2] = t1;
      p[3] = t5;
      p[4] = t2;
      p[5] = t6;
      p[6] = t3;

      p += 8;
    }
  }
};

/**
 * @brief Fractional delay filter cache class.
 *
 * Class implements cache storage of fractional delay filter banks.
 */

class CDSPFracDelayFilterBankCache : public R8B_BASECLASS {
  R8BNOCTOR(CDSPFracDelayFilterBankCache)

  friend class CDSPFracDelayFilterBank;

 public:
  /**
	 * @brief Calculates or returns reference to a previously calculated
	 * (cached) fractional delay filter bank.
	 *
	 * @param aFilterFracs The number of fractional delay positions to sample,
	 * -1 - use default.
	 * @param aElementSize The size of each filter's tap, in "double" values.
	 * @param aInterpPoints The number of points the interpolation is based
	 * on.
	 * @param ReqAtten Required filter attentuation.
	 * @param IsThird `true` if one-third filter is required.
	 * @param IsStatic `true` if a permanent static filter should be returned
	 * that is never removed from the cache until application terminates.
	 * @return Reference to a filter bank.
	 */

  static CDSPFracDelayFilterBank &getFilterBank(
      const int aFilterFracs,
      const int aElementSize,
      const int aInterpPoints,
      double ReqAtten,
      const bool IsThird,
      const bool IsStatic) {
    R8B_EXITDTOR static CPtrKeeper<CDSPFracDelayFilterBank> Objects;
    // The chain of cached objects.
    R8B_EXITDTOR static CPtrKeeper<CDSPFracDelayFilterBank> StaticObjects;
    // The chain of static objects.
    R8B_EXITDTOR static int ObjCount = 0; // The number of objects
                                          // currently present in the Objects cache.

    CDSPFracDelayFilterBank ::roundReqAtten(ReqAtten, IsThird);

    R8BSYNC(getStateSync());

    if (IsStatic) {
      CDSPFracDelayFilterBank *PrevObj = R8B_NULL;
      CDSPFracDelayFilterBank *CurObj = StaticObjects;

      while (CurObj != R8B_NULL) {
        if (CurObj->InitFilterFracs == aFilterFracs && CurObj->IsThird == IsThird &&
            CurObj->ElementSize == aElementSize && CurObj->InterpPoints == aInterpPoints &&
            CurObj->ReqAtten == ReqAtten) {
          if (PrevObj != R8B_NULL) {
            // Move the object to the top of the list.

            PrevObj->Next = CurObj->Next;
            CurObj->Next = StaticObjects.unkeep();
            StaticObjects = CurObj;
          }

          return (*CurObj);
        }

        PrevObj = CurObj;
        CurObj = CurObj->Next;
      }

      // Create a new filter bank and build it.

      CurObj =
          new CDSPFracDelayFilterBank(aFilterFracs, aElementSize, aInterpPoints, ReqAtten, IsThird);

      // Insert the bank at the start of the list.

      CurObj->Next = StaticObjects.unkeep();
      StaticObjects = CurObj;

      return (*CurObj);
    }

    CDSPFracDelayFilterBank *PrevObj = R8B_NULL;
    CDSPFracDelayFilterBank *CurObj = Objects;

    while (CurObj != R8B_NULL) {
      if (CurObj->InitFilterFracs == aFilterFracs && CurObj->IsThird == IsThird &&
          CurObj->ElementSize == aElementSize && CurObj->InterpPoints == aInterpPoints &&
          CurObj->ReqAtten == ReqAtten) {
        break;
      }

      if (CurObj->Next == R8B_NULL && ObjCount >= R8B_FRACBANK_CACHE_MAX) {
        if (CurObj->RefCount == 0) {
          // Delete the last bank which is not used.

          PrevObj->Next = R8B_NULL;
          delete CurObj;
          ObjCount--;
        } else {
          // Move the last bank to the top of the list since it
          // seems to be in use for a long time.

          PrevObj->Next = R8B_NULL;
          CurObj->Next = Objects.unkeep();
          Objects = CurObj;
        }

        CurObj = R8B_NULL;
        break;
      }

      PrevObj = CurObj;
      CurObj = CurObj->Next;
    }

    if (CurObj != R8B_NULL) {
      CurObj->RefCount++;

      if (PrevObj == R8B_NULL) {
        return (*CurObj);
      }

      // Remove the bank from the list temporarily.

      PrevObj->Next = CurObj->Next;
    } else {
      // Create a new filter bank (with RefCount == 1) and build it.

      CurObj =
          new CDSPFracDelayFilterBank(aFilterFracs, aElementSize, aInterpPoints, ReqAtten, IsThird);

      ObjCount++;
    }

    // Insert the bank at the start of the list.

    CurObj->Next = Objects.unkeep();
    Objects = CurObj;

    return (*CurObj);
  }

 protected:
  /**
	 * @brief Returns reference to global filter bank cache sync object.
	 */

  static CSyncObject &getStateSync() {
    R8B_EXITDTOR static CSyncObject StateSync;

    return (StateSync);
  }
};

// ---------------------------------------------------------------------------
// CDSPFracDelayFilterBank PUBLIC
// ---------------------------------------------------------------------------

inline void CDSPFracDelayFilterBank ::unref() {
  R8BSYNC(CDSPFracDelayFilterBankCache ::getStateSync());

  RefCount--;
}

/**
 * @brief Interatively searches for a greatest common denominator (GCD) of 2
 * numbers.
 *
 * @param l Number 1.
 * @param s Number 2.
 * @param[out] GCD Resulting GCD.
 * @return `true` if the greatest common denominator of 2 numbers was found.
 */

inline bool findGCD(double l, double s, double &GCD) {
  int it = 0;

  while (++it < 150) {
    const double r = l - s;

    if (r == 0.0) {
      GCD = s;
      return (s > 0.0);
    }

    l = s;
    s = fabs(r);
  }

  return (false);
}

/**
 * @brief Evaluates source and destination sample rate ratio and returns
 * the required input and output stepping.
 *
 * Function returns `false` if whole stepping cannot be used to perform
 * interpolation using these sample rates.
 *
 * @param SSampleRate Source sample rate.
 * @param DSampleRate Destination sample rate.
 * @param[out] ResInStep Resulting input step.
 * @param[out] ResOutStep Resulting output step.
 * @return `true` if stepping was acquired.
 */

inline bool getWholeStepping(
    const double SSampleRate,
    const double DSampleRate,
    int &ResInStep,
    int &ResOutStep) {
  double GCD;

  if (!findGCD(SSampleRate, DSampleRate, GCD)) {
    return (false);
  }

  const double InStep0 = SSampleRate / GCD;
  ResInStep = (int)InStep0;
  const double OutStep0 = DSampleRate / GCD;
  ResOutStep = (int)OutStep0;

  if (InStep0 != ResInStep || OutStep0 != ResOutStep) {
    return (false);
  }

  if (ResOutStep > 1500) {
    // Do not allow large output stepping due to low cache
    // performance of large filter banks.

    return (false);
  }

  return (true);
}

/**
 * @brief Fractional delay filter bank-based interpolator class.
 *
 * Class implements the fractional delay interpolator. This implementation at
 * first puts the input signal into a ring buffer and then performs
 * interpolation. The interpolation is performed using sinc-based fractional
 * delay filters. These filters are contained in a bank, and for higher
 * precision they are interpolated between adjacent filters.
 *
 * To increase the sample-timing precision, this class uses "resettable
 * counter" approach. This gives zero overall sample-timing error. With the
 * R8B_FASTTIMING configuration option enabled, the sample timing experiences
 * a very minor drift.
 */

class CDSPFracInterpolator : public CDSPProcessor {
 public:
  /**
	 * @brief Initalizes the interpolator. It is important to call the
	 * getMaxOutLen() function afterwards to obtain the optimal output buffer
	 * length.
	 *
	 * @param aSrcSampleRate Source sample rate.
	 * @param aDstSampleRate Destination sample rate.
	 * @param ReqAtten Required filter attentuation.
	 * @param IsThird `true` if one-third filter is required.
	 * @param PrevLatency Latency, in samples (any non-negative value), which
	 * was left in the output signal by a previous process. This latency will
	 * be consumed completely.
	 */

  CDSPFracInterpolator(
      const double aSrcSampleRate,
      const double aDstSampleRate,
      const double ReqAtten,
      const bool IsThird,
      const double PrevLatency)
      : SrcSampleRate(aSrcSampleRate),
        DstSampleRate(aDstSampleRate)
#if R8B_FASTTIMING
        ,
        FracStep(aSrcSampleRate / aDstSampleRate)
#endif // R8B_FASTTIMING
  {
    R8BASSERT(SrcSampleRate > 0.0);
    R8BASSERT(DstSampleRate > 0.0);
    R8BASSERT(PrevLatency >= 0.0);
    R8BASSERT(BufLenBits >= 5);

    InitFracPos = PrevLatency;
    Latency = (int)InitFracPos;
    InitFracPos -= Latency;

    R8BASSERT(Latency >= 0);

#if R8B_FLTTEST

    IsWhole = false;
    LatencyFrac = 0.0;
    FilterBank = new CDSPFracDelayFilterBank(-1, 3, 8, ReqAtten, IsThird);

#else // R8B_FLTTEST

    IsWhole = getWholeStepping(SrcSampleRate, DstSampleRate, InStep, OutStep);

    if (IsWhole) {
      const double spos = InitFracPos * OutStep;
      InitFracPosW = (int)spos;
      LatencyFrac = (spos - InitFracPosW) / InStep;

      FilterBank =
          &CDSPFracDelayFilterBankCache ::getFilterBank(OutStep, 1, 2, ReqAtten, IsThird, false);
    } else {
      LatencyFrac = 0.0;
      FilterBank = &CDSPFracDelayFilterBankCache ::getFilterBank(-1, 3, 8, ReqAtten, IsThird, true);
    }

#endif // R8B_FLTTEST

    FilterLen = FilterBank->getFilterLen();
    fl2 = FilterLen >> 1;
    fll = fl2 - 1;
    flo = fll + fl2;
    flb = BufLen - fll;

    R8BASSERT((1 << BufLenBits) >= FilterLen * 3);

    static const CConvolveFn FltConvFn0[13] = {
        &CDSPFracInterpolator ::convolve0<6>,
        &CDSPFracInterpolator ::convolve0<8>,
        &CDSPFracInterpolator ::convolve0<10>,
        &CDSPFracInterpolator ::convolve0<12>,
        &CDSPFracInterpolator ::convolve0<14>,
        &CDSPFracInterpolator ::convolve0<16>,
        &CDSPFracInterpolator ::convolve0<18>,
        &CDSPFracInterpolator ::convolve0<20>,
        &CDSPFracInterpolator ::convolve0<22>,
        &CDSPFracInterpolator ::convolve0<24>,
        &CDSPFracInterpolator ::convolve0<26>,
        &CDSPFracInterpolator ::convolve0<28>,
        &CDSPFracInterpolator ::convolve0<30>};

    convfn = (IsWhole ? FltConvFn0[fl2 - 3] : &CDSPFracInterpolator ::convolve2);

    R8BCONSOLE(
        "CDSPFracInterpolator: src=%.2f dst=%.2f taps=%i "
        "fracs=%i whole=%i third=%i step=%.6f\n",
        SrcSampleRate,
        DstSampleRate,
        FilterLen,
        (IsWhole ? OutStep : FilterBank->getFilterFracs()),
        (int)IsWhole,
        (int)IsThird,
        aSrcSampleRate / aDstSampleRate);

    clear();
  }

  virtual ~CDSPFracInterpolator() {
#if R8B_FLTTEST
    delete FilterBank;
#else  // R8B_FLTTEST
    FilterBank->unref();
#endif // R8B_FLTTEST
  }

  virtual int getInLenBeforeOutPos(const int ReqOutPos) const {
    const int ilat = fl2 + Latency;

    if (IsWhole) {
      return (
          ilat +
          (int)((InitFracPosW + (double)ReqOutPos * InStep) / OutStep +
                LatencyFrac * InStep / OutStep));
    }

    return (ilat + (int)(InitFracPos + ReqOutPos * SrcSampleRate / DstSampleRate));
  }

  virtual int getLatency() const {
    return (0);
  }

  virtual double getLatencyFrac() const {
    return (LatencyFrac);
  }

  virtual int getMaxOutLen(const int MaxInLen) const {
    R8BASSERT(MaxInLen >= 0);

    return ((int)ceil(MaxInLen * DstSampleRate / SrcSampleRate) + 1);
  }

  virtual void clear() {
    LatencyLeft = Latency;
    BufLeft = 0;
    WritePos = 0;
    ReadPos = flb; // Set "read" position to account for filter's
                   // latency at zero fractional delay.

    memset(&Buf[ReadPos], 0, (size_t)(BufLen - flb) * sizeof(Buf[0]));

    if (IsWhole) {
      InPosFracW = InitFracPosW;
    } else {
      InPosFrac = InitFracPos;

#if !R8B_FASTTIMING
      InCounter = 0;
      InPosInt = 0;
      InPosShift = InitFracPos * DstSampleRate / SrcSampleRate;
#endif // !R8B_FASTTIMING
    }
  }

  virtual int process(double *ip, int l, double *&op0) {
    R8BASSERT(l >= 0);
    R8BASSERT(ip != op0 || l == 0 || SrcSampleRate > DstSampleRate);

    if (LatencyLeft != 0) {
      if (LatencyLeft >= l) {
        LatencyLeft -= l;
        return (0);
      }

      l -= LatencyLeft;
      ip += LatencyLeft;
      LatencyLeft = 0;
    }

    double *op = op0;

    while (l > 0) {
      // Copy new input samples to the ring buffer.

      const int b = min(l, min(BufLen - WritePos, flb - BufLeft));

      double *const wp1 = Buf + WritePos;
      memcpy(wp1, ip, (size_t)b * sizeof(wp1[0]));
      const int ec = flo - WritePos;

      if (ec > 0) {
        memcpy(wp1 + BufLen, ip, (size_t)min(b, ec) * sizeof(wp1[0]));
      }

      ip += b;
      WritePos = (WritePos + b) & BufLenMask;
      l -= b;
      BufLeft += b;

      // Produce as many output samples as possible.

      op = (*this.*convfn)(op);
    }

#if !R8B_FASTTIMING

    if (!IsWhole && InCounter > 1000) {
      // Reset the interpolation position counter to achieve a higher
      // sample-timing precision.

      InCounter = 0;
      InPosInt = 0;
      InPosShift = InPosFrac * DstSampleRate / SrcSampleRate;
    }

#endif // !R8B_FASTTIMING

    return ((int)(op - op0));
  }

 private:
  static const int BufLenBits = 8; ///< The length of the ring buffer,
  ///< expressed as Nth power of 2. This value can be reduced if it is
  ///< known that only short input buffers will be passed to the
  ///< interpolator. The minimum value of this parameter is 5, and
  ///< `1 << BufLenBits` should be at least 3 times larger than the
  ///< FilterLen. However, this condition can be easily met if the input
  ///< signal is suitably downsampled first before the interpolation is
  ///< performed.
  static const int BufLen = 1 << BufLenBits; ///< The length of the ring
  ///< buffer. The actual length is longer, to permit "beyond bounds"
  ///< positioning.
  static const int BufLenMask = BufLen - 1; ///< Mask used for quick buffer
                                            ///< position wrapping.
  double Buf[BufLen + 29];                  ///< The ring buffer, including overrun
                                            ///< protection for maximal filter length.
  double SrcSampleRate;                     ///< Source sample rate.
  double DstSampleRate;                     ///< Destination sample rate.
  double InitFracPos;                       ///< Initial fractional position, in samples, in the
                                            ///< range [0; 1).
  int InitFracPosW;                         ///< Initial fractional position for whole-number
                                            ///< stepping.
  int Latency;        ///< Initial latency that should be removed from the input.
  double LatencyFrac; ///< Left-over fractional latency on output (always
                      ///< zero for non-whole stepping).
  int FilterLen;      ///< Filter length, in taps. Even value.
  int fll;            ///< Input latency (left-hand filter length).
  int fl2;            ///< Right-side (half) filter length.
  int flo;            ///< Overrun length.
  int flb;            ///< Initial buffer read position.
  int InStep;         ///< Input whole-number stepping.
  int OutStep;        ///< Output whole-number stepping (corresponds to filter bank
                      ///< size).
  int LatencyLeft;    ///< Input latency left to remove.
  int BufLeft;        ///< The number of samples left in the buffer to process.
  int WritePos;       ///< The current buffer write position. Incremented together
                      ///< with the `BufLeft` variable.
  int ReadPos;        ///< The current buffer read position.
  int InPosFracW;     ///< Interpolation position (fractional part) for
                      ///< whole-number stepping. Corresponds to the index into the filter
                      ///< bank.
  double InPosFrac;   ///< Interpolation position (fractional part).

#if R8B_FASTTIMING
  double FracStep; ///< Fractional sample-timing step.
#else              // R8B_FASTTIMING
  int InCounter;     ///< Interpolation step counter.
  int InPosInt;      ///< Interpolation position (integer part).
  double InPosShift; ///< Interpolation position fractional shift.
#endif             // R8B_FASTTIMING

  CDSPFracDelayFilterBank *FilterBank; ///< Filter bank in use, may be
                                       ///< whole-number stepping filter bank or static bank.
  bool IsWhole;                        ///< `true` if whole-number stepping is in use.

  typedef double *(CDSPFracInterpolator ::*CConvolveFn)(double *op); ///<
                                                                     ///< Convolution function type.
  CConvolveFn convfn; ///< Convolution function in use.

  /**
	 * @brief Convolution function for 0th order resampling.
	 *
	 * @param[out] op Output buffer.
	 * @tparam fltlen Filter length, in taps.
	 * @return Advanced "op" value.
	 */

  template <int fltlen>
  double *convolve0(double *op) {
    const CDSPFracDelayFilterBank &fb = *FilterBank;
    const int istep = InStep;
    const int ostep = OutStep;
    int fpos = InPosFracW;
    int rpos = ReadPos;
    int bl = BufLeft - fl2;

    while (bl > 0) {
      const double *const ftp = &fb[fpos];
      const double *const rp = Buf + rpos;
      int i;

#if defined(R8B_SSE2) && !defined(__INTEL_COMPILER)

      __m128d s = _mm_setzero_pd();

      for (i = 0; i < fltlen; i += 2) {
        const __m128d m = _mm_mul_pd(_mm_load_pd(ftp + i), _mm_loadu_pd(rp + i));

        s = _mm_add_pd(s, m);
      }

      _mm_storel_pd(op, _mm_add_pd(s, _mm_shuffle_pd(s, s, 1)));

#elif defined(R8B_NEON)

      float64x2_t s = vdupq_n_f64(0.0);

      for (i = 0; i < fltlen; i += 2) {
        s = vmlaq_f64(s, vld1q_f64(ftp + i), vld1q_f64(rp + i));
      }

      *op = vaddvq_f64(s);

#else // SIMD

      double s = 0.0;

      for (i = 0; i < fltlen; i++) {
        s += ftp[i] * rp[i];
      }

      *op = s;

#endif // SIMD

      op++;

      fpos += istep;
      const int PosIncr = fpos / ostep;
      fpos -= PosIncr * ostep;

      rpos = (rpos + PosIncr) & BufLenMask;
      bl -= PosIncr;
    }

    BufLeft = bl + fl2;
    ReadPos = rpos;
    InPosFracW = fpos;

    return (op);
  }

  /**
	 * @brief Convolution function for 2nd order resampling.
	 *
	 * @param[out] op Output buffer.
	 * @return Advanced "op" value.
	 */

  double *convolve2(double *op) {
    const CDSPFracDelayFilterBank &fb = *FilterBank;
    const int fltlen = FilterLen;
    const double ssr = SrcSampleRate;
    const double dsr = DstSampleRate;
    double fpos = InPosFrac;
    int rpos = ReadPos;
    int bl = BufLeft - fl2;

    while (bl > 0) {
      double x = fpos * fb.getFilterFracs();
      const int fti = (int)x; // Function table index.
      x -= fti;               // Coefficient for interpolation between adjacent
                              // fractional delay filters.
      const double x2d = x * x;
      const double *ftp = &fb[fti];
      const double *const rp = Buf + rpos;
      int i;

#if defined(R8B_SSE2) && defined(R8B_SIMD_ISH)

      const __m128d x1 = _mm_set1_pd(x);
      const __m128d x2 = _mm_set1_pd(x2d);
      __m128d s = _mm_setzero_pd();

      for (i = 0; i < fltlen; i += 2) {
        const __m128d ftp2 = _mm_load_pd(ftp + 2);
        const __m128d xx1 = _mm_mul_pd(ftp2, x1);
        const __m128d ftp4 = _mm_load_pd(ftp + 4);
        const __m128d xx2 = _mm_mul_pd(ftp4, x2);
        const __m128d ftp0 = _mm_load_pd(ftp);
        ftp += 6;

        const __m128d rpi = _mm_loadu_pd(rp + i);
        const __m128d xxs = _mm_add_pd(ftp0, _mm_add_pd(xx1, xx2));

        s = _mm_add_pd(s, _mm_mul_pd(rpi, xxs));
      }

      _mm_storel_pd(op, _mm_add_pd(s, _mm_shuffle_pd(s, s, 1)));

#elif defined(R8B_NEON) && defined(R8B_SIMD_ISH)

      const float64x2_t x1 = vdupq_n_f64(x);
      const float64x2_t x2 = vdupq_n_f64(x2d);
      float64x2_t s = vdupq_n_f64(0.0);

      for (i = 0; i < fltlen; i += 2) {
        const float64x2_t ftp2 = vld1q_f64(ftp + 2);
        const float64x2_t xx1 = vmulq_f64(ftp2, x1);
        const float64x2_t ftp4 = vld1q_f64(ftp + 4);
        const float64x2_t xx2 = vmulq_f64(ftp4, x2);
        const float64x2_t ftp0 = vld1q_f64(ftp);
        ftp += 6;

        const float64x2_t rpi = vld1q_f64(rp + i);
        const float64x2_t xxs = vaddq_f64(ftp0, vaddq_f64(xx1, xx2));

        s = vmlaq_f64(s, rpi, xxs);
      }

      *op = vaddvq_f64(s);

#else // SIMD

      double s = 0.0;

      for (i = 0; i < fltlen; i++) {
        s += (ftp[0] + ftp[1] * x + ftp[2] * x2d) * rp[i];
        ftp += 3;
      }

      *op = s;

#endif // SIMD

      op++;

#if R8B_FASTTIMING

      fpos += FracStep;
      const int PosIncr = (int)fpos;
      fpos -= PosIncr;

#else // R8B_FASTTIMING

      InCounter++;
      const double NextInPos = (InCounter + InPosShift) * ssr / dsr;
      const int NextInPosInt = (int)NextInPos;
      const int PosIncr = NextInPosInt - InPosInt;
      InPosInt = NextInPosInt;
      fpos = NextInPos - NextInPosInt;

#endif // R8B_FASTTIMING

      rpos = (rpos + PosIncr) & BufLenMask;
      bl -= PosIncr;
    }

    BufLeft = bl + fl2;
    ReadPos = rpos;
    InPosFrac = fpos;

    return (op);
  }
};

// ---------------------------------------------------------------------------

} // namespace r8b

#endif // R8B_CDSPFRACINTERPOLATOR_INCLUDED
