//
// Copyright (c) 2002-2014 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//

#include "compiler/translator/EmulatePrecision.h"

namespace
{

// Writes the GLSL source of the angle_frm (rounding used for mediump) and
// angle_frl (rounding used for lowp) helpers for the float vector type with
// |size| components into |sink|. For SH_ESSL_OUTPUT the vector type is spelled
// with a leading "highp " qualifier.
static void writeVectorPrecisionEmulationHelpers(
    TInfoSinkBase& sink, ShShaderOutput outputLanguage, unsigned int size)
{
    // Spelling of the parameter/return type, e.g. "vec3" or "highp vec3".
    std::stringstream typeNameStream;
    typeNameStream << ((outputLanguage == SH_ESSL_OUTPUT) ? "highp vec" : "vec") << size;
    const std::string typeName = typeNameStream.str();

    // angle_frm: component-wise version of the scalar helper. Clamp to
    // +-65504, drop everything below 10 mantissa bits, and multiply by zero
    // for components whose shifted exponent falls below -25.
    sink << typeName << " angle_frm(in " << typeName << " v) {\n";
    sink << "    v = clamp(v, -65504.0, 65504.0);\n";
    sink << "    " << typeName << " exponent = floor(log2(abs(v) + 1e-30)) - 10.0;\n";
    sink << "    bvec" << size << " isNonZero = greaterThanEqual(exponent, vec" << size << "(-25.0));\n";
    sink << "    v = v * exp2(-exponent);\n";
    sink << "    v = sign(v) * floor(abs(v));\n";
    sink << "    return v * exp2(exponent) * vec" << size << "(isNonZero);\n";
    sink << "}\n";

    // angle_frl: clamp to [-2, 2] and truncate towards zero to a multiple of
    // 1/256 (0.00390625).
    sink << typeName << " angle_frl(in " << typeName << " v) {\n";
    sink << "    v = clamp(v, -2.0, 2.0);\n";
    sink << "    v = v * 256.0;\n";
    sink << "    v = sign(v) * floor(abs(v));\n";
    sink << "    return v * 0.00390625;\n";
    sink << "}\n";
}

// Writes a GLSL overload of |functionName| for the square float matrix type
// "mat<size>" into |sink|. The generated function applies |functionName| to
// m[0] .. m[size - 1] one at a time, so it relies on the vector overload of
// the same name. For SH_ESSL_OUTPUT the matrix type is qualified with "highp ".
static void writeMatrixPrecisionEmulationHelper(
    TInfoSinkBase& sink, ShShaderOutput outputLanguage, unsigned int size, const char *functionName)
{
    // Spelling of the parameter/return type, e.g. "mat3" or "highp mat3".
    std::stringstream typeNameStream;
    typeNameStream << ((outputLanguage == SH_ESSL_OUTPUT) ? "highp mat" : "mat") << size;
    const std::string typeName = typeNameStream.str();

    // Function header and the local that collects the rounded result.
    sink << typeName << " " << functionName << "(in " << typeName << " m) {\n";
    sink << "    " << typeName << " rounded;\n";

    // One assignment per index of the matrix.
    for (unsigned int index = 0; index < size; ++index)
    {
        sink << "    rounded[" << index << "] = ";
        sink << functionName << "(m[" << index << "]);\n";
    }

    sink << "    return rounded;\n";
    sink << "}\n";
}

// Writes all rounding helpers that do not depend on the shader being
// translated into |sink|: the scalar angle_frm / angle_frl functions, then the
// vec2..vec4 overloads, then the mat2..mat4 overloads of both functions.
static void writeCommonPrecisionEmulationHelpers(TInfoSinkBase& sink, ShShaderOutput outputLanguage)
{
    // angle_frm rounds floating point numbers to half precision; angle_frl
    // rounds them to minimum lowp precision.
    //
    // The emitted scalar angle_frm is an optimized form of the following
    // reference implementation:
    //
    // int webgl_maxNormalExponent(in int exponentBits) {
    //     int possibleExponents = int(exp2(float(exponentBits)));
    //     int exponentBias = possibleExponents / 2 - 1;
    //     int allExponentBitsOne = possibleExponents - 1;
    //     return (allExponentBitsOne - 1) - exponentBias;
    // }
    //
    // float angle_frm(in float x) {
    //     int mantissaBits = 10;
    //     int exponentBits = 5;
    //     float possibleMantissas = exp2(float(mantissaBits));
    //     float mantissaMax = 2.0 - 1.0 / possibleMantissas;
    //     int maxNE = webgl_maxNormalExponent(exponentBits);
    //     float max = exp2(float(maxNE)) * mantissaMax;
    //     if (x > max) {
    //         return max;
    //     }
    //     if (x < -max) {
    //         return -max;
    //     }
    //     float exponent = floor(log2(abs(x)));
    //     if (abs(x) == 0.0 || exponent < -float(maxNE)) {
    //         return 0.0 * sign(x);
    //     }
    //     x = x * exp2(-(exponent - float(mantissaBits)));
    //     x = sign(x) * floor(abs(x));
    //     return x * exp2(exponent - float(mantissaBits));
    // }
    //
    // Every number whose magnitude is below 2^-15 is subnormal and gets
    // flushed to zero.
    //
    // Meaning of the constants in the emitted code:
    // a) 65504 is the largest mantissa (1.1111111111 in binary) multiplied by
    //    2^15, the largest normal exponent.
    // b) 10.0 is the number of mantissa bits.
    // c) -25.0 is the minimum normal half-float exponent (-15.0) minus the
    //    number of mantissa bits.
    // d) The "+ 1e-30" keeps the argument of log2() away from zero. It can
    //    only change the result of log2 for x with abs(x) < 1e-22, and those
    //    values are flushed to zero anyway (2^-15 is the smallest normal
    //    positive number), so no error is introduced.

    // ESSL output needs an explicit precision qualifier on the type.
    const std::string floatType = (outputLanguage == SH_ESSL_OUTPUT) ? "highp float" : "float";

    // Scalar angle_frm.
    sink << floatType << " angle_frm(in " << floatType << " x) {\n";
    sink << "    x = clamp(x, -65504.0, 65504.0);\n";
    sink << "    " << floatType << " exponent = floor(log2(abs(x) + 1e-30)) - 10.0;\n";
    sink << "    bool isNonZero = (exponent >= -25.0);\n";
    sink << "    x = x * exp2(-exponent);\n";
    sink << "    x = sign(x) * floor(abs(x));\n";
    sink << "    return x * exp2(exponent) * float(isNonZero);\n";
    sink << "}\n";

    // Scalar angle_frl.
    sink << floatType << " angle_frl(in " << floatType << " x) {\n";
    sink << "    x = clamp(x, -2.0, 2.0);\n";
    sink << "    x = x * 256.0;\n";
    sink << "    x = sign(x) * floor(abs(x));\n";
    sink << "    return x * 0.00390625;\n";
    sink << "}\n";

    // Vector overloads come first; the matrix overloads written afterwards
    // call them.
    for (unsigned int components = 2; components <= 4; ++components)
    {
        writeVectorPrecisionEmulationHelpers(sink, outputLanguage, components);
    }
    for (unsigned int dimension = 2; dimension <= 4; ++dimension)
    {
        writeMatrixPrecisionEmulationHelper(sink, outputLanguage, dimension, "angle_frm");
        writeMatrixPrecisionEmulationHelper(sink, outputLanguage, dimension, "angle_frl");
    }
}

// Writes the two GLSL helpers that emulate the compound assignment
// "x <opStr>= y" for the operand types |lType| (left) and |rType| (right):
//
//   angle_compound_<opNameStr>_frm  - rounds with angle_frm (mediump left operand)
//   angle_compound_<opNameStr>_frl  - rounds with angle_frl (lowp left operand)
//
// Both helpers take x as an inout parameter, store the rounded result back
// into it and also return it. For SH_ESSL_OUTPUT both types are qualified with
// "highp ".
static void writeCompoundAssignmentPrecisionEmulation(
    TInfoSinkBase& sink, ShShaderOutput outputLanguage,
    const char *lType, const char *rType, const char *opStr, const char *opNameStr)
{
    const std::string qualifier = (outputLanguage == SH_ESSL_OUTPUT) ? "highp " : "";
    const std::string lTypeStr  = qualifier + lType;
    const std::string rTypeStr  = qualifier + rType;

    // Note that y should be passed through the rounding function at the
    // function call site, but x can't be rounded there since it is an inout
    // parameter. So only x and the result are rounded here.
    sink <<
    lTypeStr << " angle_compound_" << opNameStr << "_frm(inout " << lTypeStr << " x, in " << rTypeStr << " y) {\n"
    "    x = angle_frm(angle_frm(x) " << opStr << " y);\n"
    "    return x;\n"
    "}\n";

    // The lowp variant must round x with the lowp rounder as well. Using
    // angle_frm here would let an unquantized lowp value take part in the
    // operation at mediump precision.
    sink <<
    lTypeStr << " angle_compound_" << opNameStr << "_frl(inout " << lTypeStr << " x, in " << rTypeStr << " y) {\n"
    "    x = angle_frl(angle_frl(x) " << opStr << " y);\n"
    "    return x;\n"
    "}\n";
}

// Returns the GLSL spelling of the float scalar/vector/matrix type described
// by |type|, chosen from its nominal size and secondary size:
//   nominal size 1                        -> "float"
//   nominal size N, secondary size 1      -> "vecN"
//   nominal size N, secondary size N      -> "matN"
//   nominal size N, secondary size M != N -> "matNxM"
// Sizes outside of the handled ranges hit UNREACHABLE() and yield NULL.
// The returned pointer always refers to a string with static storage duration.
const char *getFloatTypeStr(const TType& type)
{
    // Indexed as [nominal size - 2][secondary size - 1].
    static const char *const kTypeNames[3][4] = {
        {"vec2", "mat2", "mat2x3", "mat2x4"},
        {"vec3", "mat3x2", "mat3", "mat3x4"},
        {"vec4", "mat4x2", "mat4x3", "mat4"},
    };

    const int nominalSize = type.getNominalSize();
    if (nominalSize == 1)
    {
        return "float";
    }
    if (nominalSize < 2 || nominalSize > 4)
    {
        UNREACHABLE();
        return NULL;
    }

    const int secondarySize = type.getSecondarySize();
    if (secondarySize < 1 || secondarySize > 4)
    {
        UNREACHABLE();
        return NULL;
    }

    return kTypeNames[nominalSize - 2][secondarySize - 1];
}

// Tells whether values of |type| get precision emulation: the type must be a
// float type that is neither a non-square matrix nor an array, and its
// precision must be lowp or mediump.
bool canRoundFloat(const TType &type)
{
    if (type.getBasicType() != EbtFloat)
    {
        return false;
    }
    if (type.isNonSquareMatrix() || type.isArray())
    {
        return false;
    }

    switch (type.getPrecision())
    {
      case EbpLow:
      case EbpMedium:
        return true;
      default:
        return false;
    }
}

// Builds an EOpFunctionCall aggregate that calls the function |name| with
// |child| as its first argument. The call's name object is created from the
// mangled form of |name| and flagged as internal.
TIntermAggregate *createInternalFunctionCallNode(TString name, TIntermNode *child)
{
    TName internalName(TFunction::mangleName(name));
    internalName.setInternal(true);

    TIntermAggregate *call = new TIntermAggregate();
    call->setOp(EOpFunctionCall);
    call->setNameObj(internalName);
    call->getSequence()->push_back(child);
    return call;
}

// Wraps |roundedChild| in a call to the rounding helper that matches its
// precision: angle_frm when the precision is mediump, angle_frl otherwise.
TIntermAggregate *createRoundingFunctionCallNode(TIntermTyped *roundedChild)
{
    const bool isMedium = (roundedChild->getPrecision() == EbpMedium);
    const TString helperName(isMedium ? "angle_frm" : "angle_frl");
    return createInternalFunctionCallNode(helperName, roundedChild);
}

// Builds a call to the compound assignment helper
// "angle_compound_<opNameStr>_frm" (when |left| is mediump) or
// "angle_compound_<opNameStr>_frl" (otherwise), passing |left| and |right| as
// the two arguments, in that order.
TIntermAggregate *createCompoundAssignmentFunctionCallNode(TIntermTyped *left, TIntermTyped *right, const char *opNameStr)
{
    const char *suffix = (left->getPrecision() == EbpMedium) ? "_frm" : "_frl";

    std::stringstream nameStream;
    nameStream << "angle_compound_" << opNameStr << suffix;
    const TString helperName(nameStream.str().c_str());

    TIntermAggregate *call = createInternalFunctionCallNode(helperName, left);
    call->getSequence()->push_back(right);
    return call;
}

// Tells whether the value produced by |node| is consumed by |parent|.
// The value counts as unused when there is no parent, when the parent is a
// sequence, or when the parent is a comma operator and |node| is not its last
// operand. In every other case the result is considered used.
bool parentUsesResult(TIntermNode* parent, TIntermNode* node)
{
    if (!parent)
    {
        return false;
    }

    TIntermAggregate *aggregate = parent->getAsAggregate();
    if (!aggregate)
    {
        return true;
    }

    switch (aggregate->getOp())
    {
      case EOpSequence:
        // The result of a statement directly inside a sequence is not assigned
        // anywhere, so rounding it is not needed. In particular this avoids a
        // lot of unnecessary rounding of unused return values of assignment.
        return false;
      case EOpComma:
        // Only the last operand of a comma expression provides its value.
        return aggregate->getSequence()->back() == node;
      default:
        return true;
    }
}

}  // namespace anonymous

// Constructs the traverser on top of TLValueTrackingTraverser, forwarding
// |symbolTable| and |shaderVersion|, and starts outside of any variable
// declaration (mDeclaringVariables == false).
// NOTE(review): the three `true` arguments presumably enable pre-, in- and
// post-order visits (visitBinary/visitAggregate react to PreVisit and InVisit)
// -- confirm against the TLValueTrackingTraverser constructor.
EmulatePrecision::EmulatePrecision(const TSymbolTable &symbolTable, int shaderVersion)
    : TLValueTrackingTraverser(true, true, true, symbolTable, shaderVersion),
      mDeclaringVariables(false)
{}

// Queues the replacement of a symbol by a rounding call around that symbol.
// Nothing is queued when the symbol's type is not subject to rounding, while
// variables are being declared, or where an l-value is required.
void EmulatePrecision::visitSymbol(TIntermSymbol *node)
{
    if (!canRoundFloat(node->getType()))
    {
        return;
    }
    if (mDeclaringVariables || isLValueRequiredHere())
    {
        return;
    }

    TIntermNode *parentNode   = getParentNode();
    TIntermNode *roundingCall = createRoundingFunctionCallNode(node);
    mReplacements.push_back(NodeUpdateEntry(parentNode, node, roundingCall, true));
}


// Binary operator hook.
//
// InVisit:  clears mDeclaringVariables for EOpInitialize and returns false
//           for EOpIndexDirectStruct / EOpVectorSwizzle, true otherwise.
// PreVisit: when the result type is subject to rounding, either
//           - queues a rounding call around an arithmetic/assignment node
//             whose result is used by its parent, or
//           - records the operand types of a compound assignment and queues
//             its replacement by a call to the matching angle_compound_* helper.
// Any other visit returns true without doing anything.
bool EmulatePrecision::visitBinary(Visit visit, TIntermBinary *node)
{
    const TOperator op = node->getOp();

    if (visit == InVisit)
    {
        // RHS of initialize is not being declared.
        if (op == EOpInitialize)
        {
            mDeclaringVariables = false;
        }
        // NOTE(review): returning false here presumably keeps the traverser
        // from descending into the field index / swizzle selector operand --
        // confirm against the traverser implementation.
        return op != EOpIndexDirectStruct && op != EOpVectorSwizzle;
    }
    if (visit != PreVisit)
    {
        return true;
    }

    const TType &resultType = node->getType();
    if (!canRoundFloat(resultType))
    {
        return true;
    }

    // Only set for compound assignments, which share the code after the switch.
    const char *compoundOpName  = NULL;
    EmulationSet *compoundTypes = NULL;

    switch (op)
    {
      // Math operators that can result in a float may need to apply rounding to the return
      // value. Note that in the case of assignment, the rounding is applied to its return
      // value here, not the value being assigned.
      case EOpAssign:
      case EOpAdd:
      case EOpSub:
      case EOpMul:
      case EOpDiv:
      case EOpVectorTimesScalar:
      case EOpVectorTimesMatrix:
      case EOpMatrixTimesVector:
      case EOpMatrixTimesScalar:
      case EOpMatrixTimesMatrix:
      {
        TIntermNode *parentNode = getParentNode();
        if (parentUsesResult(parentNode, node))
        {
            TIntermNode *roundingCall = createRoundingFunctionCallNode(node);
            mReplacements.push_back(NodeUpdateEntry(parentNode, node, roundingCall, true));
        }
        return true;
      }

      // Compound assignment cases need to replace the operator with a function call.
      case EOpAddAssign:
        compoundOpName = "add";
        compoundTypes  = &mEmulateCompoundAdd;
        break;
      case EOpSubAssign:
        compoundOpName = "sub";
        compoundTypes  = &mEmulateCompoundSub;
        break;
      case EOpMulAssign:
      case EOpVectorTimesMatrixAssign:
      case EOpVectorTimesScalarAssign:
      case EOpMatrixTimesScalarAssign:
      case EOpMatrixTimesMatrixAssign:
        compoundOpName = "mul";
        compoundTypes  = &mEmulateCompoundMul;
        break;
      case EOpDivAssign:
        compoundOpName = "div";
        compoundTypes  = &mEmulateCompoundDiv;
        break;

      default:
        // The rest of the binary operations should not need precision emulation.
        return true;
    }

    // Remember which (left type, right type) helper has to be written out, then
    // swap the operator node for a call to that helper.
    compoundTypes->insert(TypePair(getFloatTypeStr(resultType),
                                   getFloatTypeStr(node->getRight()->getType())));
    TIntermNode *parentNode = getParentNode();
    TIntermNode *helperCall =
        createCompoundAssignmentFunctionCallNode(node->getLeft(), node->getRight(), compoundOpName);
    mReplacements.push_back(NodeUpdateEntry(parentNode, node, helperCall, false));
    return true;
}

// Aggregate hook. The return value tells the traverser whether to visit the
// children of |node|:
// - sequences, struct constructors and function definitions: children are
//   visited, nothing else happens;
// - prototypes, parameter lists and invariant declarations: children are
//   skipped;
// - declarations: mDeclaringVariables is kept up to date;
// - function calls and every other aggregate: a rounding call around the node
//   may be queued during PreVisit.
bool EmulatePrecision::visitAggregate(Visit visit, TIntermAggregate *node)
{
    switch (node->getOp())
    {
      case EOpSequence:
      case EOpConstructStruct:
      case EOpFunction:
        return true;

      case EOpPrototype:
      case EOpParameters:
      case EOpInvariantDeclaration:
        return false;

      case EOpDeclaration:
        // Variable declaration: the flag is (re)armed on PreVisit and InVisit
        // and dropped on any other visit. visitBinary clears it while the
        // right-hand side of an initializer is traversed.
        mDeclaringVariables = (visit == PreVisit || visit == InVisit);
        return true;

      case EOpFunctionCall:
      {
        if (visit == PreVisit)
        {
            // User-defined function return values are not rounded, this relies on that
            // calculations producing the value were rounded.
            TIntermNode *parentNode = getParentNode();
            if (canRoundFloat(node->getType()) && !isInFunctionMap(node) &&
                parentUsesResult(parentNode, node))
            {
                TIntermNode *roundingCall = createRoundingFunctionCallNode(node);
                mReplacements.push_back(NodeUpdateEntry(parentNode, node, roundingCall, true));
            }
        }
        return true;
      }

      default:
      {
        // Any other aggregate with a roundable result that the parent uses.
        TIntermNode *parentNode = getParentNode();
        if (canRoundFloat(node->getType()) && visit == PreVisit &&
            parentUsesResult(parentNode, node))
        {
            TIntermNode *roundingCall = createRoundingFunctionCallNode(node);
            mReplacements.push_back(NodeUpdateEntry(parentNode, node, roundingCall, true));
        }
        return true;
      }
    }
}

// Unary operator hook. Negation, logical not (scalar and vector) and the
// increment/decrement operators are left untouched. Every other unary
// operation whose result type is subject to rounding is queued, during
// PreVisit, for replacement by a rounding call around it. Children are always
// visited.
bool EmulatePrecision::visitUnary(Visit visit, TIntermUnary *node)
{
    switch (node->getOp())
    {
      case EOpNegative:
      case EOpVectorLogicalNot:
      case EOpLogicalNot:
      case EOpPostIncrement:
      case EOpPostDecrement:
      case EOpPreIncrement:
      case EOpPreDecrement:
        return true;
      default:
        break;
    }

    if (canRoundFloat(node->getType()) && visit == PreVisit)
    {
        TIntermNode *parentNode   = getParentNode();
        TIntermNode *roundingCall = createRoundingFunctionCallNode(node);
        mReplacements.push_back(NodeUpdateEntry(parentNode, node, roundingCall, true));
    }
    return true;
}

// Writes every helper function the rewritten shader may call into |sink|:
// first the common angle_frm / angle_frl helpers, then one pair of compound
// assignment helpers per recorded (left type, right type) combination, in the
// order add, sub, div, mul.
void EmulatePrecision::writeEmulationHelpers(TInfoSinkBase& sink, ShShaderOutput outputLanguage)
{
    // Other languages not yet supported
    ASSERT(outputLanguage == SH_GLSL_COMPATIBILITY_OUTPUT ||
           IsGLSL130OrNewer(outputLanguage) ||
           outputLanguage == SH_ESSL_OUTPUT);
    writeCommonPrecisionEmulationHelpers(sink, outputLanguage);

    // Each recorded type-pair set together with the operator token and the
    // name fragment used by its helpers.
    struct CompoundOp
    {
        const EmulationSet *typePairs;
        const char *opStr;
        const char *opNameStr;
    };
    const CompoundOp compoundOps[] = {
        {&mEmulateCompoundAdd, "+", "add"},
        {&mEmulateCompoundSub, "-", "sub"},
        {&mEmulateCompoundDiv, "/", "div"},
        {&mEmulateCompoundMul, "*", "mul"},
    };
    const unsigned int compoundOpCount = sizeof(compoundOps) / sizeof(compoundOps[0]);

    for (unsigned int opIndex = 0; opIndex < compoundOpCount; ++opIndex)
    {
        const CompoundOp &compoundOp = compoundOps[opIndex];
        for (EmulationSet::const_iterator pairIt = compoundOp.typePairs->begin();
             pairIt != compoundOp.typePairs->end(); ++pairIt)
        {
            writeCompoundAssignmentPrecisionEmulation(sink, outputLanguage, pairIt->lType,
                                                      pairIt->rType, compoundOp.opStr,
                                                      compoundOp.opNameStr);
        }
    }
}