import _ from "lodash"
import ExprUtils from "./ExprUtils"
import { getExprExtension } from "./extensions"
import Schema from "./Schema"
import { AggrStatus, Expr, LiteralType, OpExpr, Variable } from "./types"
import { OpItem } from "./ExprUtils"
import { WeakCache } from "./WeakCache"

// Weak cache is global to allow validator to be created and destroyed
const weakCache = new WeakCache()

/** Constraints that an expression must satisfy to be considered valid. */
export interface ValidateOptions {
  /** Optional current table. The expression must be related to this table. */
  table?: string

  /** Optional types to limit the expression result to. */
  types?: LiteralType[]

  /** Ids of enum values that are valid if the type is enum/enumset. */
  enumValueIds?: string[]

  /** Table that an id-typed result must come from. */
  idTable?: string

  /** Statuses of aggregation to allow. Default: ["individual", "literal"] */
  aggrStatuses?: AggrStatus[]
}

/**
 * Validates expressions. If an expression has been cleaned, it will always be valid.
 * An incomplete expression is still valid, but might return null if evaluated!
 */
export default class ExprValidator {
  schema: Schema
  exprUtils: ExprUtils

  constructor(schema: Schema) {
    this.schema = schema
    this.exprUtils = new ExprUtils(schema)
  }

  /**
   * Validates an expression, returning null if it is valid, otherwise an error string.
   *
   * NOTE: This uses global weak caching and assumes that expressions are never mutated
   * after having been validated!
   *
   * @param expr expression to validate. Falsy values are valid; non-objects are invalid.
   * @param options constraints on the expression:
   *   - table: optional current table. A "Wrong table" error is returned if the expression belongs to another table
   *   - types: optional types to limit to
   *   - enumValueIds: ids of enum values that are valid if type is enum
   *   - idTable: table that type of id must be from
   *   - aggrStatuses: statuses of aggregation to allow. list of "individual", "literal", "aggregate".
   *     Default: ["individual", "literal"]
   * @returns null if valid, otherwise a human-readable error string
   */
  validateExpr(expr: Expr, options?: ValidateOptions): string | null {
    const resolvedOptions: ValidateOptions = options || {}

    if (!expr) {
      return null
    }

    // Non-objects are not valid expressions
    if (typeof expr !== "object") {
      return "Invalid expression"
    }

    return weakCache.cacheFunction([this.schema, expr], [resolvedOptions], () => {
      return this.validateExprInternal(expr, resolvedOptions)
    })
  }

  /**
   * Uncached validation worker. Same contract as validateExpr, with an extra `depth`
   * option that counts how many column expressions have been followed so that
   * self-referencing column expressions are reported instead of recursing forever.
   *
   * @returns null if valid, otherwise an error string
   */
  validateExprInternal = (expr: Expr, options: ValidateOptions & { depth?: number }): string | null => {
    const aggrStatuses = options.aggrStatuses || ["individual", "literal"]

    if (!expr) {
      return null
    }

    // Allow {} placeholder
    if (_.isEmpty(expr)) {
      return null
    }

    // Prevent infinite recursion
    if ((options.depth || 0) > 100) {
      return "Circular reference"
    }

    // Check table if not literal
    if (options.table) {
      const exprTable = this.exprUtils.getExprTable(expr)
      if (exprTable && exprTable !== options.table) {
        return `Wrong table ${exprTable} (expected ${options.table})`
      }
    }

    switch (expr.type) {
      case "literal": {
        // Literal is ok if right type
        if (options.types && !options.types.includes(expr.valueType)) {
          return "Wrong type"
        }

        if (options.idTable && expr.valueType === "id" && options.idTable !== expr.idTable) {
          return "Wrong table"
        }
        break
      }

      case "field": {
        const column = this.schema.getColumn(expr.table, expr.column)
        if (!column) {
          return `Missing column ${expr.table}.${expr.column}`
        }

        // Validate the column's own expression, if it has one
        if (column.expr) {
          // Use depth to prevent infinite recursion
          const columnError = this.validateExprInternal(column.expr, {
            ...options,
            depth: (options.depth || 0) + 1
          })
          if (columnError) {
            return columnError
          }
        }
        break
      }

      case "op": {
        // Find matching op items first to determine argument types
        const opItems = this.exprUtils.findMatchingOpItems({
          op: expr.op,
          lhsExpr: expr.exprs[0],
          resultTypes: options.types
        })
        if (opItems.length === 0) {
          return "No matching op"
        }

        // For validation, check every possible opItem to see if one is valid.
        // If none is, the error of the last candidate is reported.
        let oneValid = false
        let lastError: string | null = null
        for (const opItem of opItems) {
          const opItemError = this.validateOpItem(expr, opItem, options)
          if (!opItemError) {
            oneValid = true
            break
          }
          lastError = opItemError
        }

        if (!oneValid) {
          return lastError
        }
        break
      }

      case "scalar": {
        // Validate joins
        if (!this.exprUtils.areJoinsValid(expr.table, expr.joins)) {
          return "Invalid joins"
        }

        const innerTable = this.exprUtils.followJoins(expr.table, expr.joins)

        // If joins are 1-n, allow aggrStatus of "aggregate" for the inner expression.
        // Otherwise, inner expression must match outer aggregation requirements.
        const innerError = this.exprUtils.isMultipleJoins(expr.table, expr.joins)
          ? this.validateExprInternal(expr.expr, {
              ...options,
              table: innerTable,
              aggrStatuses: ["literal", "aggregate"]
            })
          : this.validateExprInternal(expr.expr, { ...options, table: innerTable })

        if (innerError) {
          return innerError
        }
        break
      }

      case "case": {
        // Do not allow more than 10,000 cases
        if (expr.cases.length > 10000) {
          return `Too many cases (${expr.cases.length})`
        }

        // Validate cases
        for (let i = 0; i < expr.cases.length; i++) {
          const cse = expr.cases[i]

          const whenError = this.validateExprInternal(cse.when, {
            ...options,
            types: ["boolean"],
            table: expr.table
          })
          if (whenError) {
            return `Case ${i + 1} 'when': ${whenError}`
          }

          // Then must match overall type/aggr requirements
          const thenError = this.validateExprInternal(cse.then, { ...options, table: expr.table })
          if (thenError) {
            return `Case ${i + 1} 'then': ${thenError}`
          }
        }

        // Else must also match
        const elseError = this.validateExprInternal(expr.else, { ...options, table: expr.table })
        if (elseError) {
          return `'Else': ${elseError}`
        }

        // Validate that all thens are of same type (or null)
        const thenTypes = _.uniq(
          _.compact(
            _.map(expr.cases, (cse) => this.exprUtils.getExprType(cse.then)).concat(
              this.exprUtils.getExprType(expr.else)
            )
          )
        )
        if (thenTypes.length > 1) {
          return "All 'then'/'else' clauses must be of same type"
        }

        // Check for mixing aggregate and individual in when/then/else clauses
        let hasIndividual = false
        let hasAggregate = false
        const allSubExprs: Expr[] = []
        for (const cse of expr.cases) {
          allSubExprs.push(cse.when, cse.then)
        }
        allSubExprs.push(expr.else)

        for (const subexpr of allSubExprs) {
          const subAggrStatus = this.exprUtils.getExprAggrStatus(subexpr)
          hasIndividual = hasIndividual || subAggrStatus === "individual"
          hasAggregate = hasAggregate || subAggrStatus === "aggregate"
        }

        if (hasIndividual && hasAggregate) {
          return "Cannot mix individual and aggregate expressions in case statement"
        }
        break
      }

      case "score": {
        // The outer enumValueIds/idTable constrain the result of the score expression,
        // not its enum input, so they are not passed down (same as op arguments).
        const inputError = this.validateExprInternal(expr.input, {
          ..._.omit(options, "enumValueIds", "idTable"),
          types: ["enum", "enumset"],
          table: expr.table
        })
        if (inputError) {
          return `Input: ${inputError}`
        }

        // Without an input there is nothing to check the score keys against
        const enumValueIds = expr.input
          ? _.pluck(this.exprUtils.getExprEnumValues(expr.input) || [], "id")
          : null

        for (const key in expr.scores) {
          const value = expr.scores[key]

          if (enumValueIds && !enumValueIds.includes(key)) {
            return `Score key '${key}' is not a valid enum value for the input`
          }

          const scoreError = this.validateExprInternal(value, {
            ...options,
            types: ["number"],
            table: expr.table
          })
          if (scoreError) {
            return `Score for '${key}': ${scoreError}`
          }
        }
        break
      }

      case "build enumset": {
        for (const key in expr.values) {
          const value = expr.values[key]

          // Check key against overall enumValueIds requirement if provided
          if (options.enumValueIds && !options.enumValueIds.includes(key)) {
            return `Enum value '${key}' is not allowed in this context`
          }

          const valueError = this.validateExprInternal(value, { ...options, types: ["boolean"] })
          if (valueError) {
            return `Value for '${key}': ${valueError}`
          }
        }
        break
      }

      case "variable": {
        // Get variable
        const variable = _.findWhere(this.schema.getVariables(), { id: expr.variableId })
        if (!variable) {
          return `Missing variable ${expr.variableId}`
        }
        break
      }

      case "subquery": {
        if (expr.table && !this.schema.getTable(expr.table)) {
          return `Table ${expr.table} does not exist`
        }
        if (expr.from && !this.schema.getTable(expr.from)) {
          return `From table ${expr.from} does not exist`
        }

        // Create inner validator with outer refs exposed as variables
        const innerExprValidator = new ExprValidator(
          this.schema.addVariables(this.exprUtils.createSubqueryInnerVariables(expr), {})
        )

        // Validate select expression within the context of the 'from' table
        // Select can be aggregate or individual
        const selectError = innerExprValidator.validateExpr(expr.select, {
          ...options,
          table: expr.from,
          aggrStatuses: ["aggregate", "individual", "literal"]
        })
        if (selectError) {
          return `Select: ${selectError}`
        }

        // Validate where expression (must be boolean, individual/literal)
        const whereError = innerExprValidator.validateExpr(expr.where, {
          table: expr.from,
          types: ["boolean"],
          aggrStatuses: ["individual", "literal"]
        })
        if (whereError) {
          return `Where: ${whereError}`
        }

        // Validate order by expressions (must be individual/literal)
        for (let i = 0; i < expr.orderBys.length; i++) {
          const orderBy = expr.orderBys[i]
          const orderByError = innerExprValidator.validateExpr(orderBy.expr, {
            table: expr.from,
            aggrStatuses: ["individual", "literal"]
          })
          if (orderByError) {
            return `Order By ${i + 1}: ${orderByError}`
          }
        }

        // Validate outer reference expressions in the outer context
        for (let i = 0; i < expr.outerRefs.length; i++) {
          const outerRef = expr.outerRefs[i]
          // Outer refs can have any type, so we don't pass types
          const outerRefError = this.validateExprInternal(outerRef.expr, {
            ...options,
            table: expr.table,
            types: undefined,
            aggrStatuses: ["individual", "literal"]
          })
          if (outerRefError) {
            return `Outer Ref ${i + 1}: ${outerRefError}`
          }
        }
        break
      }

      case "extension": {
        // Extensions validate themselves
        const extensionError = getExprExtension(expr.extension).validateExpr(expr, options, this.schema)
        if (extensionError) {
          return extensionError
        }
        break
      }
    }

    // Final checks based on overall expression result

    // Validate table (redundant check but safe)
    if (options.idTable) {
      const exprIdTable = this.exprUtils.getExprIdTable(expr)
      if (exprIdTable && exprIdTable !== options.idTable) {
        return "Resulting expression has wrong idTable"
      }
    }

    // Validate type if present
    const type = this.exprUtils.getExprType(expr)
    if (options.types && type && !options.types.includes(type)) {
      return `Resulting expression type '${type}' is not one of the allowed types: ${options.types.join(", ")}`
    }

    // Validate aggregate status (only when the expression has a known status)
    const aggrStatus = this.exprUtils.getExprAggrStatus(expr)
    if (aggrStatus && !aggrStatuses.includes(aggrStatus)) {
      return `Resulting expression has invalid aggregation status '${aggrStatus}', expected ${aggrStatuses.join(" or ")}`
    }

    // Validate enums if result is enum/enumset
    if (options.enumValueIds && type && (type === "enum" || type === "enumset")) {
      const exprEnums = this.exprUtils.getExprEnumValues(expr)
      if (exprEnums) {
        const exprEnumIds = _.pluck(exprEnums, "id")
        if (_.difference(exprEnumIds, options.enumValueIds).length > 0) {
          return "Resulting expression contains invalid enum values"
        }
      }
    }

    return null // Valid
  }

  /**
   * Validates an op expression based on the chosen opItem.
   *
   * @param expr op expression whose arguments are checked
   * @param opItem candidate operator definition (argument types, aggregate/ordered flags)
   * @param options constraints of the op expression itself; `types`, `enumValueIds` and
   *   `idTable` are not passed down to arguments
   * @returns null if valid, otherwise an error string
   */
  validateOpItem(expr: OpExpr, opItem: OpItem, options: ValidateOptions): string | null {
    const opIsAggr = opItem.aggr || false
    const aggrStatuses = options.aggrStatuses || ["individual", "literal"]
    const numTypedArgs = opItem.exprTypes.length

    // Validate exprs based on the chosen opItem
    for (let i = 0; i < expr.exprs.length; i++) {
      let expectedTypes: LiteralType[] | undefined = undefined
      // The argument right after the typed ones is the order-by argument of an ordered op
      const isOrderByArg = opItem.ordered && i === numTypedArgs

      if (i < numTypedArgs) {
        expectedTypes = opItem.exprTypes[i] ? [opItem.exprTypes[i]] : undefined
      } else if (isOrderByArg) {
        // Set expected types for the order by argument
        expectedTypes = ["date", "datetime", "number"]
      } else if (opItem.moreExprType) {
        expectedTypes = [opItem.moreExprType]
      } else {
        // Extra argument not covered by exprTypes, moreExprType, or ordered flag
        return `Unexpected argument at index ${i} for operator '${expr.op}'`
      }

      // Determine aggregation status requirement for the sub-expression
      let subExprAggrStatuses: AggrStatus[]
      if (isOrderByArg) {
        // Ordering argument MUST be individual or literal
        subExprAggrStatuses = ["individual", "literal"]
      } else if (opIsAggr) {
        // Aggregate ops require non-aggregate inputs (for value/where args)
        subExprAggrStatuses = ["literal", "individual"]
      } else {
        // Non-aggregate ops allow literals and inputs matching the overall required status
        subExprAggrStatuses = _.union(aggrStatuses, ["literal"])
      }

      // Validate sub-expression
      const error = this.validateExprInternal(expr.exprs[i], {
        ..._.omit(options, "types", "enumValueIds", "idTable"), // Pass general options down
        aggrStatuses: subExprAggrStatuses,
        types: expectedTypes
      })
      if (error) {
        return `Argument ${i + 1}: ${error}`
      }
    }

    // Check for too few arguments (excluding optional order by).
    // Too many arguments need no check here: the loop above already returns
    // "Unexpected argument" for any argument that is not typed, variadic or the order-by.
    if (expr.exprs.length < numTypedArgs) {
      return `Operator '${expr.op}' requires at least ${opItem.exprTypes.length} arguments, received ${expr.exprs.length}`
    }

    // Check for mixing aggregate and individual *within non-aggregate ops*
    // Aggregate ops handle this implicitly by requiring non-aggregate inputs
    if (!opIsAggr) {
      let hasIndividual = false
      let hasAggregate = false
      for (const subexpr of expr.exprs) {
        const aggrStatus = this.exprUtils.getExprAggrStatus(subexpr)
        hasIndividual = hasIndividual || aggrStatus === "individual"
        hasAggregate = hasAggregate || aggrStatus === "aggregate"
      }
      if (hasIndividual && hasAggregate) {
        return "Cannot mix individual and aggregate expressions in this operation"
      }
    }

    // Check for missing ordering if required
    if (opItem.ordered) {
      const orderByExprProvided = expr.exprs.length > numTypedArgs
      const tableOrdering = expr.table ? this.schema.getTable(expr.table)?.ordering : null
      if (!orderByExprProvided && !tableOrdering) {
        return `Operator '${expr.op}' requires an ordering field or an intrinsically ordered table.`
      }
    }

    // Do not allow "to text" on an enum with more than 10000 values
    if (expr.op === "to text" && this.exprUtils.getExprType(expr.exprs[0]) === "enum") {
      // Enum values may be unavailable; treat that as "nothing to limit" rather than crash
      const enumValues = this.exprUtils.getExprEnumValues(expr.exprs[0])
      if (enumValues && enumValues.length > 10000) {
        return "Too many enum values"
      }
    }

    return null
  }
}