import _ from "lodash" import ExprUtils from "./ExprUtils" import ExprValidator from "./ExprValidator" import { Schema } from "." import { Variable, Expr, LiteralType, AggrStatus, FieldExpr, OpExpr, ScalarExpr, LiteralExpr, CaseExpr, IdExpr, ScoreExpr, BuildEnumsetExpr, VariableExpr, LegacyComparisonExpr, LegacyLogicalExpr, LegacyCountExpr, SubqueryExpr } from "./types" import produce from "immer" import { getExprExtension } from "./extensions" export interface CleanExprOptions { /** optional current table. expression must be related to this table or will be stripped */ table?: string /** optional types to limit to */ types?: LiteralType[] /** ids of enum values that are valid if type is enum */ enumValueIds?: string[] /** table that type of id must be from */ idTable?: string /** statuses of aggregation to allow. list of "individual", "literal", "aggregate". Default: ["individual", "literal"] */ aggrStatuses: AggrStatus[] } /** Cleans expressions. Cleaning means nulling invalid (not just incomplete) expressions if they cannot be auto-fixed. * The resulting expression must be valid. */ export default class ExprCleaner { schema: Schema exprUtils: ExprUtils constructor(schema: Schema) { this.schema = schema this.exprUtils = new ExprUtils(schema) } // Clean an expression, returning null if completely invalid, otherwise removing // invalid parts. Attempts to correct invalid types by wrapping in other expressions. // e.g. if an enum is chosen when a boolean is required, it will be wrapped in "= any" op // options are: // table: optional current table. expression must be related to this table or will be stripped // types: optional types to limit to // enumValueIds: ids of enum values that are valid if type is enum // idTable: table that type of id must be from // aggrStatuses: statuses of aggregation to allow. list of "individual", "literal", "aggregate". Default: ["individual", "literal"] cleanExpr( expr: Expr, options: { /** optional current table. expression must be related to this table or will be stripped */ table?: string /** optional types to limit to */ types?: LiteralType[] /** ids of enum values that are valid if type is enum */ enumValueIds?: string[] /** table that type of id must be from */ idTable?: string /** statuses of aggregation to allow. list of "individual", "literal", "aggregate". Default: ["individual", "literal"] */ aggrStatuses?: AggrStatus[] } = {} ): Expr { const aggrStatuses: AggrStatus[] = options.aggrStatuses || ["individual", "literal"] // Null handling if (!expr) { return null } // Allow {} placeholder TODO is this still needed? if (_.isEmpty(expr)) { return expr } // Handle upgrades from old version if (expr.type == "comparison") return this.cleanComparisonExpr(expr, { ...options, aggrStatuses: aggrStatuses }) if (expr.type == "logical") return this.cleanLogicalExpr(expr, { ...options, aggrStatuses: aggrStatuses }) if (expr.type == "count") return this.cleanCountExpr(expr, { ...options, aggrStatuses: aggrStatuses }) if (expr.type == "literal" && (expr.valueType as any) == "enum[]") expr = { type: "literal", valueType: "enumset", value: expr.value } // Strip if wrong table if (expr.type != "literal") { if (options.table && expr.table && expr.table !== options.table) { return null } } // Strip if no table if (expr.type == "field" && !expr.table) { return null } // Strip if non-existent table if (expr.type != "literal" && expr.table && !this.schema.getTable(expr.table)) { return null } // Fix old expression type if (expr.type === "literal" && (expr as any).valueType === "enum[]") { return { type: "literal", valueType: "enumset", value: expr.value } } // Handle infinite recursion that can occur when cleaning field expressions that self-reference let aggrStatus: AggrStatus | null = null try { aggrStatus = this.exprUtils.getExprAggrStatus(expr) } catch (ex: any) { if (ex.message === "Infinite recursion") { return null } throw ex } // Default aggregation if needed and not aggregated if (expr.type != "literal" && expr.table) { if (aggrStatus === "individual" && !aggrStatuses.includes("individual") && aggrStatuses.includes("aggregate")) { let aggrOpItems = this.exprUtils.findMatchingOpItems({ resultTypes: options.types, lhsExpr: expr, aggr: true, }) // Prefer "sum" if possible if (aggrOpItems.some(opItem => opItem.op === "sum")) { aggrOpItems = aggrOpItems.filter(opItem => opItem.op === "sum") } else { // Prefer "last" if possible if (aggrOpItems.some(opItem => opItem.op === "last")) { aggrOpItems = aggrOpItems.filter(opItem => opItem.op === "last") } } // If aggr is required and there is at least one possible, use it if (aggrOpItems.length > 0) { expr = { type: "op", op: aggrOpItems[0].op, table: expr.table, exprs: [expr] } aggrStatus = "aggregate" } } } // Strip if wrong aggregation status if (aggrStatus && !aggrStatuses.includes(aggrStatus)) { return null } // Get type let type: LiteralType | null = this.exprUtils.getExprType(expr) // Boolean-ize for easy building of filters // True if a boolean expression is required const booleanOnly = options.types && options.types.length === 1 && options.types[0] === "boolean" // If boolean and expr is not boolean, wrap with appropriate expression if (booleanOnly && type && type !== "boolean") { // Find op item that matches const opItem = this.exprUtils.findMatchingOpItems({ resultTypes: ["boolean"], lhsExpr: expr })[0] if (opItem) { // Wrap in op to make it boolean if (expr.type != "literal") { expr = { type: "op", table: expr.table, op: opItem.op, exprs: [expr] } } else { expr = { type: "op", op: opItem.op, exprs: [expr] } } // Determine number of arguments to append const args = opItem.exprTypes.length - 1 // Add extra nulls for other arguments for (let i = 1; i <= args; i++) { expr.exprs.push(null) } } } // Get type again in case changed type = this.exprUtils.getExprType(expr) // Strip if wrong type if (type && options.types && !options.types.includes(type)) { // case statements should be preserved as they are a variable type and they will have their then clauses cleaned if (expr.type !== "case") { return null } } const cleanOptions: CleanExprOptions = { ...options, aggrStatuses: aggrStatuses } switch (expr.type) { case "field": return this.cleanFieldExpr(expr, cleanOptions) case "scalar": return this.cleanScalarExpr(expr, cleanOptions) case "op": return this.cleanOpExpr(expr, cleanOptions) case "literal": return this.cleanLiteralExpr(expr, cleanOptions) case "case": return this.cleanCaseExpr(expr, cleanOptions) case "id": return this.cleanIdExpr(expr, cleanOptions) case "score": return this.cleanScoreExpr(expr, cleanOptions) case "build enumset": return this.cleanBuildEnumsetExpr(expr, cleanOptions) case "variable": return this.cleanVariableExpr(expr, cleanOptions) case "subquery": return this.cleanSubqueryExpr(expr, cleanOptions) case "extension": return getExprExtension(expr.extension).cleanExpr(expr, cleanOptions, this.schema) // default: // throw new Error(`Unknown expression type ${expr.type}`) } } /** Removes references to non-existent tables */ cleanFieldExpr(expr: FieldExpr, options: CleanExprOptions) { // Empty expression if (!expr.column || !expr.table) { return null } // Missing table if (!this.schema.getTable(expr.table)) { return null } // Missing column const column = this.schema.getColumn(expr.table, expr.column) if (!column) { return null } // Invalid expr if (column.expr) { const exprValidation = new ExprValidator(this.schema).validateExpr(column.expr, options) if (exprValidation) { return null } } // Invalid enums if (options.enumValueIds && column.type === "enum") { if (_.difference(_.pluck(column.enumValues!, "id"), options.enumValueIds).length > 0) { return null } } if (options.enumValueIds && column.expr) { if (this.exprUtils.getExprType(column.expr) === "enum") { if ( _.difference(_.pluck(this.exprUtils.getExprEnumValues(column.expr)!, "id"), options.enumValueIds).length > 0 ) { return null } } } return expr } cleanOpExpr(expr: OpExpr, options: CleanExprOptions): Expr { switch (expr.op) { case "and": case "or": // Simplify if (expr.exprs.length === 1) { return this.cleanExpr(expr.exprs[0], options) } if (expr.exprs.length === 0) { return null } return produce(expr, (draft) => { for (let i = 0; i < expr.exprs.length; i++) { draft.exprs[i] = this.cleanExpr(expr.exprs[i], { types: ["boolean"], aggrStatuses: options.aggrStatuses, table: expr.table }) } }) case "+": case "*": // Simplify if (expr.exprs.length === 1) { return this.cleanExpr(expr.exprs[0], options) } if (expr.exprs.length === 0) { return null } return produce(expr, (draft) => { // Once a certain aggregate status of "aggregate" or "individual" is found, only allow that status or "literal" let foundAggregateStatus: AggrStatus | null = null for (let i = 0; i < expr.exprs.length; i++) { const cleanedExpr = this.cleanExpr(expr.exprs[i], { types: ["number"], table: expr.table, aggrStatuses: foundAggregateStatus ? [foundAggregateStatus, "literal"] : options.aggrStatuses }) draft.exprs[i] = cleanedExpr if (cleanedExpr) { const aggrStatus = this.exprUtils.getExprAggrStatus(cleanedExpr) if (aggrStatus == "aggregate" || aggrStatus == "individual") { foundAggregateStatus = aggrStatus } } } }) default: // Count always takes zero parameters and is valid if number type is valid if ( expr.op === "count" && (!options.types || options.types.includes("number")) && options.aggrStatuses.includes("aggregate") ) { if (expr.exprs.length == 0) { return expr } else { return { type: "op", op: "count", table: expr.table, exprs: [] } } } // Determine aggregate type of op const opIsAggr = ExprUtils.isOpAggr(expr.op) // Aggregate ops are never allowed if aggregates are not allowed if (opIsAggr && !options.aggrStatuses.includes("aggregate")) { return null } // Determine aggr setting. Prevent aggr for non-aggr output let aggr: boolean | undefined = undefined if (!options.aggrStatuses.includes("aggregate") && options.aggrStatuses.includes("individual")) { aggr = false } // Determine innerAggrStatuses (same as outer, unless aggregate expression, in which case always aggregate) let innerAggrStatuses: AggrStatus[] if (opIsAggr) { innerAggrStatuses = ["literal", "individual"] } else { innerAggrStatuses = options.aggrStatuses } // First do a loose cleaning of LHS to remove obviously invalid values let lhsExpr = this.cleanExpr(expr.exprs[0], { table: expr.table, aggrStatuses: innerAggrStatuses }) // Now attempt to clean it restricting to the types the op allows as lhs if (lhsExpr) { const lhsTypes = _.uniq( _.compact(_.map(this.exprUtils.findMatchingOpItems({ op: expr.op }), (opItem) => opItem.exprTypes[0])) ) lhsExpr = this.cleanExpr(expr.exprs[0], { table: expr.table, aggrStatuses: innerAggrStatuses, types: lhsTypes }) // If this nulls it, don't keep as we can switch ops to preseve it if (lhsExpr == null) { lhsExpr = this.cleanExpr(expr.exprs[0], { table: expr.table, aggrStatuses: innerAggrStatuses }) } } // Need LHS for a normal op that is not a prefix. If it is a prefix op, allow the op to stand alone without params. Allow null type (ones being built out) to stand too if (!lhsExpr && !ExprUtils.isOpPrefix(expr.op)) { return null } // Get opItem let opItems = this.exprUtils.findMatchingOpItems({ op: expr.op, lhsExpr: lhsExpr, resultTypes: options.types, aggr: aggr, }) // If ambiguous, just clean subexprs and return if (opItems.length > 1) { return produce(expr, (draft) => { for (let i = 0; i < expr.exprs.length; i++) { // Determine all possible types (union of all op items types) const types = _.uniq(_.compact(_.flatten(_.map(opItems, (opItem) => opItem.exprTypes[i])))) draft.exprs[i] = this.cleanExpr(expr.exprs[i], { table: expr.table, aggrStatuses: innerAggrStatuses, types: types.length > 0 ? types : undefined }) } }) } // If not found, default opItem let opItem = opItems[0] if (!opItem) { opItem = this.exprUtils.findMatchingOpItems({ lhsExpr, resultTypes: options.types, aggr })[0] if (!opItem) { return null } expr = { type: "op", table: expr.table, op: opItem.op, exprs: [lhsExpr || null] } } // Do not allow "to text" on an enum with more than 10,000 values if (expr.op === "to text" && this.exprUtils.getExprType(expr.exprs[0]) === "enum" && this.exprUtils.getExprEnumValues(expr.exprs[0])!.length > 10000) { return null } return produce(expr, (draft) => { // Pad or trim number of expressions while (draft.exprs.length < opItem.exprTypes.length) { draft.exprs.push(null) } if (!opItem.moreExprType) { // Determine max number of exprs const maxExprs = opItem.ordered ? opItem.exprTypes.length + 1 : opItem.exprTypes.length if (expr.exprs.length > maxExprs) { draft.exprs.splice(maxExprs, expr.exprs.length - maxExprs) } } // Clean all sub expressions let enumValueIds: string[] | undefined = undefined if (lhsExpr) { const enumValues = this.exprUtils.getExprEnumValues(lhsExpr) if (enumValues) { enumValueIds = _.pluck(enumValues, "id") } } for (let i = 0; i < draft.exprs.length; i++) { let expectedTypes: LiteralType[] | undefined = undefined // Check if this index corresponds to the optional order by argument const isOrderByArg = opItem.ordered && i === opItem.exprTypes.length if (i < opItem.exprTypes.length) { expectedTypes = opItem.exprTypes[i] ? [opItem.exprTypes[i]] : undefined } else if (isOrderByArg) { // Set expected types for the order by argument expectedTypes = ["date", "datetime", "number"] } else if (opItem.moreExprType) { expectedTypes = [opItem.moreExprType] } // else: Should have been trimmed // Recursively clean the argument, passing expectedTypes draft.exprs[i] = this.cleanExpr(expr.exprs[i] || null, { table: expr.table, types: expectedTypes, enumValueIds: (opItem.exprTypes[i] === "enumset" || opItem.exprTypes[i] === "enum") ? enumValueIds : undefined, idTable: this.exprUtils.getExprIdTable(expr.exprs[0]) || undefined, aggrStatuses: isOrderByArg ? ["individual", "literal"] : innerAggrStatuses }) } // If ordering is allowed, remove the ordering exprs if null *after* cleaning if (opItem.ordered && draft.exprs.length > opItem.exprTypes.length && draft.exprs[opItem.exprTypes.length] == null) { // Remove only the null ordering argument draft.exprs.splice(opItem.exprTypes.length, 1); } }) } } // Strips/defaults invalid aggr and where of a scalar expression cleanScalarExpr(expr: ScalarExpr, options: CleanExprOptions): Expr { if (expr.joins.length === 0) { return this.cleanExpr(expr.expr, options) } // Fix legacy entity joins (accidentally had entities.. prepended) const joins = _.map(expr.joins, (j) => { if (j.match(/^entities\.[a-z_0-9]+\./)) { return j.split(".")[2] } return j }) expr = _.extend({}, expr, { joins }) if (!this.exprUtils.areJoinsValid(expr.table, expr.joins)) { return null } const innerTable = this.exprUtils.followJoins(expr.table, expr.joins) // LEGACY // Move aggr to inner expression if ((expr as any).aggr) { expr = _.extend({}, _.omit(expr, "aggr"), { expr: { type: "op", table: innerTable, op: (expr as any).aggr, exprs: [expr.expr] } }) } // // Clean where // if (expr.where) { // expr.where = this.cleanExpr(expr.where, {table: innerTable}) // } // Simplify to join column if (expr.joins.length === 1 && expr.expr && expr.expr.type === "id") { return { type: "field", table: expr.table, column: expr.joins[0] } } // Get inner expression type (must match unless is count which can count anything) if (expr.expr) { const isMultiple = this.exprUtils.isMultipleJoins(expr.table, expr.joins) const aggrStatuses: AggrStatus[] = isMultiple ? ["literal", "aggregate"] : ["literal", "individual"] return produce(expr, (draft) => { draft.expr = this.cleanExpr(expr.expr, { ...options, table: innerTable, aggrStatuses }) }) } return expr } cleanLiteralExpr(expr: LiteralExpr, options: CleanExprOptions) { // Convert old types if (["decimal", "integer"].includes(expr.valueType)) { expr = _.extend({}, expr, { valueType: "number" }) } // TODO strip if no value? // Remove if enum type is wrong if (expr.valueType === "enum" && options.enumValueIds && expr.value && !options.enumValueIds.includes(expr.value)) { return null } // Remove invalid enum types if (expr.valueType === "enumset" && options.enumValueIds && expr.value) { if (_.difference(expr.value, options.enumValueIds).length > 0) { return { ...expr, value: _.intersection(options.enumValueIds, expr.value) } } } // Null if wrong table if (expr.valueType === "id" && options.idTable && expr.idTable !== options.idTable) { return null } return expr } cleanCaseExpr(expr: CaseExpr, options: CleanExprOptions) { // Simplify if no cases if (expr.cases.length === 0) { return expr.else || null } // Do not allow more than 10,000 cases if (expr.cases.length > 10000) { return null } // Determine type of first then that has a type const firstThenType = _.first(_.compact(_.map(expr.cases, (c) => this.exprUtils.getExprType(c.then)).concat(expr.else ? [this.exprUtils.getExprType(expr.else)] : []))) const requiredTypes = firstThenType && options.types ? _.intersection([firstThenType], options.types) : (firstThenType ? [firstThenType] : options.types) // Clean whens as boolean, tracking aggregate status to prevent mixing individual and aggregate return produce(expr, (draft) => { // Once a certain aggregate status of "aggregate" or "individual" is found, only allow that status or "literal" let foundAggregateStatus: AggrStatus | null = null for (let c = 0; c < expr.cases.length; c++) { // Clean when const cleanedWhen = this.cleanExpr(expr.cases[c].when, { ...options, types: ["boolean"], table: expr.table, aggrStatuses: foundAggregateStatus ? [foundAggregateStatus, "literal"] : options.aggrStatuses }) draft.cases[c].when = cleanedWhen if (cleanedWhen) { const whenAggrStatus = this.exprUtils.getExprAggrStatus(cleanedWhen) if (whenAggrStatus === "aggregate" || whenAggrStatus === "individual") { foundAggregateStatus = whenAggrStatus } } // Clean then const cleanedThen = this.cleanExpr(expr.cases[c].then, { ...options, types: requiredTypes, aggrStatuses: foundAggregateStatus ? [foundAggregateStatus, "literal"] : options.aggrStatuses }) draft.cases[c].then = cleanedThen if (cleanedThen) { const thenAggrStatus = this.exprUtils.getExprAggrStatus(cleanedThen) if (thenAggrStatus === "aggregate" || thenAggrStatus === "individual") { foundAggregateStatus = thenAggrStatus } } } // Clean else draft.else = this.cleanExpr(expr.else, { ...options, types: requiredTypes, aggrStatuses: foundAggregateStatus ? [foundAggregateStatus, "literal"] : options.aggrStatuses }) }) } cleanIdExpr(expr: IdExpr, options: CleanExprOptions) { // Null if wrong table if (options.idTable && expr.table !== options.idTable) { return null } return expr } cleanScoreExpr(expr: ScoreExpr, options: CleanExprOptions) { return produce(expr, (draft) => { draft.input = this.cleanExpr(expr.input, { types: ["enum", "enumset"] }) // Remove scores if no input if (!draft.input) { draft.scores = {} return } const enumValues = this.exprUtils.getExprEnumValues(expr.input) if (!enumValues) { draft.scores = {} return } const enumValueIds = enumValues.map((ev) => ev.id) // Clean score values for (const key in expr.scores) { // Remove unknown enum values if (!enumValueIds.includes(key)) { delete draft.scores[key] } else { draft.scores[key] = this.cleanExpr(expr.scores[key], { table: expr.table, types: ["number"] }) if (!draft.scores[key]) { delete draft.scores[key] } } } }) } cleanBuildEnumsetExpr(expr: BuildEnumsetExpr, options: CleanExprOptions) { return produce(expr, (draft) => { const enumValueIds = options.enumValueIds // Clean values for (const key in expr.values) { if (enumValueIds && !enumValueIds.includes(key)) { delete draft.values[key] } else { draft.values[key] = this.cleanExpr(expr.values[key], { table: expr.table, types: ["boolean"] }) if (!draft.values[key]) { delete draft.values[key] } } } }) } cleanVariableExpr(expr: VariableExpr, options: CleanExprOptions) { // Get variable const variable = this.schema.getVariables().find((v) => v.id == expr.variableId) if (!variable) { return null } // Check id table if (options.idTable && variable.type === "id" && variable.idTable !== options.idTable) { return null } return expr } cleanSubqueryExpr(expr: SubqueryExpr, options: CleanExprOptions) { // Check if table is valid if (options.table && expr.table !== options.table) { return null } // Ensure that table exists if (expr.table && !this.schema.getTable(expr.table)) { return null } // If no from, is initial state so leave as is if (!expr.from) { return expr } // Ensure that from is valid if (!this.schema.getTable(expr.from)) { return null } // Determine if select expression is aggregate (as it doesn't need ordering) const selectExprType = this.exprUtils.getExprAggrStatus(expr.select) // Create inner expr cleaner with variables const innerExprCleaner = new ExprCleaner(this.schema.addVariables(this.exprUtils.createSubqueryInnerVariables(expr), {})) // Clean select return produce(expr, (draft) => { draft.select = innerExprCleaner.cleanExpr(expr.select, { table: expr.from, aggrStatuses: ["aggregate", "individual", "literal"] }) draft.where = innerExprCleaner.cleanExpr(expr.where, { table: expr.from, aggrStatuses: ["individual", "literal"] }) for (let i = 0; i < expr.orderBys.length; i++) { draft.orderBys[i].expr = innerExprCleaner.cleanExpr(expr.orderBys[i].expr, { table: expr.from, aggrStatuses: ["individual", "literal"] }) } for (let i = 0; i < expr.outerRefs.length; i++) { draft.outerRefs[i].expr = this.cleanExpr(expr.outerRefs[i].expr, { table: expr.table, aggrStatuses: ["individual", "literal"] }) } if (selectExprType === "aggregate") { draft.orderBys = [] } }) } cleanComparisonExpr(expr: LegacyComparisonExpr, options: CleanExprOptions) { // Upgrade to op let newExpr: Expr = { type: "op", table: expr.table, op: expr.op, exprs: [expr.lhs] } if (expr.rhs) { newExpr.exprs.push(expr.rhs) } // Clean sub-expressions to handle legacy literals newExpr.exprs = _.map(newExpr.exprs, (e) => this.cleanExpr(e)) // If = true if (expr.op === "= true") { newExpr = expr.lhs } if (expr.op === "= false") { newExpr = { type: "op", op: "not", table: expr.table, exprs: [expr.lhs] } } if ( expr.op === "between" && expr.rhs && expr.rhs.type === "literal" && (expr as any).rhs.valueType === "daterange" ) { ; (newExpr as OpExpr).exprs = [ expr.lhs, { type: "literal", valueType: "date", value: expr.rhs.value[0] }, { type: "literal", valueType: "date", value: expr.rhs.value[1] } ] } if ( expr.op === "between" && expr.rhs && expr.rhs.type === "literal" && (expr as any).rhs.valueType === "datetimerange" ) { // If date, convert datetime to date if (this.exprUtils.getExprType(expr.lhs) === "date") { ; (newExpr as OpExpr).exprs = [ expr.lhs, { type: "literal", valueType: "date", value: expr.rhs.value[0].substr(0, 10) }, { type: "literal", valueType: "date", value: expr.rhs.value[1].substr(0, 10) } ] } else { ; (newExpr as OpExpr).exprs = [ expr.lhs, { type: "literal", valueType: "datetime", value: expr.rhs.value[0] }, { type: "literal", valueType: "datetime", value: expr.rhs.value[1] } ] } } return this.cleanExpr(newExpr, options) } cleanLogicalExpr(expr: LegacyLogicalExpr, options: CleanExprOptions) { const newExpr: OpExpr = { type: "op", op: expr.op, table: expr.table, exprs: expr.exprs } return this.cleanExpr(newExpr, options) } cleanCountExpr(expr: LegacyCountExpr, options: CleanExprOptions) { const newExpr: IdExpr = { type: "id", table: expr.table } return this.cleanExpr(newExpr, options) } }