import { ParserOptionsSpec } from './ParserOptionsSpec'; import { computeSecondaryStructure } from "./utils/computeSecondaryStructure"; import { processSymmetries } from "./utils/processSymmetries"; import { conversionMatrix3, Matrix3, Matrix4, Vector3, } from "../WebGL" import { assignPDBBonds } from './utils/assignPDBBonds'; /** * Puts atoms specified in mmCIF fromat in str into atoms * * @param {string} str * @param {ParserOptionsSpec} options * @category Parsers */ export function CIF(str: string, options: ParserOptionsSpec = {}) { var atoms: any[] & Record = []; var noAssembly = !options.doAssembly; // don't assemble by default var modelData: any[] & Record = (atoms.modelData = []); var assignbonds = options.assignBonds === undefined ? true : options.assignBonds; //coordinate conversion var fractionalToCartesian = function (cmat: Matrix3, x: number, y: number, z: number) { return new Vector3(x, y, z).applyMatrix3(cmat); }; // Used to handle quotes correctly function splitRespectingQuotes(string: string, separator: string | any[]) { var sections: any[] = []; var sectionStart = 0; var sectionEnd = 0; while (sectionEnd < string.length) { while ( string.substring(sectionEnd, sectionEnd + separator.length) !== separator && sectionEnd < string.length ) { // currently does not support escaping quotes if (string[sectionEnd] === "'") { sectionEnd++; while (sectionEnd < string.length && string[sectionEnd] !== "'") { sectionEnd++; } //biopython apparently generates invalid string literals so if we think we are done but aren't at a separator keep going while (string.substring(sectionEnd, sectionEnd + separator.length) !== separator && sectionEnd < string.length) { sectionEnd++; } } else if (string[sectionEnd] === '"') { sectionEnd++; while (sectionEnd < string.length && string[sectionEnd] !== '"') { sectionEnd++; } sectionEnd++; } else { sectionEnd++; } } sections.push(string.substring(sectionStart, sectionEnd)); sectionStart = sectionEnd = sectionEnd + separator.length; } return sections; } var lines = str.split(/\r?\n|\r/); // Filter text to remove comments, trailing spaces, and empty lines var linesFiltered: string[] = []; var trimDisabled = false; for (let lineNum = 0; lineNum < lines.length; lineNum++) { // first remove comments // incorrect if #'s are allowed in strings // comments might only be allowed at beginning of line, not sure var line = lines[lineNum].split("#")[0]; // inside data blocks, the string must be left verbatim // datablocks are started with a ';' at the beginning of a line // and ended with a ';' on its own line. if (trimDisabled) { if (line[0] === ";") { trimDisabled = false; } } else { if (line[0] === ";") { trimDisabled = true; } } if (trimDisabled || line !== "") { if (!trimDisabled) { line = line.trim(); if (line[0] === "_") { // Replace dot separating category from data item with underscore. Dots aren't guarenteed, to makes // files consistent. var dot = line.split(/\s/)[0].indexOf("."); if (dot > -1) { let lineArr = line.split('') lineArr[dot] = "_"; line = lineArr.join(''); line = line.substring(0, dot) + "_" + line.substring(dot + 1); } } } linesFiltered.push(line); } } var lineNum = 0; while (lineNum < linesFiltered.length) { while ( !linesFiltered[lineNum].startsWith("data_") || linesFiltered[lineNum] === "data_global" ) { lineNum++; } lineNum++; // Process the lines and puts all of the data into an object. var mmCIF: Record = {}; while ( lineNum < linesFiltered.length && !linesFiltered[lineNum].startsWith("data_") ) { if (linesFiltered[lineNum][0] === undefined) { lineNum++; } else if (linesFiltered[lineNum][0] === "_") { var dataItemName = linesFiltered[lineNum].split(/\s/)[0].toLowerCase(); var dataItem = (mmCIF[dataItemName] = mmCIF[dataItemName] || []); // if nothing left on the line go to the next one var restOfLine = linesFiltered[lineNum].substring( linesFiltered[lineNum].indexOf(dataItemName) + dataItemName.length ); if (restOfLine === "") { lineNum++; if (linesFiltered[lineNum][0] === ";") { var dataBlock = linesFiltered[lineNum].substring(1); lineNum++; while (linesFiltered[lineNum] !== ";") { dataBlock = dataBlock + "\n" + linesFiltered[lineNum]; lineNum++; } dataItem.push(dataBlock); } else { dataItem.push(linesFiltered[lineNum]); } } else { dataItem.push(restOfLine.trim()); } lineNum++; } else if (linesFiltered[lineNum].substring(0, 5) === "loop_") { lineNum++; var dataItems: any[] = []; while ( linesFiltered[lineNum] === "" || linesFiltered[lineNum][0] === "_" ) { if (linesFiltered[lineNum] !== "") { let dataItemName = linesFiltered[lineNum] .split(/\s/)[0] .toLowerCase(); let dataItem = (mmCIF[dataItemName] = mmCIF[dataItemName] || []); dataItems.push(dataItem); } lineNum++; } var currentDataItem = 0; while ( lineNum < linesFiltered.length && linesFiltered[lineNum][0] !== "_" && !linesFiltered[lineNum].startsWith("loop_") && !linesFiltered[lineNum].startsWith("data_") ) { let line = splitRespectingQuotes(linesFiltered[lineNum], " "); for (var field = 0; field < line.length; field++) { if (line[field] !== "") { dataItems[currentDataItem].push(line[field]); currentDataItem = (currentDataItem + 1) % dataItems.length; } } lineNum++; } } else { lineNum++; } } modelData.push({ symmetries: [] }); // Pulls atom information out of the data atoms.push([]); var atomCount = mmCIF._atom_site_id !== undefined ? mmCIF._atom_site_id.length : mmCIF._atom_site_label.length; var conversionMatrix: Matrix3; if (mmCIF._cell_length_a !== undefined) { var a = parseFloat(mmCIF._cell_length_a); var b = parseFloat(mmCIF._cell_length_b); var c = parseFloat(mmCIF._cell_length_c); var alpha_deg = parseFloat(mmCIF._cell_angle_alpha) || 90; var beta_deg = parseFloat(mmCIF._cell_angle_beta) || 90; var gamma_deg = parseFloat(mmCIF._cell_angle_gamma) || 90; conversionMatrix = conversionMatrix3( a, b, c, alpha_deg, beta_deg, gamma_deg ); modelData[modelData.length - 1].cryst = { a: a, b: b, c: c, alpha: alpha_deg, beta: beta_deg, gamma: gamma_deg, }; } for (var i = 0; i < atomCount; i++) { if ( mmCIF._atom_site_group_pdb !== undefined && mmCIF._atom_site_group_pdb[i] === "TER" ) continue; var atom: Record = {}; if (mmCIF._atom_site_cartn_x !== undefined) { atom.x = parseFloat(mmCIF._atom_site_cartn_x[i]); atom.y = parseFloat(mmCIF._atom_site_cartn_y[i]); atom.z = parseFloat(mmCIF._atom_site_cartn_z[i]); } else { var coords = fractionalToCartesian( conversionMatrix, parseFloat(mmCIF._atom_site_fract_x[i]), parseFloat(mmCIF._atom_site_fract_y[i]), parseFloat(mmCIF._atom_site_fract_z[i]) ); atom.x = coords.x; atom.y = coords.y; atom.z = coords.z; } atom.chain = mmCIF._atom_site_auth_asym_id ? mmCIF._atom_site_auth_asym_id[i] : (mmCIF._atom_site_label_asym_id ? mmCIF._atom_site_label_asym_id[i] : undefined); atom.resi = mmCIF._atom_site_auth_seq_id ? parseInt(mmCIF._atom_site_auth_seq_id[i]) : (mmCIF._atom_site_label_seq_id ? mmCIF._atom_site_label_seq_id[i] : undefined); atom.resn = mmCIF._atom_site_auth_comp_id ? mmCIF._atom_site_auth_comp_id[i].trim() : (mmCIF._atom_site_label_comp_id ? mmCIF._atom_site_label_comp_id[i].trim() : undefined); atom.atom = mmCIF._atom_site_auth_atom_id ? mmCIF._atom_site_auth_atom_id[i].replace(/"/gm, "") : (mmCIF._atom_site_label_atom_id ? mmCIF._atom_site_label_atom_id[i].replace(/"/gm, "") : undefined); //"primed" names are in quotes atom.hetflag = !mmCIF._atom_site_group_pdb || mmCIF._atom_site_group_pdb[i] === "HETA" || mmCIF._atom_site_group_pdb[i] === "HETATM"; var elem = "X"; if (mmCIF._atom_site_type_symbol) { elem = mmCIF._atom_site_type_symbol[i].replace(/\(?\+?\d+.*/, ""); } else if (mmCIF._atom_site_label) { //first two components are concatenated, then separated by underscore //best I can do is assume second component, if present, starts with a number elem = mmCIF._atom_site_label[i].split("_")[0].replace(/\(?\d+.*/, ""); } atom.elem = elem[0].toUpperCase() + elem.substring(1, 2).toLowerCase(); atom.bonds = []; atom.ss = "c"; atom.serial = i; atom.bondOrder = []; atom.properties = {}; atoms[atoms.length - 1].push(atom); } if (mmCIF._pdbx_struct_oper_list_id !== undefined && !noAssembly) { for (let i = 0; i < mmCIF._pdbx_struct_oper_list_id.length; i++) { var matrix11 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[1][1]"][i] ); var matrix12 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[1][2]"][i] ); var matrix13 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[1][3]"][i] ); var vector1 = parseFloat(mmCIF["_pdbx_struct_oper_list_vector[1]"][i]); var matrix21 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[2][1]"][i] ); var matrix22 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[2][2]"][i] ); var matrix23 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[2][3]"][i] ); var vector2 = parseFloat(mmCIF["_pdbx_struct_oper_list_vector[2]"][i]); var matrix31 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[3][1]"][i] ); var matrix32 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[3][2]"][i] ); var matrix33 = parseFloat( mmCIF["_pdbx_struct_oper_list_matrix[3][3]"][i] ); var vector3 = parseFloat(mmCIF["_pdbx_struct_oper_list_vector[3]"][i]); var matrix = new Matrix4( matrix11, matrix12, matrix13, vector1, matrix21, matrix22, matrix23, vector2, matrix31, matrix32, matrix33, vector3 ); modelData[modelData.length - 1].symmetries.push(matrix); } } var parseTerm = function (term: string) { var negative = term.match("-"); term = term.replace(/[-xyz]/g, ""); var fractionParts = term.split("/"); var numerator: number, denominator: number; if (fractionParts[1] === undefined) { denominator = 1; } else { denominator = parseInt(fractionParts[1]); } if (fractionParts[0] === "") { numerator = 1; } else { numerator = parseInt(fractionParts[0]); } return (numerator / denominator) * (negative ? -1 : 1); }; if (mmCIF._symmetry_equiv_pos_as_xyz !== undefined && !noAssembly) { for (var sym = 0; sym < mmCIF._symmetry_equiv_pos_as_xyz.length; sym++) { var transform = mmCIF._symmetry_equiv_pos_as_xyz[sym].replace( /["' ]/g, "" ); var componentStrings = transform.split(",").map(function (val: string) { return val.replace(/-/g, "+-"); }); let matrix = new Matrix4( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 ); for (let coord = 0; coord < 3; coord++) { var terms = componentStrings[coord].split("+"); for (let t = 0; t < terms.length; t++) { var term = terms[t]; if (term === "") continue; var coefficient = parseTerm(term); if (term.match("x")) { matrix.elements[coord + 0] = coefficient; } else if (term.match("y")) { matrix.elements[coord + 4] = coefficient; } else if (term.match("z")) { matrix.elements[coord + 8] = coefficient; } else { matrix.elements[coord + 12] = coefficient; } } } var conversionMatrix4 = conversionMatrix.getMatrix4(); var conversionInverse = new Matrix4().getInverse( conversionMatrix4, true ); matrix = new Matrix4().multiplyMatrices( matrix, conversionInverse ); matrix = new Matrix4().multiplyMatrices( conversionMatrix4, matrix ); modelData[modelData.length - 1].symmetries.push(matrix); } } } for (let i = 0; i < atoms.length; i++) { if (assignbonds && !(options.duplicateAssemblyAtoms && !options.dontConnectDuplicatedAtoms)) { assignPDBBonds(atoms[i], options); } computeSecondaryStructure(atoms[i],options.hbondCutoff); processSymmetries( modelData[i].symmetries, atoms[i], options, modelData[i].cryst ); if ( options.duplicateAssemblyAtoms && !options.dontConnectDuplicatedAtoms && assignbonds ) assignPDBBonds(atoms[i],options); } return atoms; }