import axios, {AxiosError} from "axios";
import fetch from "cross-fetch";
import axiosRetry from "axios-retry";

/** Comma-separated return fields requested when the caller does not pass its own `columns`. */
const defaultColumns = `accession,id,gene_names,protein_name,organism_name,organism_id,length,xref_refseq,xref_geneid,xref_ensembl,go_id,go_p,go_c,go_f,cc_subcellular_location,ft_topo_dom,ft_carbohyd,mass,cc_mass_spectrometry,sequence,ft_var_seq,cc_alternative_products`

/**
 * Accession pattern with an optional `-<digits>` isoform suffix.
 * Capture group 1 is the accession, group 4 the isoform digits.
 * The pattern is not anchored, so it finds the first accession anywhere in the input.
 */
const accRegex = /([OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})(-(\d+))?/

/** Endpoint that accepts a new ID-mapping job. */
const baseUrl: string = "https://rest.uniprot.org/idmapping/run"

/** Prefix of the job-status endpoint; the job id is appended to it. */
const checkStatusUrl: string = "https://rest.uniprot.org/idmapping/status/"

/** Source database used when a submission with the caller's `from` value is answered with HTTP 500. */
const fallbackFrom = "UniProtKB_AC-ID"

/** Shape this module assumes for the entries returned by {@link getUniprotFromFields}. */
export interface UniprotFieldFromGroup {
    displayName: string,
    name: string,
    uriLink: string,
    from: boolean,
    to: boolean,
    ruleId: number
}

/**
 * Fetches the ID-mapping field configuration and resolves with its `groups` member.
 *
 * NOTE(review): the response is cast, not validated — confirm the live payload really
 * has the declared shape before relying on individual properties.
 */
export const getUniprotFromFields = () => {
    return axios.get("https://rest.uniprot.org/configure/idmapping/fields").then((res) => {
        return res.data.groups as {[key: string]: UniprotFieldFromGroup}
    })
}

/** Section names used by {@link uniprotColumns}, in display order. */
export const uniprotSections: string[] = [
    "Names & Taxonomy",
    "Sequences",
    "Function",
    "Miscellaneous",
    "Interaction",
    "Expression",
    "Gene Ontology (GO)",
    "Pathology & Biotech",
    "Subcellular Location",
    "PTM / Processing",
    "Structure",
    "Publications",
    "Date of",
    "Family & Domains",
    "Sequence Databases",
    "3D Structure Databases",
    "Protein-Protein Interaction Databases",
    "Chemistry Databases",
    "Protein Family/Group Databases",
    "PTM Databases",
    "Genetic Variation Databases",
    "2D Gel Databases",
    "Proteomic Databases",
    "Protocols And Materials Databases",
    "Genome Annotation Databases",
    "Organism-Specific Databases",
    "Phylogenomic Databases",
    "Enzyme And Pathway Databases",
    "Miscellaneous Databases",
    "Gene Expression Databases",
    "Family And Domain Databases"
]

/**
 * Cross-reference databases grouped by section.
 * The field id of each one is `"xref_"` followed by the lower-cased label, so it is derived
 * in {@link buildCrossReferenceColumns} instead of being typed a second time.
 */
const crossReferenceDatabases: {section: string, labels: string[]}[] = [
    {section: "Sequence Databases", labels: ["CCDS", "EMBL", "PIR", "RefSeq"]},
    {section: "3D Structure Databases", labels: ["AlphaFoldDB", "BMRB", "PCDDB", "PDB", "PDBsum", "SASBDB", "SMR"]},
    {section: "Protein-Protein Interaction Databases", labels: ["BioGRID", "CORUM", "ComplexPortal", "DIP", "ELM", "IntAct", "MINT", "STRING"]},
    {section: "Chemistry Databases", labels: ["BindingDB", "ChEMBL", "DrugBank", "DrugCentral", "GuidetoPHARMACOLOGY", "SwissLipids"]},
    {section: "Protein Family/Group Databases", labels: ["Allergome", "CAZy", "CLAE", "ESTHER", "IMGT_GENE-DB", "MEROPS", "MoonDB", "MoonProt", "PeroxiBase", "REBASE", "TCDB", "UniLectin"]},
    {section: "PTM Databases", labels: ["CarbonylDB", "DEPOD", "GlyCosmos", "GlyConnect", "GlyGen", "MetOSite", "PhosphoSitePlus", "SwissPalm", "iPTMnet"]},
    {section: "Genetic Variation Databases", labels: ["BioMuta", "DMDM", "dbSNP"]},
    {section: "2D Gel Databases", labels: ["COMPLUYEAST-2DPAGE", "DOSAC-COBS-2DPAGE", "OGP", "SWISS-2DPAGE", "UCD-2DPAGE", "World-2DPAGE"]},
    {section: "Proteomic Databases", labels: ["CPTAC", "EPD", "MassIVE", "MaxQB", "PRIDE", "PaxDb", "PeptideAtlas", "ProMEX", "ProteomicsDB", "TopDownProteomics", "jPOST"]},
    {section: "Protocols And Materials Databases", labels: ["ABCD", "Antibodypedia", "CPTC", "DNASU"]},
    {section: "Genome Annotation Databases", labels: ["Ensembl", "EnsemblBacteria", "EnsemblFungi", "EnsemblMetazoa", "EnsemblPlants", "EnsemblProtists", "GeneID", "Gramene", "KEGG", "MANE-Select", "PATRIC", "UCSC", "VectorBase", "WBParaSite", "WBParaSiteTranscriptProtein"]},
    {section: "Organism-Specific Databases", labels: ["AGR", "ArachnoServer", "CGD", "CTD", "ConoServer", "DisGeNET", "EchoBASE", "FlyBase", "GeneCards", "GeneReviews", "HGNC", "HPA", "LegioList", "Leproma", "MGI", "MIM", "MaizeGDB", "MalaCards", "NIAGADS", "OpenTargets", "Orphanet", "PharmGKB", "PomBase", "PseudoCAP", "RGD", "SGD", "TAIR", "TubercuList", "VEuPathDB", "VGNC", "WormBase", "Xenbase", "ZFIN", "dictyBase", "euHCVdb", "neXtProt"]},
    {section: "Phylogenomic Databases", labels: ["GeneTree", "HOGENOM", "InParanoid", "KO", "OMA", "OrthoDB", "PhylomeDB", "TreeFam", "eggNOG"]},
    {section: "Enzyme And Pathway Databases", labels: ["BRENDA", "BioCyc", "PathwayCommons", "PlantReactome", "Reactome", "SABIO-RK", "SIGNOR", "SignaLink", "UniPathway"]},
    {section: "Miscellaneous Databases", labels: ["BioGRID-ORCS", "ChiTaRS", "EvolutionaryTrace", "GeneWiki", "GenomeRNAi", "PHI-base", "PRO", "Pharos", "RNAct"]},
    {section: "Gene Expression Databases", labels: ["Bgee", "CleanEx", "CollecTF", "ExpressionAtlas", "Genevisible"]},
    {section: "Family And Domain Databases", labels: ["CDD", "Gene3D", "HAMAP", "IDEAL", "InterPro", "PANTHER", "PIRSF", "PRINTS", "PROSITE", "Pfam", "ProDom", "SFLD", "SMART", "SUPFAM", "TIGRFAMs"]},
]

/** Expands {@link crossReferenceDatabases} into column descriptors, preserving section and label order. */
function buildCrossReferenceColumns(): {label: string, fieldId: string, section: string}[] {
    const columns: {label: string, fieldId: string, section: string}[] = [];
    for (const group of crossReferenceDatabases) {
        for (const label of group.labels) {
            columns.push({label: label, fieldId: "xref_" + label.toLowerCase(), section: group.section});
        }
    }
    return columns;
}

/**
 * Every selectable return column: display label, field id to put in the `fields`
 * request parameter, and the section (one of {@link uniprotSections}) it belongs to.
 * Field ids are unique across the table.
 */
export const uniprotColumns: {label: string, fieldId: string, section: string}[] = [
    {label: "Entry", fieldId: "accession", section: "Names & Taxonomy"},
    {label: "Entry name", fieldId: "id", section: "Names & Taxonomy"},
    {label: "Gene Names", fieldId: "gene_names", section: "Names & Taxonomy"},
    {label: "Gene Names (primary)", fieldId: "gene_primary", section: "Names & Taxonomy"},
    {label: "Gene Names (synonym)", fieldId: "gene_synonym", section: "Names & Taxonomy"},
    {label: "Gene Names (ordered locus)", fieldId: "gene_oln", section: "Names & Taxonomy"},
    {label: "Gene Names (ORF)", fieldId: "gene_orf", section: "Names & Taxonomy"},
    {label: "Organism", fieldId: "organism_name", section: "Names & Taxonomy"},
    {label: "Organism ID", fieldId: "organism_id", section: "Names & Taxonomy"},
    {label: "Protein names", fieldId: "protein_name", section: "Names & Taxonomy"},
    {label: "Proteomes", fieldId: "xref_proteomes", section: "Names & Taxonomy"},
    {label: "Taxonomic lineage", fieldId: "lineage", section: "Names & Taxonomy"},
    {label: "Taxonomic lineage IDs", fieldId: "lineage_ids", section: "Names & Taxonomy"},
    {label: "Virus hosts", fieldId: "virus_hosts", section: "Names & Taxonomy"},
    {label: "Alternative products", fieldId: "cc_alternative_products", section: "Sequences"},
    {label: "Alternative sequence", fieldId: "ft_var_seq", section: "Sequences"},
    {label: "Erroneous gene model prediction", fieldId: "error_gmodel_pred", section: "Sequences"},
    {label: "Fragment", fieldId: "fragment", section: "Sequences"},
    {label: "Gene encoded by", fieldId: "organelle", section: "Sequences"},
    {label: "Length", fieldId: "length", section: "Sequences"},
    {label: "Mass", fieldId: "mass", section: "Sequences"},
    {label: "Mass spectrometry", fieldId: "cc_mass_spectrometry", section: "Sequences"},
    {label: "Natural variant", fieldId: "ft_variant", section: "Sequences"},
    {label: "Non-adjacent residues", fieldId: "ft_non_cons", section: "Sequences"},
    {label: "Non-standard residue", fieldId: "ft_non_std", section: "Sequences"},
    {label: "Non-terminal residue", fieldId: "ft_non_ter", section: "Sequences"},
    {label: "Polymorphism", fieldId: "cc_polymorphism", section: "Sequences"},
    {label: "RNA editing", fieldId: "cc_rna_editing", section: "Sequences"},
    {label: "Sequence", fieldId: "sequence", section: "Sequences"},
    {label: "Sequence caution", fieldId: "cc_seq_caution", section: "Sequences"},
    {label: "Sequence conflict", fieldId: "ft_conflict", section: "Sequences"},
    {label: "Sequence uncertainty", fieldId: "ft_unsure", section: "Sequences"},
    {label: "Sequence version", fieldId: "sequence_version", section: "Sequences"},
    {label: "Absorption", fieldId: "absorption", section: "Function"},
    {label: "Active site", fieldId: "ft_act_site", section: "Function"},
    {label: "Activity regulation", fieldId: "cc_activity_regulation", section: "Function"},
    {label: "Binding site", fieldId: "ft_binding", section: "Function"},
    {label: "Catalytic activity", fieldId: "cc_catalytic_activity", section: "Function"},
    {label: "Cofactor", fieldId: "cc_cofactor", section: "Function"},
    {label: "DNA binding", fieldId: "ft_dna_bind", section: "Function"},
    {label: "EC number", fieldId: "ec", section: "Function"},
    {label: "Function [CC]", fieldId: "cc_function", section: "Function"},
    {label: "Kinetics", fieldId: "kinetics", section: "Function"},
    {label: "Pathway", fieldId: "cc_pathway", section: "Function"},
    {label: "pH dependence", fieldId: "ph_dependence", section: "Function"},
    {label: "Redox potential", fieldId: "redox_potential", section: "Function"},
    {label: "Rhea ID", fieldId: "rhea", section: "Function"},
    {label: "Site", fieldId: "ft_site", section: "Function"},
    {label: "Temperature dependence", fieldId: "temp_dependence", section: "Function"},
    {label: "Annotation", fieldId: "annotation_score", section: "Miscellaneous"},
    {label: "Caution", fieldId: "cc_caution", section: "Miscellaneous"},
    {label: "Comment Count", fieldId: "comment_count", section: "Miscellaneous"},
    {label: "Features", fieldId: "feature_count", section: "Miscellaneous"},
    {label: "Keyword ID", fieldId: "keywordid", section: "Miscellaneous"},
    {label: "Keywords", fieldId: "keyword", section: "Miscellaneous"},
    {label: "Miscellaneous [CC]", fieldId: "cc_miscellaneous", section: "Miscellaneous"},
    {label: "Protein existence", fieldId: "protein_existence", section: "Miscellaneous"},
    {label: "Reviewed", fieldId: "reviewed", section: "Miscellaneous"},
    {label: "Tools", fieldId: "tools", section: "Miscellaneous"},
    {label: "UniParc", fieldId: "uniparc_id", section: "Miscellaneous"},
    {label: "Interacts with", fieldId: "cc_interaction", section: "Interaction"},
    {label: "Subunit structure[CC]", fieldId: "cc_subunit", section: "Interaction"},
    {label: "Developmental stage", fieldId: "cc_developmental_stage", section: "Expression"},
    {label: "Induction", fieldId: "cc_induction", section: "Expression"},
    {label: "Tissue specificity", fieldId: "cc_tissue_specificity", section: "Expression"},
    {label: "Gene ontology (biological process)", fieldId: "go_p", section: "Gene Ontology (GO)"},
    {label: "Gene ontology (cellular component)", fieldId: "go_c", section: "Gene Ontology (GO)"},
    {label: "Gene ontology (GO)", fieldId: "go", section: "Gene Ontology (GO)"},
    {label: "Gene ontology (molecular function)", fieldId: "go_f", section: "Gene Ontology (GO)"},
    {label: "Gene ontology IDs", fieldId: "go_id", section: "Gene Ontology (GO)"},
    {label: "Allergenic properties", fieldId: "cc_allergen", section: "Pathology & Biotech"},
    {label: "Biotechnological use", fieldId: "cc_biotechnology", section: "Pathology & Biotech"},
    {label: "Disruption phenotype", fieldId: "cc_disruption_phenotype", section: "Pathology & Biotech"},
    {label: "Involvement in disease", fieldId: "cc_disease", section: "Pathology & Biotech"},
    {label: "Mutagenesis", fieldId: "ft_mutagen", section: "Pathology & Biotech"},
    {label: "Pharmaceutical use", fieldId: "cc_pharmaceutical", section: "Pathology & Biotech"},
    {label: "Toxic dose", fieldId: "cc_toxic_dose", section: "Pathology & Biotech"},
    {label: "Intramembrane", fieldId: "ft_intramem", section: "Subcellular Location"},
    {label: "Subcellular location[CC]", fieldId: "cc_subcellular_location", section: "Subcellular Location"},
    {label: "Topological domain", fieldId: "ft_topo_dom", section: "Subcellular Location"},
    {label: "Transmembrane", fieldId: "ft_transmem", section: "Subcellular Location"},
    {label: "Chain", fieldId: "ft_chain", section: "PTM / Processing"},
    {label: "Cross-link", fieldId: "ft_crosslnk", section: "PTM / Processing"},
    {label: "Disulfide bond", fieldId: "ft_disulfid", section: "PTM / Processing"},
    {label: "Glycosylation", fieldId: "ft_carbohyd", section: "PTM / Processing"},
    {label: "Initiator methionine", fieldId: "ft_init_met", section: "PTM / Processing"},
    {label: "Lipidation", fieldId: "ft_lipid", section: "PTM / Processing"},
    {label: "Modified residue", fieldId: "ft_mod_res", section: "PTM / Processing"},
    {label: "Peptide", fieldId: "ft_peptide", section: "PTM / Processing"},
    {label: "Post-translational modification", fieldId: "cc_ptm", section: "PTM / Processing"},
    {label: "Propeptide", fieldId: "ft_propep", section: "PTM / Processing"},
    {label: "Signal peptide", fieldId: "ft_signal", section: "PTM / Processing"},
    {label: "Transit peptide", fieldId: "ft_transit", section: "PTM / Processing"},
    {label: "3D", fieldId: "structure_3d", section: "Structure"},
    {label: "Beta strand", fieldId: "ft_strand", section: "Structure"},
    {label: "Helix", fieldId: "ft_helix", section: "Structure"},
    {label: "Turn", fieldId: "ft_turn", section: "Structure"},
    {label: "PubMed ID", fieldId: "lit_pubmed_id", section: "Publications"},
    {label: "Date of creation", fieldId: "date_created", section: "Date of"},
    {label: "Date of last modification", fieldId: "date_modified", section: "Date of"},
    {label: "Date of last sequence modification", fieldId: "date_sequence_modified", section: "Date of"},
    {label: "Entry version", fieldId: "version", section: "Date of"},
    {label: "Coiled coil", fieldId: "ft_coiled", section: "Family & Domains"},
    {label: "Compositional bias", fieldId: "ft_compbias", section: "Family & Domains"},
    {label: "Domain[CC]", fieldId: "cc_domain", section: "Family & Domains"},
    {label: "Domain[FT]", fieldId: "ft_domain", section: "Family & Domains"},
    {label: "Motif", fieldId: "ft_motif", section: "Family & Domains"},
    {label: "Protein families", fieldId: "protein_families", section: "Family & Domains"},
    {label: "Region", fieldId: "ft_region", section: "Family & Domains"},
    {label: "Repeat", fieldId: "ft_repeat", section: "Family & Domains"},
    {label: "Zinc finger", fieldId: "ft_zn_fing", section: "Family & Domains"},
    ...buildCrossReferenceColumns(),
]

/** Guards {@link installAxiosRetry} so the retry interceptors are registered on the shared axios instance only once. */
let axiosRetryInstalled = false;

/** Registers axios-retry on the default axios instance: 3 retries with exponential delay, only for HTTP 500 responses. */
function installAxiosRetry(): void {
    if (axiosRetryInstalled) {
        return;
    }
    axiosRetry(axios, {
        retries: 3,
        retryDelay: axiosRetry.exponentialDelay,
        retryCondition: (error: AxiosError) => {
            return error.response?.status === 500;
        }
    });
    axiosRetryInstalled = true;
}

/** Resolves after `ms` milliseconds. */
function sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Reads one response header as a string, or `null` when it is absent.
 * Accepts both header containers that expose a `get(name)` method and plain objects
 * keyed by lower-cased header name.
 */
function readHeader(headers: unknown, name: string): string | null {
    if (typeof headers !== "object" || headers === null) {
        return null;
    }
    const bag = headers as {get?: unknown, [key: string]: unknown};
    const value = typeof bag.get === "function"
        ? (bag.get as (header: string) => unknown).call(headers, name)
        : bag[name.toLowerCase()];
    return value === undefined || value === null ? null : String(value);
}

/**
 * Posts one ID-mapping job (target database `UniProtKB`) and resolves with its job id.
 * The form body is URL-encoded so identifiers or `from` values containing reserved
 * characters cannot corrupt the request.
 *
 * @throws the axios error when the request fails, or an `Error` when the response carries no job id.
 */
async function postMappingJob(ids: readonly string[], from: string): Promise<string> {
    const form = new URLSearchParams({ids: ids.join(","), from: from, to: "UniProtKB"});
    const res = await axios.post(baseUrl, form.toString(), {
        headers: {"Content-Type": "application/x-www-form-urlencoded", "Accept": "application/json"},
        responseType: "json"
    });
    const jobId: unknown = res.data?.jobId;
    if (typeof jobId !== "string" || jobId === "") {
        throw new Error("UniProt ID mapping response did not contain a jobId");
    }
    return jobId;
}

/**
 * Submits a job with `from`; when that attempt fails with HTTP 500 it is repeated once
 * with {@link fallbackFrom}. Failures are logged, never thrown.
 *
 * @param failureLabel - log prefix for the first failed attempt.
 * @returns the job id together with the `from` value that was accepted, or `null` when no job could be created.
 */
async function submitWithFallback(ids: readonly string[], from: string, failureLabel: string): Promise<{jobId: string, from: string} | null> {
    try {
        return {jobId: await postMappingJob(ids, from), from: from};
    } catch (e: unknown) {
        console.error(failureLabel, e);
        // Errors without a response (network failures, timeouts) have no status to inspect.
        if (!(axios.isAxiosError(e) && e.response?.status === 500)) {
            return null;
        }
    }
    try {
        console.log("Retrying with fallback from parameter");
        return {jobId: await postMappingJob(ids, fallbackFrom), from: fallbackFrom};
    } catch (fallbackError: unknown) {
        console.error("Fallback resubmission also failed:", fallbackError);
        return null;
    }
}

/** Bookkeeping for one submitted job: where to poll it, and what to resend if it errors. */
class ResultLink {
    /** Status URL currently being polled; replaced when the job is resubmitted. */
    url: string;
    /** Set once the job produced a result URL or was given up on. */
    completed: boolean = false;
    /** Seconds to wait between polling rounds. */
    pollInterval: number = 5;
    /** Offset into the caller's id list at which this job's ids start. */
    segment: number = 0;
    /** Number of times the job has reported `ERROR` so far. */
    retryCount: number = 0;
    /** `retryCount` value at which the job is abandoned. */
    maxRetries: number = 5;
    /** Identifiers submitted with this job. */
    ids: string[] = [];
    /** Source database the job was submitted with. */
    from: string = "";

    constructor(url: string, pollInterval: number = 5, segment: number = 0, maxRetries: number = 5, ids: string[] = [], from: string = "") {
        this.url = url;
        this.pollInterval = pollInterval;
        this.segment = segment;
        this.maxRetries = maxRetries;
        this.ids = ids;
        this.from = from;
    }
}

/**
 * Client for the ID-mapping REST endpoints: submits identifiers in segments,
 * polls each job until it finishes, then streams the result pages.
 */
export class Parser {
    /** Seconds between polling rounds. */
    pollingInterval: number = 5;
    /** Comma-separated field ids sent as the `fields` parameter. */
    columns: string = defaultColumns;
    /** Sent as the `includeIsoform` parameter (`"true"`/`"false"`). */
    includeIsoform: boolean = false;
    /** Sent as the `format` parameter. */
    format: string = "tsv";
    /** Every job submitted through this instance, across all `parse` calls. */
    resultUrl: ResultLink[] = [];
    /** Source database sent as the `from` parameter. */
    from: string = "UniProtKB_AC-ID";

    constructor(pollingInterval: number = 5, columns: string = defaultColumns, format: string = "tsv", includeIsoform: boolean = false, from: string = "UniProtKB_AC-ID") {
        this.pollingInterval = pollingInterval;
        this.columns = columns;
        this.includeIsoform = includeIsoform;
        this.format = format
        this.from = from;
        installAxiosRetry();
    }

    /**
     * Submits `ids` in slices of `segment` identifiers, then yields every result page of
     * every job that finishes. A slice whose submission fails is logged and skipped.
     *
     * @param ids - identifiers to map.
     * @param segment - maximum number of identifiers per job; fractional values are rounded down.
     * @throws RangeError when `segment` is smaller than 1 or not a number (the loop could not advance).
     */
    async *parse(ids: string[], segment: number = 10000) {
        const step = Math.floor(segment);
        if (!(step >= 1)) {
            throw new RangeError(`segment must be at least 1, got ${segment}`);
        }
        const total = ids.length;
        for (let offset = 0; offset < total; offset += step) {
            const currentSegment = ids.slice(offset, offset + step);
            const submitted = await submitWithFallback(currentSegment, this.from, "Failed to submit job:");
            if (submitted !== null) {
                this.resultUrl.push(new ResultLink(checkStatusUrl + submitted.jobId, this.pollingInterval, offset, 5, currentSegment, submitted.from));
            }
        }
        for await (const result of this.getResult()) {
            yield result;
        }
    }

    /**
     * Submits the identifiers of `resultLink` again, with the same HTTP 500 fallback as the first submission.
     *
     * @returns the new job id, or `null` when the resubmission failed.
     */
    async resubmitJob(resultLink: ResultLink): Promise<string | null> {
        console.log(`Resubmitting job for segment ${resultLink.segment} with ${resultLink.ids.length} IDs`);
        const submitted = await submitWithFallback(resultLink.ids, resultLink.from, "Failed to resubmit job:");
        return submitted === null ? null : submitted.jobId;
    }

    /**
     * For every finished job, downloads the first result page (500 rows per page) and then
     * follows the `Link` response header until no further page is advertised.
     *
     * Yields the raw page body, the `x-total-results` header parsed as an integer
     * (`NaN` when the header is missing) and the job's segment offset.
     * A failure on the first page of a job is logged and that job is skipped;
     * a failure on a later page propagates to the consumer.
     */
    async *getResult(): AsyncGenerator<{data:string, total: number, segment: number}> {
        for await (const {url, segment} of this.getResultStatus()) {
            let firstPage: {data: string, total: number, segment: number};
            let nextLink: string | null;
            try {
                const firstUrl = new URL(url);
                firstUrl.searchParams.set("format", this.format);
                firstUrl.searchParams.set("size", "500");
                firstUrl.searchParams.set("fields", this.columns);
                firstUrl.searchParams.set("includeIsoform", this.includeIsoform ? "true" : "false");
                const res = await axios.get<string>(firstUrl.toString(), {responseType: "text"});
                firstPage = {
                    data: res.data,
                    total: Number.parseInt(readHeader(res.headers, "x-total-results") ?? "", 10),
                    segment: segment
                };
                nextLink = readHeader(res.headers, "link");
            } catch (e: unknown) {
                console.log(e)
                continue;
            }
            yield firstPage;

            while (nextLink !== null) {
                const match = /<([^>]*)>/.exec(nextLink);
                const pageUrl = match ? match[1] : undefined;
                if (!pageUrl) {
                    // A Link header without a <url> target cannot be followed; stop instead of spinning.
                    break;
                }
                console.log("Next URL: " + pageUrl)
                const resNext = await axios.get<string>(pageUrl, {responseType: "text"});
                nextLink = readHeader(resNext.headers, "link");
                yield {
                    data: resNext.data,
                    total: Number.parseInt(readHeader(resNext.headers, "x-total-results") ?? "", 10),
                    segment: segment
                };
                // Pause between pages.
                await sleep(1000);
            }
        }
    }

    /**
     * Polls every unfinished job in rounds until each one has either produced a result URL
     * (which is yielded with the job's segment offset) or been given up on.
     *
     * A job reporting `jobStatus === "ERROR"` is resubmitted until its `maxRetries` is reached.
     * Fetch errors are logged and the job is polled again in the next round.
     */
    async *getResultStatus(): AsyncGenerator<{ url: string, segment: number }> {
        // Count only unfinished jobs: `resultUrl` also holds jobs completed by earlier parse() calls.
        let pending = this.resultUrl.filter((link) => !link.completed).length;
        const finish = (link: ResultLink): void => {
            link.completed = true;
            pending--;
        };

        while (pending > 0) {
            for (const link of this.resultUrl) {
                if (link.completed) {
                    continue;
                }
                try {
                    console.log("Getting status for " + link.url);
                    const res = await fetch(link.url, {
                        method: "GET",
                        headers: {"Accept": "application/json"},
                        mode: "cors"
                    });
                    console.log("Response status: " + res.status);

                    if (res.status === 0) {
                        console.error("Request failed. Possible reasons: Network error, CORS issue, request aborted, or timeout.");
                    } else if (res.status === 303) {
                        // Redirect was not followed automatically: fetch the target ourselves.
                        const location = res.headers.get("Location");
                        if (location) {
                            console.log("Redirect location: " + location);
                            const redirectRes = await fetch(location, {
                                method: "GET",
                                headers: {"Accept": "application/json"},
                                mode: "cors"
                            });
                            console.log("Redirect response status: " + redirectRes.status);
                            if (redirectRes.status === 200) {
                                finish(link);
                                console.log("Final URL: " + redirectRes.url);
                                yield {url: redirectRes.url, segment: link.segment};
                            }
                        }
                    } else if (res.status === 400) {
                        console.log("Error: Incorrect URL");
                        finish(link);
                    } else if (res.status === 200) {
                        if (!res.url.startsWith(checkStatusUrl)) {
                            // The request was redirected away from the status endpoint: results are ready.
                            finish(link);
                            console.log("Final URL: " + res.url);
                            yield {url: res.url, segment: link.segment};
                        } else {
                            const responseData = (await res.json()) as {jobStatus?: unknown, errors?: unknown} | null;
                            if (responseData?.jobStatus === "ERROR") {
                                console.error("Job returned ERROR status:", responseData.errors);
                                link.retryCount++;
                                if (link.retryCount >= link.maxRetries) {
                                    console.error(`Max retries (${link.maxRetries}) reached for segment ${link.segment}. Skipping.`);
                                    finish(link);
                                } else {
                                    const newJobId = await this.resubmitJob(link);
                                    if (newJobId) {
                                        console.log(`Job resubmitted successfully. New job ID: ${newJobId}`);
                                        link.url = checkStatusUrl + newJobId;
                                    } else {
                                        console.error("Failed to resubmit job. Marking as completed.");
                                        finish(link);
                                    }
                                }
                            }
                        }
                    } else {
                        console.log("Polling again in " + link.pollInterval + " seconds");
                    }
                } catch (e: unknown) {
                    console.error("Error during fetch:", e);
                }
            }
            if (pending > 0) {
                const first = this.resultUrl[0];
                await sleep(1000 * (first ? first.pollInterval : this.pollingInterval));
            }
        }
    }
}

/**
 * An accession string, optionally split into accession and isoform number.
 *
 * `acc` and `isoform` are filled only when `parseAcc` is true and the input contains an
 * accession; otherwise they stay empty and {@link Accession.toString} returns `""`.
 */
export class Accession {
    /** The input exactly as given. */
    rawAcc: string = "";
    /** Accession without the isoform suffix, or `""`. */
    acc: string = "";
    /** Isoform digits without the leading dash, or `""`. */
    isoform: string = "";

    constructor(rawAcc: string, parseAcc = false) {
        this.rawAcc = rawAcc;
        if (parseAcc) {
            const match = accRegex.exec(rawAcc);
            if (match) {
                this.acc = match[1] ?? "";
                this.isoform = match[4] ?? "";
            }
        }
    }

    /** Returns `acc-isoform` when an isoform was parsed, otherwise `acc`. */
    toString(): string {
        if (this.isoform !== "") {
            return this.acc + "-" + this.isoform;
        }
        return this.acc;
    }
}