/* This file is part of node-sword-interface.

   Copyright (C) 2019 - 2026 Tobias Klein <contact@tklein.info>

   node-sword-interface is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   node-sword-interface is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of 
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with node-sword-interface. See the file COPYING.
   If not, see <http://www.gnu.org/licenses/>. */

// Std includes
#include <sstream>
#include <string>
#include <regex>
#include <iomanip>

#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif

// Sword includes
#include <versekey.h>
#include <listkey.h>
#include <swmodule.h>
#include <swkey.h>
#include <versificationmgr.h>

// Own includes
#include "text_processor.hpp"
#include "module_store.hpp"
#include "module_helper.hpp"
#include "string_helper.hpp"
#include "strongs_entry.hpp"

using namespace std;
using namespace sword;

// Creates a text processor that works on the given module store and module helper.
// Both collaborators are kept by reference; the three rendering switches
// (markup, raw markup, Strong's with non-breaking spaces) all start out disabled.
TextProcessor::TextProcessor(ModuleStore& moduleStore, ModuleHelper& moduleHelper)
    : _moduleStore(moduleStore), _moduleHelper(moduleHelper)
{
    _markupEnabled = false;
    _rawMarkupEnabled = false;
    _strongsWithNbspEnabled = false;
}

// Converts a native file system path into a URL prefix for files below that path.
//
// - An empty path yields an empty string.
// - A single trailing '/' or '\' is dropped and every backslash becomes a forward slash.
// - The URL scheme depends on the build target: "file:///" on Windows,
//   "app://localhost/_app_file_/" on iOS and "file://" everywhere else.
string TextProcessor::getFileUrl(const string& nativePath)
{
    if (nativePath.empty()) {
        return "";
    }

    string path = nativePath;

    // Strip a single trailing slash or backslash (path is known to be non-empty here)
    if (path.back() == '/' || path.back() == '\\') {
        path.pop_back();
    }

    // Convert backslashes to forward slashes for URL compatibility.
    // A plain character scan gives the same result as a regular expression without the regex overhead.
    for (char& currentChar : path) {
        if (currentChar == '\\') {
            currentChar = '/';
        }
    }

    // Build the URL (Windows needs an extra slash in front of the drive letter)
#if _WIN32
    return "file:///" + path;
#elif TARGET_OS_IOS
    return "app://localhost/_app_file_/" + path;
#else
    return "file://" + path;
#endif
}

// Translates the raw markup of a verse (or heading) into the HTML-like markup that is handed to the client.
//
// The function works on a copy of the input and applies a fixed sequence of literal replacements.
// The order of the replacements matters, because later steps operate on the output of earlier ones.
//
// Parameters:
// - text:                          the raw entry text to be converted
// - chapter / verseNr:             written as attributes into generated section title elements
// - hasStrongs:                    if true (and Strong's with nbsp is enabled) spaces inside <w> elements
//                                  are replaced with &nbsp;
// - hasInconsistentClosingEndDivs: if true, the number of opening and closing divs is balanced at the end
// - moduleFileUrl:                 URL prefix used for img src attributes that start with "/"
// - hasThMLVariants:               if true, normalizeVariantClasses() is applied
//
// Returns the converted text.
string TextProcessor::getFilteredText(const string& text, int chapter, int verseNr, bool hasStrongs, bool hasInconsistentClosingEndDivs, const string& moduleFileUrl, bool hasThMLVariants)
{
    static string chapterFilter = "<chapter";

    static string lbBeginParagraph = "<lb type=\"x-begin-paragraph\"/>";
    static string lbEndParagraph = "<lb type=\"x-end-paragraph\"/>";
    static string emptyParagraphElement = "<p/>";
    static string lbElementFilter = "<lb ";
    static string lElementFilter = "<l ";
    static string lgElementFilter = "<lg ";
    static string noteStartElementFilter = "<note";
    static string noteEndElementFilter = "</note>";
    static string headStartElementFilter = "<head";
    static string headEndElementFilter = "</head>";
    static string appStartElementFilter = "<app";
    static string appEndElementFilter = "</app>";
    static string scripRefStartElementFilter = "<scripRef";
    static string scripRefEndElementFilter = "</scripRef>";
    static string rtxtStartElementFilter1 = "<rtxt type=";
    static string rtxtStartElementFilter2 = "<rtxt rend=";
    static string rtxtEndElementFilter = "</rtxt>";
    static string pbElementFilter = "<pb";
    static string quoteJesusElementFilter = "<q marker=\"\" who=\"Jesus\">";
    static string quoteElementFilter = "<q ";
    static string quoteEndElementFilter = "</q>";
    static string titleStartElementFilter = "<title";
    static string titleEndElementFilter = "</title>";
    static string segEndElementFilter = "</seg>";
    static string divTitleElementFilter = "<div class=\"title\"";
    static string secHeadClassFilter = "class=\"sechead\"";
    static string divMilestoneFilter = "<div type=\"x-milestone\"";
    static string xBrFilter = "x-br\"/>";
    static string divSIDFilter = "<div sID=";
    static string divEIDFilter = "<div eID=";
    static string divineNameStartElement = "<divineName>";
    static string divineNameEndElement = "</divineName>";
    static string strongsWElement = "<w lemma=";
    static string listStartElement = "<list";
    static string listEndElement = "</list>";
    static string itemStartElement = "<item";
    static string itemEndElement = "</item>";
    static string hiBold = "<hi type=\"bold";
    static string hiItalic = "<hi type=\"italic";
    static string hiSuper = "<hi type=\"super";

    static string fullStopWithoutSpace = ".<";
    static string questionMarkWithoutSpace = "?<";
    static string exclamationMarkWithoutSpace = "!<";
    static string commaWithoutSpace = ",<";
    static string semiColonWithoutSpace = ";<";
    static string colonWithoutSpace = ":<";

    string filteredText = text;
    
    // Remove the first pbElement, because it prevents correctly replacing the first note in the next step
    this->removePbElementsWithSpace(filteredText);

    // Remove <note type="variant"> if it occurs in the beginning of the verse (applicable for NA28), because it has
    // been observed that the note is not properly closed.
    static string noteTypeVariant = "<note type=\"variant\">";
    if (StringHelper::hasBeginning(filteredText, noteTypeVariant)) {
        filteredText.replace(0, noteTypeVariant.length(), "");
    }

    // Map the structural source elements to div elements carrying "sword-markup" classes
    this->findAndReplaceAll(filteredText, chapterFilter, "<chapter class=\"sword-markup sword-chapter\"");
    this->findAndReplaceAll(filteredText, lbBeginParagraph, "");
    this->findAndReplaceAll(filteredText, lbEndParagraph, "&nbsp;<div class=\"sword-markup sword-paragraph-end\"><br></div>");
    this->findAndReplaceAll(filteredText, emptyParagraphElement, "<br/><br/>");
    this->findAndReplaceAll(filteredText, lbElementFilter, "<div class=\"sword-markup sword-lb\" ");
    this->findAndReplaceAll(filteredText, lElementFilter, "<div class=\"sword-markup sword-l\" ");
    this->findAndReplaceAll(filteredText, lgElementFilter, "<div class=\"sword-markup sword-lg\" ");
    this->findAndReplaceAll(filteredText, noteStartElementFilter, "<div class=\"sword-markup sword-note\" ");
    this->findAndReplaceAll(filteredText, noteEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, headStartElementFilter, "<div class=\"sword-markup sword-head\" ");
    this->findAndReplaceAll(filteredText, headEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, appStartElementFilter, "<div class=\"sword-markup sword-app\" ");
    this->findAndReplaceAll(filteredText, appEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, scripRefStartElementFilter, "<div class=\"sword-markup sword-scripref\" ");
    this->findAndReplaceAll(filteredText, scripRefEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, rtxtStartElementFilter1, "<div class=\"sword-markup sword-rtxt\" type=");
    this->findAndReplaceAll(filteredText, rtxtStartElementFilter2, "<div class=\"sword-markup sword-rtxt\" rend=");
    this->findAndReplaceAll(filteredText, rtxtEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, pbElementFilter, "<pb class=\"sword-markup sword-pb\"");
    
    // Line milestones must be turned into <br/> before the remaining milestones are removed
    this->replaceMilestoneLineElements(filteredText);
    this->removeMilestoneElements(filteredText);
    this->removeSegStartElements(filteredText);
    this->removeDivSectionElements(filteredText);

    // Section titles carry the chapter and verse they belong to
    stringstream sectionTitleElement;
    sectionTitleElement << "<div class=\"sword-markup sword-section-title\" ";
    sectionTitleElement << "chapter=\"" << chapter << "\" ";
    sectionTitleElement << "verse=\"" << verseNr << "\"";
    this->findAndReplaceAll(filteredText, titleStartElementFilter, sectionTitleElement.str());
    this->findAndReplaceAll(filteredText, divTitleElementFilter, sectionTitleElement.str());

    stringstream secHead;
    secHead << "class=\"sword-markup sword-section-title\" ";
    secHead << "chapter=\"" << chapter << "\"";
    this->findAndReplaceAll(filteredText, secHeadClassFilter, secHead.str());

    // If the text contains a chapter title marker, all section titles additionally get the chapter title class
    if (filteredText.find("subType=\"x-Chapter") != string::npos ||
        filteredText.find("type=\"chapter") != string::npos) {

      static string swordSectionTitle = "sword-section-title";
      static string swordSectionTitleChapter = "sword-section-title sword-chapter-title";
      this->findAndReplaceAll(filteredText, swordSectionTitle, swordSectionTitleChapter);
    }

    this->findAndReplaceAll(filteredText, titleEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, segEndElementFilter, "");
    this->findAndReplaceAll(filteredText, divMilestoneFilter, "<div class=\"sword-markup sword-x-milestone\"");
    this->findAndReplaceAll(filteredText, xBrFilter, "x-br\"/> ");
    this->findAndReplaceAll(filteredText, divSIDFilter, "<div class=\"sword-markup sword-sid\" sID=");
    this->findAndReplaceAll(filteredText, divEIDFilter, "<div class=\"sword-markup sword-eid\" eID=");
    // The specific Jesus quote element must be handled before the generic quote element
    this->findAndReplaceAll(filteredText, quoteJesusElementFilter, "<div class=\"sword-markup sword-quote-jesus\">");
    this->findAndReplaceAll(filteredText, quoteElementFilter, "<div class=\"sword-markup sword-quote\" ");
    this->findAndReplaceAll(filteredText, quoteEndElementFilter, "</div>");
    this->findAndReplaceAll(filteredText, divineNameStartElement, "");
    this->findAndReplaceAll(filteredText, divineNameEndElement, "");
    this->findAndReplaceAll(filteredText, strongsWElement, "<w class=");
    this->findAndReplaceAll(filteredText, listStartElement, "<ul");
    this->findAndReplaceAll(filteredText, listEndElement, "</ul>");
    this->findAndReplaceAll(filteredText, itemStartElement, "<li");
    this->findAndReplaceAll(filteredText, itemEndElement, "</li>");
    this->findAndReplaceAll(filteredText, hiBold, "<hi class=\"bold");
    this->findAndReplaceAll(filteredText, hiItalic, "<hi class=\"italic");
    this->findAndReplaceAll(filteredText, hiSuper, "<hi class=\"super");

    this->expandSelfClosingElements(filteredText);
    if (hasThMLVariants) {
        this->normalizeVariantClasses(filteredText);
    }

    // Make sure that punctuation is followed by a space before the next element starts
    this->findAndReplaceAll(filteredText, fullStopWithoutSpace, ". <");
    this->findAndReplaceAll(filteredText, questionMarkWithoutSpace, "? <");
    this->findAndReplaceAll(filteredText, exclamationMarkWithoutSpace, "! <");
    this->findAndReplaceAll(filteredText, commaWithoutSpace, ", <");
    this->findAndReplaceAll(filteredText, semiColonWithoutSpace, "; <");
    this->findAndReplaceAll(filteredText, colonWithoutSpace, ": <");

    if (hasInconsistentClosingEndDivs) {
        static const string closingDiv = "</div>";

        int numberOfOpeningDivs = StringHelper::numberOfSubstrings(filteredText, "<div");
        int numberOfClosingDivs = StringHelper::numberOfSubstrings(filteredText, "</div>");

        // Remove the last closing div(s) if the number of closing divs is higher than the number of opening divs
        if (numberOfClosingDivs > numberOfOpeningDivs) {
            unsigned int diff = numberOfClosingDivs - numberOfOpeningDivs;

            for (unsigned int i = 0; i < diff; i++) {
                size_t lastClosingDivOffset = filteredText.rfind(closingDiv);

                // Defensive: never erase based on an invalid offset
                if (lastClosingDivOffset == string::npos) {
                    break;
                }

                filteredText.erase(lastClosingDivOffset, closingDiv.length());
            }
        }

        // Add closing div(s) if the number of closing divs is smaller than the number of opening divs
        if (numberOfClosingDivs < numberOfOpeningDivs) {
            unsigned int diff = numberOfOpeningDivs - numberOfClosingDivs;

            for (unsigned int i = 0; i < diff; i++) {
                size_t lastClosingDivOffset = filteredText.rfind(closingDiv);

                if (lastClosingDivOffset == string::npos) {
                    // There is no closing div at all yet. Adding the length to npos would wrap around
                    // to a bogus offset, so the missing closing div is appended at the end instead.
                    filteredText.append(closingDiv);
                } else {
                    filteredText.insert(lastClosingDivOffset + closingDiv.length(), closingDiv);
                }
            }
        }
    }

    if (hasStrongs && this->_strongsWithNbspEnabled) {
        filteredText = this->replaceSpacesInStrongs(filteredText);
    }

    // Prefix img src attributes starting with "/" with the module file URL
    this->processImageUrls(filteredText, moduleFileUrl);

    return filteredText;
}

// Returns the chapter heading that belongs to the verse the module is currently positioned on.
//
// A heading is only read when the module is positioned on verse 1 of a chapter: the key is
// temporarily moved to verse 0 of that chapter, the raw entry is read and the key is moved back to verse 1.
// For chapter 1 an empty string is returned, because getBookIntroduction() already includes the 1:0 content.
//
// If markup is enabled (and raw markup is not), the heading is passed through getFilteredText(),
// except for modules with known broken headings, for which an empty string is returned.
string TextProcessor::getCurrentChapterHeading(sword::SWModule* module, const string& moduleFileUrl, bool hasThMLVariants)
{
    const string moduleName(module->getName());
    VerseKey verseKey = module->getKey();
    const int chapter = verseKey.getChapter();
    const int verseNr = verseKey.getVerse();
    string heading = "";

    if (verseNr == 1) { // X:1, set key to X:0
        // Include chapter/book/testament/module intros
        verseKey.setIntros(true);

        // Chapter 1:0 is always delivered as part of the book introduction (see getBookIntroduction()),
        // so it must not be duplicated here.
        if (chapter == 1) {
            return "";
        }

        verseKey.setVerse(0);
        module->setKey(verseKey);

        heading = string(module->getRawEntry());
        StringHelper::trim(heading);

        // Set verse back to X:1
        verseKey.setVerse(1);
        module->setKey(verseKey);
    }

    // Without markup (or with raw markup) the heading is returned untouched
    if (!this->_markupEnabled || this->_rawMarkupEnabled) {
        return heading;
    }

    // The chapter headings in the ISV (International Standard Version) are screwed up somehow for 1:1.
    // Note: because of the early return for chapter 1 above, this condition cannot currently become true.
    const bool isvFirstVerseOfBook = (moduleName == "ISV" && chapter == 1 && verseNr == 1);

    // The AB module (Apostle's Bible) has broken chapter headings for X:1
    const bool abFirstVerseOfChapter = (moduleName == "AB" && verseNr == 1);

    if (isvFirstVerseOfBook || abFirstVerseOfChapter) {
        return "";
    }

    return this->getFilteredText(heading, chapter, verseNr, false, false, moduleFileUrl, hasThMLVariants);
}

// Convenience overload: determines the module file URL and the ThMLVariants option for the given module
// and then delegates to the overload that takes both values explicitly.
string TextProcessor::getCurrentVerseText(sword::SWModule* module, bool hasStrongs, bool hasInconsistentClosingEndDivs, bool forceNoMarkup)
{
    const string dataPath = this->_moduleStore.getModuleDataPath(module);
    const string fileUrl = this->getFileUrl(dataPath);
    const bool thmlVariants = this->_moduleHelper.moduleHasGlobalOption(module, "ThMLVariants");

    return this->getCurrentVerseText(module, hasStrongs, hasInconsistentClosingEndDivs, forceNoMarkup, fileUrl, thmlVariants);
}

// Returns the text of the verse the module is currently positioned on.
//
// - If markup is disabled or forceNoMarkup is set, the trimmed stripped text of the module is returned.
// - Otherwise the trimmed raw entry is used. It is returned as is when raw markup is enabled
//   and passed through getFilteredText() when it is not.
string TextProcessor::getCurrentVerseText(sword::SWModule* module, bool hasStrongs, bool hasInconsistentClosingEndDivs, bool forceNoMarkup, const string& moduleFileUrl, bool hasThMLVariants)
{
    const bool useMarkup = this->_markupEnabled && !forceNoMarkup;

    if (!useMarkup) {
        string plainText(module->stripText());
        StringHelper::trim(plainText);
        return plainText;
    }

    // Chapter and verse number are needed for the section title attributes generated by the filter
    VerseKey verseKey = module->getKey();
    const int chapter = verseKey.getChapter();
    const int verseNr = verseKey.getVerse();

    string rawText(module->getRawEntry());
    StringHelper::trim(rawText);

    if (this->_rawMarkupEnabled) {
        return rawText;
    }

    return this->getFilteredText(rawText, chapter, verseNr, hasStrongs, hasInconsistentClosingEndDivs, moduleFileUrl, hasThMLVariants);
}

// Returns the text of the given module, starting at Genesis 1:1 and using the default arguments of getText().
vector<Verse> TextProcessor::getBibleText(string moduleName)
{
    const string firstVerseKey = "Gen 1:1";
    return this->getText(moduleName, firstVerseKey);
}

// Returns the verse for a single reference in the given module.
//
// The returned verse always has absoluteVerseNumber set to -1.
// A verse with the requested reference and empty content is returned if
// - the module is not available locally,
// - the module has no entry for the reference, or
// - getText() does not deliver any verse for the reference.
Verse TextProcessor::getReferenceText(std::string moduleName, std::string reference)
{
    // Fallback result for all cases in which no text can be delivered
    Verse emptyVerse;
    emptyVerse.reference = reference;
    emptyVerse.absoluteVerseNumber = -1;
    emptyVerse.content = "";

    SWModule* module = this->_moduleStore.getLocalModule(moduleName);

    if (module == 0) {
        cerr << "getLocalModule returned zero pointer for " << moduleName << endl;
        return emptyVerse;
    }

    module->setKey(reference.c_str());

    if (!module->hasEntry(module->getKey())) {
        return emptyVerse;
    }

    vector<Verse> verses = this->getText(moduleName, reference, QueryLimit::book, -1, 1);

    // getText() skips verses of books it considers non-existing, so the result may be empty
    if (verses.empty()) {
        return emptyVerse;
    }

    verses[0].absoluteVerseNumber = -1;
    return verses[0];
}

// Returns verses of the given book, starting at "<bookCode> 1:1" and limited to that book.
// startVerseNumber and verseCount are forwarded to getText() unchanged.
vector<Verse> TextProcessor::getBookText(string moduleName, string bookCode, int startVerseNumber, int verseCount)
{
    const string firstVerseOfBook = bookCode + " 1:1";

    return this->getText(moduleName, firstVerseOfBook, QueryLimit::book, startVerseNumber, verseCount);
}

// Returns the verses of one chapter, starting at "<bookCode> <chapter>:1" and limited to that chapter.
vector<Verse> TextProcessor::getChapterText(string moduleName, string bookCode, int chapter)
{
    const string firstVerseOfChapter = bookCode + " " + to_string(chapter) + ":1";

    return this->getText(moduleName, firstVerseOfChapter, QueryLimit::chapter);
}

// Returns the OSIS book name that a VerseKey reports for the given reference.
string TextProcessor::getBookFromReference(string reference)
{
    VerseKey parsedReference(reference.c_str());
    const string osisBookName(parsedReference.getOSISBookName());

    return osisBookName;
}

// Returns the books that occur in the given references.
// Each book is listed once, in the order of its first occurrence.
vector<string> TextProcessor::getBookListFromReferences(vector<string>& references)
{
    vector<string> uniqueBooks;

    for (const string& reference : references) {
        const string book = this->getBookFromReference(reference);
        const bool alreadyListed = find(uniqueBooks.begin(), uniqueBooks.end(), book) != uniqueBooks.end();

        if (!alreadyListed) {
            uniqueBooks.push_back(book);
        }
    }

    return uniqueBooks;
}

// Returns one verse per given reference, in the order of the references.
//
// For references without an entry in the module the verse content is left empty.
// The absolute verse number is looked up in the map delivered by
// ModuleHelper::getAbsoluteVerseNumberMap(); references that are not part of that map get 0.
// If the module is not available locally, an error is logged and an empty list is returned.
vector<Verse> TextProcessor::getVersesFromReferences(string moduleName, vector<string>& references)
{
    vector<Verse> verses;
    SWModule* module = this->_moduleStore.getLocalModule(moduleName);

    // Same handling of a missing module as in getText() / getBookIntroduction()
    if (module == 0) {
        cerr << "getLocalModule returned zero pointer for " << moduleName << endl;
        return verses;
    }

    vector<string> bookList = this->getBookListFromReferences(references);
    map<string, int> absoluteVerseNumbers = this->_moduleHelper.getAbsoluteVerseNumberMap(module, bookList);
    bool moduleMarkupIsBroken = this->_moduleHelper.isBrokenMarkupModule(moduleName);
    bool hasInconsistentClosingEndDivs = this->_moduleHelper.isInconsistentClosingEndDivModule(moduleName);
    bool hasThMLVariants = this->_moduleHelper.moduleHasGlobalOption(module, "ThMLVariants");

    // Compute file URL once for the entire module
    string moduleFileUrl = this->getFileUrl(this->_moduleStore.getModuleDataPath(module));

    for (const string& currentReference : references) {
        string currentVerseText = "";

        module->setKey(currentReference.c_str());
        bool entryExisting = module->hasEntry(module->getKey());

        if (entryExisting) {
            // If the module markup is broken, the non-markup variant of the text is enforced
            currentVerseText = this->getCurrentVerseText(module, false, hasInconsistentClosingEndDivs, moduleMarkupIsBroken, moduleFileUrl, hasThMLVariants);
        }

        Verse currentVerse;
        currentVerse.reference = module->getKey()->getShortText();
        currentVerse.absoluteVerseNumber = absoluteVerseNumbers[currentVerse.reference];
        currentVerse.content = currentVerseText;
        verses.push_back(currentVerse);
    }

    return verses;
}

// Expands a reference range / verse list into its individual references.
// The range is parsed with VerseKey::parseVerseList() and each resulting key is returned as OSIS reference.
vector<string> TextProcessor::getReferencesFromReferenceRange(std::string referenceRange)
{
    vector<string> osisReferences;
    VerseKey parser;
    ListKey result;

    result = parser.parseVerseList(referenceRange.c_str(), parser, true);

    // Walk the list from its first element until the key reports an error
    result = TOP;
    while (!result.popError()) {
        VerseKey currentKey(result.getShortText());
        osisReferences.push_back(string(currentKey.getOSISRef()));

        result++;
    }

    return osisReferences;
}

// Reads consecutive verses from a module, starting at the given key.
//
// Parameters:
// - moduleName:       name of the local module to read from
// - key:              key text of the first verse
// - queryLimit:       QueryLimit::book stops at the end of the book, QueryLimit::chapter at the end of the chapter
// - startVerseNumber: if >= 1, the module is advanced by (startVerseNumber - 1) entries before reading;
//                     any other value is treated as 1
// - verseCount:       if >= 1, reading stops after that many loop iterations
//
// Returns the verses that were read. The absolute verse number of each verse is startVerseNumber plus
// the number of loop iterations performed so far. If the module is not available locally,
// an error is logged and an empty list is returned.
vector<Verse> TextProcessor::getText(string moduleName, string key, QueryLimit queryLimit, int startVerseNumber, int verseCount)
{
    SWModule* module = this->_moduleStore.getLocalModule(moduleName);
    string lastKey;
    int index = 0;
    string lastBookName = "";
    int lastChapter = -1;
    bool currentBookExisting = true;
    bool moduleMarkupIsBroken = this->_moduleHelper.isBrokenMarkupModule(moduleName);
    bool hasInconsistentClosingEndDivs = this->_moduleHelper.isInconsistentClosingEndDivModule(moduleName);

    // This holds the text that we will return
    vector<Verse> text;

    if (module == 0) {
        cerr << "getLocalModule returned zero pointer for " << moduleName << endl;
    } else {
        bool hasStrongs = this->_moduleHelper.moduleHasGlobalOption(module, "Strongs");
        bool hasThMLVariants = this->_moduleHelper.moduleHasGlobalOption(module, "ThMLVariants");

        // Compute file URL once for the entire module
        string moduleFileUrl = this->getFileUrl(this->_moduleStore.getModuleDataPath(module));

        module->setKey(key.c_str());

        // Skip ahead to the requested start verse, or normalize the start verse number to 1
        if (startVerseNumber >= 1) {
          module->increment(startVerseNumber - 1);
        } else {
          startVerseNumber = 1;
        }
        
        for (;;) {
            VerseKey currentVerseKey(module->getKey());
            string currentBookName(currentVerseKey.getBookAbbrev());
            int currentChapter = currentVerseKey.getChapter();
            bool firstVerseInBook = false;
            bool firstVerseInChapter = (currentVerseKey.getVerse() == 1);
            string verseText = "";
            string currentKey(module->getKey()->getShortText());

            // Stop, once the newly read key is the same as the previously read key
            // (the increment at the end of the previous iteration did not move the key any further)
            if (currentKey == lastKey) { break; }
            // Stop, once the newly read key is in a different book than the previously read key
            if (queryLimit == QueryLimit::book && (index > 0) && (currentBookName != lastBookName)) { break; }
            // Stop, once the newly read key is in a different chapter than the previously read key
            if (queryLimit == QueryLimit::chapter && (index > 0) && (currentChapter != lastChapter)) { break; }
            // Stop once the maximum number of verses is reached
            if (startVerseNumber >= 1 && verseCount >= 1 && (index == verseCount)) { break; }

            // A new book starts: assume it exists until its first verse turns out to be empty (see below)
            if (currentBookName != lastBookName) {
                currentBookExisting = true;
                firstVerseInBook = true;
            }

            // Chapter heading
            // We only add it when we're looking at the first verse of a chapter
            // and if the module markup is not broken
            // and if the requested verse count is more than one or the default (-1 / all verses).
            if (firstVerseInChapter && !moduleMarkupIsBroken && (verseCount > 1 || verseCount == -1)) {
                string chapterHeading = this->getCurrentChapterHeading(module, moduleFileUrl, hasThMLVariants);
                verseText += chapterHeading;
            }
            
            // Current verse text
            verseText += this->getCurrentVerseText(module,
                                                   hasStrongs,
                                                   hasInconsistentClosingEndDivs,
                                                   // Note that if markup is broken this will enforce
                                                   // the usage of the "stripped" / non-markup variant of the text
                                                   moduleMarkupIsBroken,
                                                   moduleFileUrl,
                                                   hasThMLVariants);

            // If the current verse does not have any content and if it is the first verse in this book
            // we assume that the book is not existing.
            if (verseText.length() == 0 && firstVerseInBook) { currentBookExisting = false; }

            // Verses of books that are considered non-existing are skipped, but still counted by index
            if (currentBookExisting) {
                Verse currentVerse;
                currentVerse.reference = module->getKey()->getShortText();
                currentVerse.absoluteVerseNumber = startVerseNumber + index;
                currentVerse.content = verseText;
                text.push_back(currentVerse);
            }

            // Remember the current position for the stop conditions of the next iteration
            lastKey = currentKey;
            lastBookName = currentBookName;
            lastChapter = currentChapter;
            
            module->increment();
            
            index++;
        }
    }

    return text;
}

// Returns the introduction of the given book with its markup converted for the client.
//
// The introduction consists of the raw entry at chapter 0, verse 0 followed by the raw entry
// at chapter 1, verse 0 (separated by a newline if both are non-empty).
// <title>, <note> and <head> elements are mapped to div elements, chapter div start elements
// are removed and img src attributes starting with "/" are prefixed with the module file URL.
// If the module is not available locally, an error is logged and an empty string is returned.
string TextProcessor::getBookIntroduction(string moduleName, string bookCode)
{
    SWModule* module = this->_moduleStore.getLocalModule(moduleName);

    if (module == 0) {
        cerr << "getLocalModule returned zero pointer for " << moduleName << endl;
        return "";
    }

    // Get module data path BEFORE manipulating the module key
    string moduleDataPath = this->_moduleStore.getModuleDataPath(module);
    string moduleFileUrl = this->getFileUrl(moduleDataPath);

    module->setKeyText(bookCode.c_str());

    // Create a local VerseKey copy to avoid pointer invalidation issues
    VerseKey verseKey = module->getKey();

    // Include chapter/book/testament/module intros
    verseKey.setIntros(true);

    // Get book intro from chapter 0, verse 0
    // This may contain testament intro for first books (Genesis, Matthew)
    verseKey.setChapter(0);
    verseKey.setVerse(0);
    module->setKey(verseKey);

    string bookIntroText = string(module->getRawEntry());
    StringHelper::trim(bookIntroText);

    // Also fetch chapter 1:0 content and append it
    // Many modules store book-level intro content (images, titles) in chapter 1:0
    // We always include this to handle both cases:
    // - Modules where 0:0 is empty and 1:0 has book intro
    // - First books of testaments where 0:0 has testament intro and 1:0 has book intro
    verseKey.setChapter(1);
    verseKey.setVerse(0);
    module->setKey(verseKey);

    string chapter1Intro = string(module->getRawEntry());
    StringHelper::trim(chapter1Intro);

    if (!chapter1Intro.empty()) {
        if (!bookIntroText.empty()) {
            bookIntroText += "\n";
        }
        bookIntroText += chapter1Intro;
    }

    // Only the chapter div needs a real pattern; everything else is a literal replacement
    static regex chapterDivFilter = regex("<div type=\"chapter\" n=\"[0-9]{1}\" id=\"[-A-Z0-9]{1,8}\">");

    string filteredText = bookIntroText;

    // Literal replacements are done with findAndReplaceAll (as in getFilteredText) instead of regular expressions
    this->findAndReplaceAll(filteredText, "<title", "<div class=\"sword-markup sword-book-title\"");
    this->findAndReplaceAll(filteredText, "</title>", "</div>");
    this->findAndReplaceAll(filteredText, "<note", "<div class=\"sword-markup sword-note\"");
    this->findAndReplaceAll(filteredText, "</note>", "</div>");
    this->findAndReplaceAll(filteredText, "<head", "<div class=\"sword-markup sword-head\"");
    this->findAndReplaceAll(filteredText, "</head>", "</div>");
    filteredText = regex_replace(filteredText, chapterDivFilter, "");

    // Prefix img src attributes starting with "/" with the module file URL
    this->processImageUrls(filteredText, moduleFileUrl);

    return filteredText;
}

// Prefixes every src attribute value that starts with "/" with the given module file URL.
// The text is modified in place; nothing happens if the URL is empty.
void TextProcessor::processImageUrls(string& text, const string& moduleFileUrl)
{
    if (moduleFileUrl.empty()) {
        return;
    }

    static string rootRelativeSrc = "src=\"/";

    string prefixedSrc = "src=\"";
    prefixedSrc += moduleFileUrl;
    prefixedSrc += "/";

    this->findAndReplaceAll(text, rootRelativeSrc, prefixedSrc);
}

// Convenience overload: derives the module file URL from the data path of the given module
// and delegates to the overload that takes the URL.
void TextProcessor::processImageUrls(string& text, sword::SWModule* module)
{
    const string dataPath = this->_moduleStore.getModuleDataPath(module);
    const string fileUrl = this->getFileUrl(dataPath);

    this->processImageUrls(text, fileUrl);
}

// Replaces all spaces in the text content that directly precedes a closing </w> element with "&nbsp;".
//
// A section matches the pattern ">[^<]*</w>", i.e. it starts at a '>' and runs up to and including
// the next "</w>" without any '<' in between. Everything outside of such sections is copied unchanged.
// Returns the resulting text; the input is not modified.
string TextProcessor::replaceSpacesInStrongs(const string& text)
{
    static const regex strongsWText(">[^<]*</w>");
    static const string nbsp = "&nbsp;";

    string filteredText;
    filteredText.reserve(text.size());

    // The input is scanned in place with iterators, so the remaining input is never copied
    string::const_iterator cursor = text.begin();
    const string::const_iterator end = text.end();
    smatch m;

    // Search for Strongs pattern and then replace all spaces within each occurrence
    while (regex_search(cursor, end, m, strongsWText)) {
        // Copy everything in front of the match unchanged
        filteredText.append(cursor, m[0].first);

        for (string::const_iterator it = m[0].first; it != m[0].second; ++it) {
            if (*it == ' ') {
                filteredText += nbsp;
            } else {
                filteredText += *it;
            }
        }

        // A match is never empty (it contains at least "></w>"), so the cursor always advances
        cursor = m[0].second;
    }

    filteredText.append(cursor, end);

    return filteredText;
}

// Checks whether the module writes Strong's numbers with a zero prefix.
//
// The module key is set to Gen 1:1 and the verse is read with markup temporarily enabled.
// The result is true if the verse text contains "strong:H07225" (Strong's number for "beginning").
// Note that the module key is left at Gen 1:1.
bool TextProcessor::moduleHasStrongsZeroPrefixes(sword::SWModule* module)
{
    static const string probeKey = "Gen 1:1";
    static const string zeroPrefixedStrongs = "strong:H07225";

    module->setKey(probeKey.c_str());

    // Markup is needed to see the Strong's numbers; restore the previous setting afterwards
    const bool markupWasEnabled = this->_markupEnabled;
    this->enableMarkup();
    const string probeText = this->getCurrentVerseText(module, true);
    this->_markupEnabled = markupWasEnabled;

    return probeText.find(zeroPrefixedStrongs) != string::npos;
}

// Checks whether the module writes Strong's numbers padded with zeros.
//
// The module key is set to Gen 2:24 and the verse is read with markup temporarily enabled.
// The result is true if the verse text contains "strong:H0001" (Strong's number for "father").
// Note that the module key is left at Gen 2:24.
bool TextProcessor::moduleHasStrongsPaddedZeroPrefixes(sword::SWModule* module)
{
    static const string probeKey = "Gen 2:24";
    static const string paddedStrongs = "strong:H0001";

    module->setKey(probeKey.c_str());

    // Markup is needed to see the Strong's numbers; restore the previous setting afterwards
    const bool markupWasEnabled = this->_markupEnabled;
    this->enableMarkup();
    const string probeText = this->getCurrentVerseText(module, true);
    this->_markupEnabled = markupWasEnabled;

    return probeText.find(paddedStrongs) != string::npos;
}

// Returns the numeric part of a Strong's number, padded with leading zeros to at least four digits.
//
// Leading non-digit characters (e.g. 'H', 'G' prefixes) and everything after the first run of digits
// are dropped, e.g. "H430" -> "0430", "G1" -> "0001", "H12345" -> "12345".
// If the input does not contain any digit (this includes the empty string), it is returned unchanged.
//
// The padding is done on the digits as text, so arbitrarily long digit runs are handled
// without a numeric conversion that could throw on overflow.
string TextProcessor::padStrongsNumber(const string strongsNumber) {
    static const char* const digitChars = "0123456789";
    static const size_t minWidth = 4;

    // Skip any leading non-digit characters (e.g. 'H', 'G' prefixes)
    const size_t numStart = strongsNumber.find_first_of(digitChars);
    if (numStart == string::npos) {
        return strongsNumber;
    }

    // The number ends at the first non-digit character (or at the end of the string)
    size_t numEnd = strongsNumber.find_first_not_of(digitChars, numStart);
    if (numEnd == string::npos) {
        numEnd = strongsNumber.size();
    }

    // Drop existing leading zeros, but keep at least one digit (a run of zeros means the value 0)
    size_t firstSignificant = strongsNumber.find_first_not_of('0', numStart);
    if (firstSignificant == string::npos || firstSignificant >= numEnd) {
        firstSignificant = numEnd - 1;
    }

    string padded = strongsNumber.substr(firstSignificant, numEnd - firstSignificant);

    if (padded.size() < minWidth) {
        padded.insert(0, minWidth - padded.size(), '0');
    }

    return padded;
}

// Checks whether the module delivers any text for the given key.
// The module key is set to the given key and is left there.
bool TextProcessor::isModuleReadable(sword::SWModule* module, std::string key)
{
    module->setKey(key.c_str());

    const string probeText = this->getCurrentVerseText(module, false);
    return !probeText.empty();
}

// Looks up the Strong's entry for the given key.
//
// The first character of the key selects the dictionary module:
// 'H' -> "StrongsHebrew", 'G' -> "StrongsGreek". For any other first character, or if the
// selected module is not available locally, a null pointer is returned.
// Otherwise the result of StrongsEntry::getStrongsEntry() is returned.
StrongsEntry* TextProcessor::getStrongsEntry(string key)
{
    SWModule* dictionary = nullptr;

    switch (key[0]) {
        case 'H':
            dictionary = this->_moduleStore.getLocalModule("StrongsHebrew");
            break;

        case 'G':
            dictionary = this->_moduleStore.getLocalModule("StrongsGreek");
            break;

        default:
            return nullptr;
    }

    if (dictionary == nullptr) {
        cerr << "No valid Strong's module available!" << endl;
        return nullptr;
    }

    return StrongsEntry::getStrongsEntry(dictionary, key);
}

// Replaces every occurrence of toSearch in data with replaceStr (in place).
//
// The search continues behind each inserted replacement, so occurrences of toSearch
// inside replaceStr are not replaced again.
// Returns the number of replacements. An empty search string never matches and yields 0.
unsigned int TextProcessor::findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
    unsigned int count = 0;

    // An empty search string is found at every position, which would make the loop below run forever
    if (toSearch.empty()) {
        return count;
    }

    // Get the first occurrence
    size_t pos = data.find(toSearch);

    // Repeat till end is reached
    while (pos != std::string::npos)
    {
        count++;
        // Replace this occurrence of Sub String
        data.replace(pos, toSearch.size(), replaceStr);
        // Get the next occurrence, starting behind the replacement that was just inserted
        pos = data.find(toSearch, pos + replaceStr.size());
    }

    return count;
}

// Erases every section of data that starts with prefix and ends with the next occurrence of suffix
// behind that prefix (string-based counterpart of a regex like "<prefix.*?suffix>").
// Processing stops at the first prefix that has no suffix behind it.
void TextProcessor::removeElementsByPrefixSuffix(std::string& data, const std::string& prefix, const std::string& suffix)
{
    std::string::size_type elementStart = data.find(prefix, 0);

    while (elementStart != std::string::npos) {
        const std::string::size_type suffixStart = data.find(suffix, elementStart + prefix.size());

        if (suffixStart == std::string::npos) {
            return;
        }

        const std::string::size_type elementLength = suffixStart + suffix.size() - elementStart;
        data.erase(elementStart, elementLength);

        // Continue at the position where the erased element used to start
        elementStart = data.find(prefix, elementStart);
    }
}

// Replaces each milestone element that contains type="line" with <br/>.
// An element spans from "<milestone" up to and including the next "/>".
// Milestones without type="line" are left in place. Scanning stops at the
// first "<milestone" that has no "/>" after it.
void TextProcessor::replaceMilestoneLineElements(std::string& data)
{
    static const std::string openToken = "<milestone";
    static const std::string closeToken = "/>";
    static const std::string lineAttribute = "type=\"line\"";
    static const std::string lineBreak = "<br/>";

    size_t cursor = data.find(openToken);

    while (cursor != std::string::npos) {
        const size_t closePos = data.find(closeToken, cursor);
        if (closePos == std::string::npos) {
            return;
        }

        const size_t elementEnd = closePos + closeToken.size();

        // The attribute only counts when it lies completely inside this element
        const size_t attributePos = data.find(lineAttribute, cursor);
        const bool isLineMilestone = attributePos != std::string::npos
                                     && attributePos + lineAttribute.size() <= elementEnd;

        if (isLineMilestone) {
            data.replace(cursor, elementEnd - cursor, lineBreak);
            cursor += lineBreak.size();
        } else {
            cursor = elementEnd;
        }

        cursor = data.find(openToken, cursor);
    }
}

// Strips every milestone element still present in `data`, i.e. everything from
// "<milestone" up to and including the next "/>".
void TextProcessor::removeMilestoneElements(std::string& data)
{
    static const std::string openToken = "<milestone";
    static const std::string closeToken = "/>";

    removeElementsByPrefixSuffix(data, openToken, closeToken);
}

// Strips the opening tags of seg elements: everything from "<seg" up to and
// including the next ">". Closing tags are not touched here.
void TextProcessor::removeSegStartElements(std::string& data)
{
    static const std::string openToken = "<seg";
    static const std::string closeToken = ">";

    removeElementsByPrefixSuffix(data, openToken, closeToken);
}

// Strips the opening tags of section divs: everything from
// <div type="section" up to and including the next ">".
// Stops at the first such tag that is never closed by a ">".
void TextProcessor::removeDivSectionElements(std::string& data)
{
    static const std::string sectionOpen = "<div type=\"section\"";

    for (size_t start = data.find(sectionOpen);
         start != std::string::npos;
         start = data.find(sectionOpen, start)) {

        const size_t closePos = data.find('>', start + sectionOpen.size());
        if (closePos == std::string::npos) {
            break;
        }

        // Drop the tag including its closing '>'
        data.erase(start, closePos - start + 1);
    }
}

// Turns self-closing w and div elements into explicit open/close pairs:
// <w .../> becomes <w ...></w> and <div .../> becomes <div ...></div>.
// Only tags that start with "<w " or "<div " are considered; a tag counts as
// self-closing when the character directly before its first '>' is '/'.
void TextProcessor::expandSelfClosingElements(std::string& data)
{
    size_t cursor = 0;

    while (cursor < data.size()) {
        const size_t tagStart = data.find('<', cursor);
        if (tagStart == std::string::npos) {
            break;
        }

        // Unless a complete w/div tag is handled below, resume right behind this '<'
        cursor = tagStart + 1;

        const char* closingTag = nullptr;
        if (data.compare(tagStart, 3, "<w ") == 0) {
            closingTag = "</w>";
        } else if (data.compare(tagStart, 5, "<div ") == 0) {
            closingTag = "</div>";
        } else {
            continue;
        }

        const size_t tagEnd = data.find('>', tagStart);
        if (tagEnd == std::string::npos || tagEnd <= tagStart + 1) {
            continue;
        }

        if (data[tagEnd - 1] != '/') {
            // Regular opening tag, nothing to expand
            cursor = tagEnd + 1;
            continue;
        }

        // Swap the trailing "/>" for ">" followed by the matching closing tag
        const std::string expandedEnd = std::string(">") + closingTag;
        data.replace(tagEnd - 1, 2, expandedEnd);

        // Continue behind the closing tag that was just inserted
        cursor = (tagEnd - 1) + expandedEnd.size();
    }
}

// Gives the numeric classes of variant readings semantic names.
// For every "<div " tag that has type="variant" starting before the tag's
// closing '>', class="1" becomes class="primary-variant" and class="2"
// becomes class="secondary-variant". Other div tags are skipped.
// Scanning stops at the first "<div " that has no '>' after it.
void TextProcessor::normalizeVariantClasses(std::string& data)
{
    static const std::string divOpen = "<div ";
    static const std::string variantAttribute = "type=\"variant\"";
    static const std::string numericClasses[2] = {
        "class=\"1\"",
        "class=\"2\""
    };
    static const std::string semanticClasses[2] = {
        "class=\"primary-variant\"",
        "class=\"secondary-variant\""
    };

    size_t tagStart = data.find(divOpen);

    while (tagStart != std::string::npos) {
        size_t tagClose = data.find('>', tagStart + divOpen.size());
        if (tagClose == std::string::npos) {
            return;
        }

        const size_t variantPos = data.find(variantAttribute, tagStart);
        const bool isVariantDiv = variantPos != std::string::npos && variantPos <= tagClose;

        if (isVariantDiv) {
            for (size_t i = 0; i < 2; ++i) {
                const size_t classPos = data.find(numericClasses[i], tagStart);
                if (classPos != std::string::npos && classPos <= tagClose) {
                    data.replace(classPos, numericClasses[i].size(), semanticClasses[i]);
                    // The tag grew, so its closing '>' moved by the length difference
                    tagClose += semanticClasses[i].size() - numericClasses[i].size();
                }
            }
        }

        tagStart = data.find(divOpen, tagClose + 1);
    }
}

// Strips pb elements together with the single space that follows them:
// everything from "<pb " up to and including the next "/> ".
// Stops at the first "<pb " that has no "/> " after it.
void TextProcessor::removePbElementsWithSpace(std::string& data)
{
    static const std::string openToken = "<pb ";
    static const std::string closeToken = "/> ";

    size_t start = data.find(openToken);

    while (start != std::string::npos) {
        const size_t closePos = data.find(closeToken, start);
        if (closePos == std::string::npos) {
            return;
        }

        data.erase(start, (closePos - start) + closeToken.size());
        start = data.find(openToken, start);
    }
}

// Maps a verse reference from the versification of the source module to the
// versification of the target module.
//
// sourceOsisRef:    reference to map; it is parsed with a VerseKey that uses the
//                   source versification.
// sourceModuleName: local module whose "Versification" config entry names the
//                   source system.
// targetModuleName: local module whose "Versification" config entry names the
//                   target system.
// allowRange:       when true and the mapping yields an end verse beyond the
//                   mapped verse, the result is a range.
//
// A module that is not available locally, or that has no "Versification"
// entry, is treated as "KJV".
//
// Returns sourceOsisRef unchanged when it is empty, when both modules use the
// same versification, or when one of the versification systems is unknown to
// the VersificationMgr. Otherwise returns "Book.Chapter.Verse", or
// "Book.Chapter.Verse-Book.Chapter.VerseEnd" for a range.
string TextProcessor::mapVerseReference(string sourceOsisRef, string sourceModuleName, string targetModuleName, bool allowRange)
{
    // An empty reference has nothing to map. Returning early keeps the empty
    // string from being parsed as a verse key further down, which would yield
    // a reference the caller never asked for.
    if (sourceOsisRef.empty()) {
        return sourceOsisRef;
    }

    // Reads the versification system name of a local module (default: KJV)
    auto versificationOf = [this](const std::string& moduleName) -> std::string {
        SWModule* module = this->_moduleStore.getLocalModule(moduleName);
        if (module != 0) {
            const char* v11nEntry = module->getConfigEntry("Versification");
            if (v11nEntry != 0) {
                return v11nEntry;
            }
        }
        return "KJV";
    };

    const std::string sourceV11n = versificationOf(sourceModuleName);
    const std::string targetV11n = versificationOf(targetModuleName);

    // If both modules use the same versification, no mapping is needed
    if (sourceV11n == targetV11n) {
        return sourceOsisRef;
    }

    // Get the versification systems from VersificationMgr
    VersificationMgr* vMgr = VersificationMgr::getSystemVersificationMgr();
    const VersificationMgr::System* sourceSys = vMgr->getVersificationSystem(sourceV11n.c_str());
    const VersificationMgr::System* targetSys = vMgr->getVersificationSystem(targetV11n.c_str());

    if (sourceSys == 0 || targetSys == 0) {
        return sourceOsisRef;
    }

    // Parse the source OSIS reference using a VerseKey with the source versification
    // NOTE(review): the outcome of the parse is not checked here. Presumably an
    // unparseable reference gets mapped from wherever the key ends up - confirm
    // whether callers always pass valid references.
    VerseKey sourceKey;
    sourceKey.setVersificationSystem(sourceV11n.c_str());
    sourceKey.setText(sourceOsisRef.c_str());

    // Extract the components for translateVerse; they are updated in place
    const char* book = sourceKey.getOSISBookName();
    int chapter = sourceKey.getChapter();
    int verse = sourceKey.getVerse();
    int verseEnd = verse;

    // Perform the mapping
    sourceSys->translateVerse(targetSys, &book, &chapter, &verse, &verseEnd);

    // Build the mapped OSIS reference
    std::ostringstream result;
    result << book << "." << chapter << "." << verse;

    if (allowRange && verseEnd > verse) {
        result << "-" << book << "." << chapter << "." << verseEnd;
    }

    return result.str();
}
