import { promises as fs } from 'fs';
import path from 'path';
import util from 'util';
import url from 'url';
import glob from 'tiny-glob';
import * as mime from 'mime';
import cheerio from 'cheerio';
import xmldom from '@xmldom/xmldom';
// import * as metadata from './metadata.js';
import * as utils from './utils.js';
import { Configuration } from './Configuration.js';

/**
 * Returns `value` unless it is `undefined`, in which case `fallback` is
 * returned. Unlike `??` this lets `null` through, and unlike `||` it keeps
 * `0`, `''` and `false`.
 */
function orDefault<T>(value: T | undefined, fallback: T): T {
    return value !== undefined ? value : fallback;
}

/**
 * Comparator ordering {@link ManifestItem} objects by ascending `spine_order`.
 */
function compareSpineOrder(a: ManifestItem, b: ManifestItem): number {
    if (a.spine_order < b.spine_order) return -1;
    if (a.spine_order > b.spine_order) return 1;
    return 0;
}

/**
 * An array of {@link ManifestItem} objects with lookup and update helpers.
 */
export class Manifest extends Array<ManifestItem> {

    /**
     * @param toimport Optional collection of item descriptions; each entry is
     *      copied into a new {@link ManifestItem}.  A numeric argument is
     *      accepted and ignored, see the comment in the body.
     */
    constructor(toimport?: Iterable<Partial<ManifestItem>> | number) {
        super();
        // Built-in Array methods such as splice(), slice() and map() create
        // their result with `new this.constructor(length)`.  That numeric
        // length must not be treated as a list of items to import, otherwise
        // iterating over it throws a TypeError.
        if (toimport && typeof toimport !== 'number') {
            for (const item of toimport) {
                this.push(new ManifestItem(item));
            }
        }
    }

    /**
     * Finds a {@link ManifestItem}
     *
     * @param id The ID code for a {@link ManifestItem}
     * @returns The matching {@link ManifestItem}, or `undefined` if none matches
     */
    byID(id: string): ManifestItem | undefined {
        return this.find(item => item.id === id);
    }

    /**
     * Finds a {@link ManifestItem}
     *
     * @param path2find The file name for a {@link ManifestItem}
     * @returns The matching {@link ManifestItem}, or `undefined` if none matches
     */
    byPath(path2find: string): ManifestItem | undefined {
        return this.find(item => item.path === path2find);
    }

    /**
     * Returns an array of {@link ManifestItem} items that are in the _spine_,
     * sorted by `spine_order`.  The manifest itself is not reordered.
     *
     * @returns Array of matching items
     */
    get spine(): ManifestItem[] {
        return this.filter(item => item.in_spine).sort(compareSpineOrder);
    }

    /**
     * Either updates a {@link ManifestItem} or adds a new one to the {@link Manifest}.
     * If an item with the same `path` already exists, it is updated, and
     * otherwise a copy of `newItem` is added.
     *
     * @param newItem The new {@link ManifestItem} to add or update
     */
    addItem(newItem: ManifestItem): void {
        const mItem = this.byPath(newItem.path);
        if (!mItem) {
            this.push(new ManifestItem(newItem));
            return;
        }
        mItem.basedir = newItem.basedir;
        mItem.path = newItem.path;
        mItem.id = orDefault(newItem.id, "");
        mItem.suppress = orDefault(newItem.suppress, false);
        mItem.in_spine = orDefault(newItem.in_spine, false);
        mItem.spine_order = orDefault(newItem.spine_order, -1);
    }

    /**
     * Adds an array of items to the {@link Manifest}
     *
     * @param items Array of {@link ManifestItem} items
     */
    addItems(items: ManifestItem[]): void {
        for (const f of items) {
            this.addItem(f);
        }
    }

    // It seems this function was never invoked, never tested.
    // The only execution is in a function in index.ts that is
    // itself not tested and commented-out
    /* checkItemsFromOPF(opfManifest) {
        // Maybe this does not belong here since it is presumed to be part of a process driven elsewhere
        // Maybe instead this belongs in that place
        // The assumption is config.opfManifest was built by using from_fs first and
        // second to scan what is in an OPF file
        // Ergo the from_fs stage cannot get a lot of the details which are in the OPF
        for (let mItem of opfManifest) {
            // What did it mean to prefix this with "config."?
            // This doesn't make sense.
            let existing = /*config.*--/opfManifest.byPath(mItem.path);
            if (existing) {
                existing.seen_in_opf = true;
                if (typeof mItem.id !== 'undefined') existing.id = mItem.id;
                if (typeof mItem.mime !== 'undefined') existing.mime = mItem.mime;
                if (typeof mItem.mimeoverride !== 'undefined') existing.mimeoverride = mItem.mimeoverride;
                if (typeof mItem.is_nav !== 'undefined') existing.is_nav = mItem.is_nav;
                if (typeof mItem.nav_id !== 'undefined') existing.nav_id = mItem.nav_id;
                if (typeof mItem.nav_path !== 'undefined') existing.nav_path = mItem.nav_path;
                if (typeof mItem.properties !== 'undefined') existing.properties = mItem.properties;
                if (typeof mItem.is_cover_image !== 'undefined') existing.is_cover_image = mItem.is_cover_image;
                if (typeof mItem.cover_id !== 'undefined') existing.cover_id = mItem.cover_id;
                if (typeof mItem.cover_path !== 'undefined') existing.cover_path = mItem.cover_path;
                if (typeof mItem.is_mathml !== 'undefined') existing.is_mathml = mItem.is_mathml;
                if (typeof mItem.is_scripted !== 'undefined') existing.is_scripted = mItem.is_scripted;
                if (typeof mItem.is_svg !== 'undefined') existing.is_svg = mItem.is_svg;
                if (typeof mItem.is_remote_resources !== 'undefined') existing.is_remote_resources = mItem.is_remote_resources;
                if (typeof mItem.is_switch !== 'undefined') existing.is_switch = mItem.is_switch;
                if (typeof mItem.suppressOPF !== 'undefined') existing.suppressOPF = mItem.suppressOPF;
                if (typeof mItem.suppress !== 'undefined') existing.suppress = mItem.suppress;
                if (typeof mItem.in_spine !== 'undefined') existing.in_spine = mItem.in_spine;
                if (typeof mItem.spine_order !== 'undefined') existing.spine_order = mItem.spine_order;
                if (typeof mItem.linear !== 'undefined') existing.linear = mItem.linear;
            } else {
                console.log(`checkItemsFromOPF OPF has item not in file system ${util.inspect(mItem)}`);
            }
        }
    } */

    /**
     * First removes all existing {@link ManifestItem} objects from manifest,
     * then adds the new ones.
     *
     * @param newItems Array of {@link ManifestItem} objects
     */
    replaceItems(newItems: ManifestItem[]): void {
        this.length = 0;
        this.addItems(newItems);
    }

    /**
     * Remove a {@link ManifestItem} from the {@link Manifest}.
     * Every entry whose `path` matches is removed.
     *
     * @param path2remove The path of the item to remove.
     */
    remove(path2remove: string): void {
        // Walk backwards so that removing an entry does not shift the
        // entries still to be examined.
        for (let i = this.length - 1; i >= 0; i--) {
            if (this[i]?.path === path2remove) {
                this.splice(i, 1);
            }
        }
    }
}

/**
 * Describes one file of the book: where it lives, its MIME type, and the
 * flags recorded about it (navigation document, cover image, spine
 * membership, and so on).
 */
export class ManifestItem {

    id: string;
    basedir: string;
    path: string;
    dirname: string;
    filename: string;
    mime: string;
    mimeoverride: string;
    nav_id: string;
    nav_path: string;
    is_nav: boolean;
    properties: string;
    cover_id: string;
    cover_path: string;
    is_cover_image: boolean;
    is_mathml: boolean;
    is_scripted: boolean;
    is_svg: boolean;
    is_remote_resources: boolean;
    is_switch: boolean;
    suppressOPF: boolean;
    suppress: boolean;
    in_spine: boolean;
    spine_order: number;
    linear: boolean;
    seen_in_opf: boolean;
    /** Document title; filled in by {@link spineTitles}, not by the constructor. */
    title?: string;

    /**
     * Copies the recognised fields from `item`, substituting defaults for
     * the ones that are `undefined`.
     *
     * `nav_id`/`nav_path` are only set when `item.is_nav` is truthy, and
     * `cover_id`/`cover_path` only when `item.is_cover_image` is truthy; in
     * both cases they are taken from `item.id` and `item.path`.
     * `properties` and `linear` are only set when supplied.
     *
     * @param item Field values to start from
     */
    constructor(item: Partial<ManifestItem>) {
        this.id = orDefault(item.id, "");
        this.basedir = orDefault(item.basedir, "");
        this.path = orDefault(item.path, "--unknown--");
        this.dirname = orDefault(item.dirname, "");
        this.filename = orDefault(item.filename, "");
        this.mime = orDefault(item.mime, "");
        this.mimeoverride = orDefault(item.mimeoverride, "");

        if (item.is_nav) {
            this.nav_id = orDefault(item.id, "");
            this.nav_path = orDefault(item.path, "");
            this.is_nav = item.is_nav;
        } else {
            this.is_nav = false;
        }

        if (item.properties) this.properties = item.properties;

        if (item.is_cover_image) {
            this.cover_id = orDefault(item.id, "");
            this.cover_path = orDefault(item.path, "");
            this.is_cover_image = item.is_cover_image;
        } else {
            this.is_cover_image = false;
        }

        // TODO
        this.is_mathml = orDefault(item.is_mathml, false);
        this.is_scripted = orDefault(item.is_scripted, false);
        this.is_svg = orDefault(item.is_svg, false);
        this.is_remote_resources = orDefault(item.is_remote_resources, false);
        this.is_switch = orDefault(item.is_switch, false);

        // TODO some additional item/itemref properties
        //     rendition:layout
        //     rendition:orientation
        //     rendition:spread
        //     rendition:page-spread-center

        this.suppressOPF = orDefault(item.suppressOPF, false);
        this.suppress = orDefault(item.suppress, false);
        this.in_spine = orDefault(item.in_spine, false);
        this.spine_order = orDefault(item.spine_order, -1);
        if (typeof item.linear !== 'undefined') {
            this.linear = item.linear;
        }
        this.seen_in_opf = orDefault(item.seen_in_opf, false);

        // console.log(util.inspect(this));
    }
}

/**
 * Returns an array of {@link ManifestItem} items that are in the _spine_,
 * sorted by `spine_order`.
 *
 * @param epubConfig The {@link Configuration} object
 * @returns An array of {@link ManifestItem} objects that are in the spine;
 *      empty when there is no configuration or no `opfManifest`
 */
export function spineItems(epubConfig: Configuration): ManifestItem[] {
    if (!epubConfig || !epubConfig.opfManifest) return [];
    return epubConfig.opfManifest
        .filter(item => item.in_spine)
        .sort(compareSpineOrder);
}

/**
 * For {@link ManifestItem} objects that are in the spine, read the title
 * from the matching XHTML file, adding it as the _title_ field.
 *
 * Files are read one at a time from `renderedFullPath`; a read failure
 * rejects the returned promise.
 *
 * @param epubConfig The {@link Configuration} object
 */
export async function spineTitles(epubConfig: Configuration): Promise<void> {
    if (!epubConfig.opfManifest) return;
    const epubdir = epubConfig.renderedFullPath;
    for (const item of epubConfig.opfManifest) {
        if (!item.in_spine) continue;
        // console.log(`spineTitles ${epubdir} ${item.path}`);
        const docpath = path.join(epubdir, item.path);
        const doctxt = await fs.readFile(docpath, 'utf8');
        const $ = cheerio.load(doctxt, {
            xmlMode: true,
            decodeEntities: true
        });
        // console.log(`spineTitles title ${title}`);
        item.title = $('head title').text();
    }
}

/**
 * One entry of the table of contents as returned by {@link tocData}.
 */
export interface TocEntry {
    /** Text content of the entry's `<a>` element */
    text: string;
    /** The anchor's `href`, joined onto the TOC document's directory and normalized */
    href: string;
    /** `id` of the `<a>`, else of its `<li>`, else a generated `itemN` */
    id: string;
    /** Entries of the `<ol>` nested in the same `<li>`, if any */
    children: TocEntry[];
}

/** Value of `nodeType` for element nodes. */
const ELEMENT_NODE = 1;

/** Counter behind the generated `itemN` IDs; {@link tocData} resets it per run. */
let navolcount = 0;

/**
 * Converts the `<li>` children of an `<ol>` into {@link TocEntry} objects,
 * recursing into nested `<ol>` elements.
 *
 * An `<li>` containing no `<a>` produces no entry, and the entries of an
 * `<ol>` nested inside such an `<li>` are dropped with it.
 *
 * @param DOM The parsed TOC document; only passed along to recursive calls
 * @param navol The `<ol>` element whose items are converted
 * @param tocdir Directory of the TOC document, used to resolve each `href`
 * @returns The entries in document order
 */
// xmldom node types are not imported by this file, so the nodes stay untyped.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function getNavOLChildrenXML(DOM: any, navol: any, tocdir: string): TocEntry[] {
    const ret: TocEntry[] = [];
    for (const child of utils.nodeListIterator(navol.childNodes)) {
        if (child.nodeType !== ELEMENT_NODE || child.tagName !== 'li') continue;

        let item: TocEntry | undefined;
        let itemchildren: TocEntry[] | undefined;
        for (const lichild of utils.nodeListIterator(child.childNodes)) {
            if (lichild.nodeType !== ELEMENT_NODE) continue;

            if (lichild.tagName === 'a') {
                const href = lichild.getAttribute('href');
                // Get the ID value either from the <a>, or the containing <li>.
                // If neither have it, then concoct an ID.
                const anchorid = lichild.getAttribute('id');
                const liid = child.getAttribute('id');
                const id = anchorid || liid || `item${navolcount++}`;
                item = {
                    text: lichild.textContent,
                    href: path.normalize(path.join(tocdir, href)),
                    id: id,
                    children: []
                };
            }
            if (lichild.tagName === 'ol') {
                itemchildren = getNavOLChildrenXML(DOM, lichild, tocdir);
            }
        }

        // Without an anchor there is no entry to attach the children to.
        if (!item) continue;
        if (itemchildren) item.children = itemchildren;
        // console.log(`getNavOLChildrenXML item ${util.inspect(item)}`);
        ret.push(item);
    }
    return ret;
}

/**
 * Reads the book's table of contents document and returns its entries as a
 * tree.
 *
 * The document is `sourceBookTOCHREF` under `renderedFullPath`.  Within it,
 * the `<nav>` element with `epub:type="toc"` and `id` equal to
 * `sourceBookTOCID` is located, and the first `<ol>` directly inside it is
 * converted.
 *
 * @param epubConfig The {@link Configuration} object
 * @returns The top-level TOC entries
 * @throws Error if the document cannot be parsed, or the `<nav>` or its
 *      root `<ol>` is missing.  A failure to read the file rejects as well.
 */
export async function tocData(epubConfig: Configuration): Promise<TocEntry[]> {

    // console.log(`tocData ${epubConfig.sourceBookTOCHREF} found ${util.inspect(found)}`);
    // console.log(`tocData ${epubConfig.sourceBookTOCHREF} renderer ${util.inspect(renderer)}`);

    const content = await fs.readFile(
        path.join(epubConfig.renderedFullPath, epubConfig.sourceBookTOCHREF),
        'utf8');

    // console.log(`tocData ${epubConfig.sourceBookTOCHREF} content ${util.inspect(content)}`);

    const tocdom = new xmldom.DOMParser().parseFromString(content, 'application/xhtml+xml');
    if (!tocdom) {
        throw new Error(`epubtools tocData FAIL to read ${epubConfig.renderedFullPath} ${epubConfig.sourceBookTOCHREF}`);
    }

    // let tochtml = tocxhtml.xhtmlText;
    // let tocdom = tocxhtml.xhtmlDOM;

    const tocid = epubConfig.sourceBookTOCID;
    const tocdir = path.dirname(epubConfig.sourceBookTOCHREF);

    let tocnav;
    for (const nav of utils.nodeListIterator(tocdom.getElementsByTagName('nav'))) {
        // console.log(`tocData found nav epub:type ${nav.getAttribute('epub:type')} id ${nav.getAttribute('id')} tocid ${tocid}`);
        if (nav.getAttribute('epub:type') === 'toc'
         && nav.getAttribute('id') === tocid) {
            tocnav = nav;
            break;
        }
    }
    if (!tocnav) {
        throw new Error(`No nav epub:type===toc id===${tocid} in ${epubConfig.TOCpath}`);
    }

    let tocnavrootol;
    for (const child of utils.nodeListIterator(tocnav.childNodes)) {
        if (child.nodeType === ELEMENT_NODE && child.tagName === 'ol') {
            tocnavrootol = child;
            break;
        }
    }
    if (!tocnavrootol) {
        throw new Error(`No root 'ol' node in nav epub:type===toc id===${tocid} in ${epubConfig.TOCpath}`);
    }

    // Generated IDs start from item0 for every TOC that is read.
    navolcount = 0;
    return getNavOLChildrenXML(tocdom, tocnavrootol, tocdir);
}

/**
 * Elements whose `href`/`src` is inspected for remote references.
 * Apparently we don't need to check <a> tags for a remote reference.
 */
const REMOTE_CHECK_SELECTORS = [
    // "a",
    "img",
    "style",
    "script",
    "link",
    "audio > source",
    "video > source"
];

/**
 * Reads one spine document and records what it contains on `item`:
 * whether it is the navigation document, and whether it uses MathML, SVG
 * or remote resources.  When it is the navigation document, the files its
 * `nav li > a` links point at get their `spine_order` assigned, in link order.
 *
 * @param config The {@link Configuration} object
 * @param item The item to scan and update
 * @param itemz All items found so far; searched for TOC link targets
 */
async function scanSpineDocument(
    config: Configuration, item: ManifestItem, itemz: ManifestItem[]
): Promise<void> {
    const file2read = path.join(config.renderedFullPath, item.path);
    // console.log(`readXHTML ${file2read}`);
    const data = await fs.readFile(file2read, 'utf8');
    const $ = cheerio.load(data, {
        xmlMode: true,
        decodeEntities: true
    });

    $("nav").each((_i, nav) => {
        const navtype = $(nav).attr('epub:type');
        // console.log(`from_fs scan ${item.path} has nav ${navtype}`);
        if (navtype === 'toc') {
            item.is_nav = true;
            // item.id = config.sourceBookTOCID;
            item.nav_path = item.path;
            item.nav_id = item.id;
        }
    });

    if (item.is_nav) {
        let order = 0;
        $("nav li > a").each((_i, anchor) => {
            const aHref = $(anchor).attr('href');
            // An anchor without href points at no file; skipping it keeps
            // the rest of this document's scan going.
            if (!aHref) return;
            const aPath = path.normalize(path.join(item.dirname, aHref));
            // console.log(`from_fs scan ${item.path} toc entry ${aHref} ${aPath}`);
            const reffed = itemz.find(candidate => candidate.path === aPath);
            if (reffed) reffed.spine_order = order++;
        });
    }

    if ($("math").length > 0) item.is_mathml = true;
    if ($("svg").length > 0) item.is_svg = true;

    for (const selector of REMOTE_CHECK_SELECTORS) {
        $(selector).each((_i, link) => {
            // console.log(link);
            const href = $(link).attr('href');
            const src = $(link).attr('src');
            const target = href || src;
            if (!target) return;
            const theurl = url.parse(target);
            if (theurl.hostname || theurl.port) {
                console.log(`checkRemote found remote resource href ${href} src ${src} theurl ${util.inspect(theurl)}`);
                item.is_remote_resources = true;
            }
        });
    }
}

/**
 * Scans the file system, constructing {@link ManifestItem} items for each,
 * and ultimately constructing a {@link Manifest} object.
 *
 * Every file below `renderedFullPath` is included except `mimetype`,
 * `META-INF/container.xml` and the OPF file itself.  HTML/XHTML files are
 * marked as being in the spine and are then read to detect the navigation
 * document, MathML, SVG and remote resources.  An error while reading one
 * of them is logged and the scan continues with the next file.
 *
 * @param config The {@link Configuration} object
 * @returns Returns a {@link Manifest} object
 */
export async function from_fs(config: Configuration): Promise<Manifest> {
    // console.log(`scanfiles epubdir ${epubdir}`);
    // TODO Should this scan the source directory or the rendered directory?
    // console.log(config.renderedFullPath);
    const filez = await glob('**', {
        cwd: config.renderedFullPath,
        dot: true,
        // absolute: true,
        filesOnly: true
    });
    // console.log(`files found in ${config.renderedFullPath}`, filez);

    const itemz: ManifestItem[] = [];
    for (const filenm of filez) {
        // Do not include admin files
        if (filenm === 'mimetype') continue;
        if (filenm === 'META-INF/container.xml') continue;
        if (filenm === config.bookOPF) continue;

        // Only include files which can be stat'd and are not directories
        const fullpath = path.join(config.renderedFullPath, filenm);
        let stats;
        try {
            stats = await fs.stat(fullpath);
        } catch {
            continue;
        }
        if (stats.isDirectory()) continue;

        // Fill in the base ManifestItem fields; is_nav is determined later.
        // mime falls back to '' when the type cannot be determined, so the
        // field always holds a string as declared.
        const item = new ManifestItem({
            basedir: config.renderedPath,
            path: filenm,
            dirname: path.dirname(filenm),
            filename: path.basename(filenm),
            mime: mime.getType(filenm) ?? '',
            suppressOPF: false,
            suppress: false
        });
        if (item.mime === 'text/html') {
            item.mime = 'application/xhtml+xml';
        }
        item.in_spine = item.mime === 'application/xhtml+xml';
        item.seen_in_opf = false;

        // Files named in the configuration take their configured ID.
        if (config.sourceBookTOCHREF === item.path && config.sourceBookTOCID) {
            item.id = config.sourceBookTOCID;
        }
        if (config.sourceBookCoverHTMLHREF === item.path && config.sourceBookCoverHTMLID) {
            item.id = config.sourceBookCoverHTMLID;
        }
        if (config.sourceBookCoverHREF === item.path && config.sourceBookCoverID) {
            item.id = config.sourceBookCoverID;
        }

        // console.log(`from_fs pushed for ${filenm}`, item);
        itemz.push(item);
    }

    let itemnum = 0;
    for (const item of itemz) {
        // console.log(`from_fs scan ${item.dirname} ${item.path} ${item.in_spine}`);
        if (!item.id) item.id = `item${itemnum++}`;
        if (!item.in_spine) continue;
        try {
            await scanSpineDocument(config, item, itemz);
        } catch (e: unknown) {
            // Best effort: report the problem and carry on with other files
            const detail = e instanceof Error ? e.stack : String(e);
            console.log(`Scanning files caught error ${detail}`);
        }
    }

    // console.log(`from_fs `, filez);
    return new Manifest(itemz);
}

/*
This was the old plan, where the config file would hold a manifest
of the files.  The code here may have some usefulness e.g. a command
to list out the files in the directory.

exports.scan = async function(config) {

    // console.log(`scanfiles renderedFullPath ${config.renderedFullPath}`);
    var filez = await globfs.findAsync(config.renderedFullPath, '**');

    // console.log(util.inspect(config.manifest));

    // Remove directories
    var _filez = [];
    for (let item of filez) {
        let stats;
        try {
            stats = await fs.stat(item.fullpath);
        } catch (e) {
            continue;
        }
        if (!stats.isDirectory()) {
            _filez.push(item);
        }
    }

    // Modify the basedir to be Bookroot
    filez = _filez.map(item => {
        item.basedir = config.renderedFullPath;
        return item;
    });

    // Look at existing manifest entries
    // If not in current set of files, then delete
    const todel = [];
    for (let mItem of config.manifest) {
        let found = false;
        for (let f of filez) {
            if (f.path === mItem.path) {
                found = true;
                break;
            }
        }
        if (!found) todel.push(mItem);
    }

    // Perform the deletions
    for (let mi of todel) {
        config.manifest.remove(mi.path);
    }

    // Then scan existing files
    // If it is in manifest, update the entry
    // Else add a new entry
    config.manifest.addItems(filez);
};
*/