///
import { IndirectObject, PDFObject, Rectangle, DictionaryObject } from './pdfdom';
/**
Importing PDF from './PDF' induces a breaking circular dependency, so the
shape of the PDF object is declared here as an interface instead.
*/
export interface PDF {
  getObject(object_number: number, generation_number: number): PDFObject;
  /**
  `ctor` is a constructor with the same signature as Model's; the type
  parameter `T` ties the return type to the class that was passed in.
  */
  getModel<T extends Model>(object_number: number, generation_number: number, ctor: {
    new (pdf: PDF, object: PDFObject): T;
  }): T;
  _resolveObject(object: PDFObject): PDFObject;
}
/**
Most of the classes in this module are wrappers for typed objects in a PDF,
where the object's Type indicates useful ways it may be processed.
*/
export declare class IndirectReference {
  object_number: number;
  generation_number: number;
  constructor(object_number: number, generation_number: number);
  /**
  Type guard: narrows `object` to IndirectReference.
  */
  static isIndirectReference(object: any): object is IndirectReference;
  /**
  Create an IndirectReference from an "object[:reference=0]" string.
  */
  static fromString(reference: string): IndirectReference;
  toString(): string;
}
/**
_pdf: PDF -- the base PDF
_object: the original plain old javascript object parsed from the PDF

The _object may be an IndirectReference; if so, it will not be resolved
immediately, but only when the `object` getter is called.

If a new Model is constructed with a null `_object`, it will create the Model,
but Model#object will return null.
*/
export declare class Model {
  protected _pdf: PDF;
  private _object;
  private _resolved;
  constructor(_pdf: PDF, _object: PDFObject);
  readonly object: PDFObject;
  /**
  Read a value from the `object` mapping (assuming `this` is a PDFDictionary or
  behaves like one), resolving indirect references as needed.

  Much like `new Model(this._pdf, this.object[key]).object`, but avoids creating
  a whole new Model.
  */
  get(key: string): any;
  /**
  This is an (icky?) hack to get around circular dependencies with subclasses
  of Model (like Font).

  `ctor` has the same signature as Model's constructor; the type parameter `T`
  ties the return type to the class that was passed in.
  */
  asType<T extends Model>(ctor: {
    new (pdf: PDF, object: PDFObject): T;
  }): T;
  toJSON(): PDFObject;
}
/**
interface Pages {
  Type: 'Pages';
  Kids: IndirectReference[]; // -> Array<Pages | Page>
}
*/
export declare class Pages extends Model {
  /**
  All Pages objects except for the root object (i.e., the Pages indicated by Trailer.Root.Pages)
  must have a 'Parent' field that points to their immediate ancestor in the Pages/Page tree.
  It's an indirect reference (since it's a pointer back up the tree, it has to be).
  */
  readonly Parent: Pages;
  /**
  The children of this node; each one is either a nested Pages node or a Page
  (the same element type that toJSON() reports for `Kids`).
  */
  readonly Kids: Array<Pages | Page>;
  /**
  "Pages"-type objects have a field, Kids: IndirectReference[].
  Each indirect reference will resolve to a Page or Pages object.

  This will flatten the page list breadth-first, returning only the Page objects
  at the leaves of the pages tree.
  */
  getLeaves(): Page[];
  readonly MediaBox: Rectangle;
  toJSON(): {
    Type: string;
    Kids: (Pages | Page)[];
  };
}
/**
A single page in the Pages/Page tree.

Required fields: `Type`, `Parent`, `Resources`, and `MediaBox`.

Optional fields:

    LastModified?: string; // actually Date
    Annots?: IndirectReference;
    CropBox?: Rectangle;
    BleedBox?: Rectangle;
    TrimBox?: Rectangle;
    ArtBox?: Rectangle;
    BoxColorInfo?: DictionaryObject;
    Contents?: IndirectReference | IndirectReference[];
    Rotate?: number;
    Group?: DictionaryObject;
    Thumb?: Stream;

See "Table 30 – Entries in a page object".
*/
export declare class Page extends Model {
  readonly Parent: Pages;
  readonly MediaBox: Rectangle;
  readonly Resources: Resources;
  /**
  The Contents field comes in three shapes: a reference to a Stream object,
  an array of references to Stream objects, or a reference to an array (of
  references to stream objects).
  */
  readonly Contents: Model;
  /**
  Since a page's 'Contents' field may be a single stream or an array of
  streams, all of them have to be visited and concatenated into one stream.

  From the spec:

  > If the value is an array, the effect shall be as if all of the streams in the array were concatenated, in order, to form a single stream. Conforming writers can create image objects and other resources as they occur, even though they interrupt the content stream. The division between streams may occur only at the boundaries between lexical tokens but shall be unrelated to the page's logical content or organization. Applications that consume or produce PDF files need not preserve the existing structure of the Contents array. Conforming writers shall not create a Contents array containing no elements.

  Merging the streams would be pretty simple, except that the separations
  between them count as token separators, so the result of
  `Buffer.concat(...)` can't be fed directly into the StackOperationParser
  (via Canvas).

  TODO: don't combine the strings (more complex)
        see MultiStringIterator in scratch.txt
  */
  joinContents(separator: Buffer): Buffer;
  toJSON(): {
    Type: string;
    MediaBox: [number, number, number, number];
    Resources: Resources;
    Contents: Model;
  };
}
/**
Wrapper for a stream object, which has this shape:

    interface ContentStream {
      dictionary: {
        Length: number;
        Filter?: string | string[];
      };
      buffer: Buffer;
    }
*/
export declare class ContentStream extends Model {
  readonly Length: number;
  readonly Resources: Resources;
  readonly Subtype: string;
  readonly dictionary: any;
  /**
  The object's buffer, decoded first if necessary.
  */
  readonly buffer: Buffer;
  toJSON(): any;
  /**
  Type guard: narrows `object` to ContentStream.
  */
  static isContentStream(object: any): object is ContentStream;
}
/**
Object streams are marked by Type='ObjStm'; see PDF32000_2008.pdf,
section 7.5.7 "Object Streams".
*/
export declare class ObjectStream extends ContentStream {
  readonly objects: Array<IndirectObject>;
  toJSON(): any;
}
import { Font } from './font/index';
/**
Pages that render to text are defined by their `Contents` field, but that
field sometimes references objects or fonts in the `Resources` field, which
in turn has a field, `XObject`, mapping object names to nested streams of
content. I'm pretty sure they're always streams.

Despite being plural, the `Resources` field is always a single object,
as far as I can tell.

None of the fields are required.
*/
export declare class Resources extends Model {
  private _cached_fonts;
  /**
  Look up an XObject by name; the result is `undefined` when there is no
  matching XObject.
  */
  getXObject(name: string): ContentStream;
  /**
  Retrieve a Font instance from the given Resources' Font dictionary.

  Fonts are cached (this is pretty hot when rendering a page), even missing
  ones (as null).

  Going through PDF#getModel() allows reuse of all the memoizing each Font
  instance does. Otherwise, a new Font instance would have to be created for
  each Resources (albeit, perhaps using the PDF's object cache, which is
  helpful).

  Throws an Error if the Font dictionary has no matching `name` key.
  */
  getFont(name: string): Font;
  /**
  Look up an ExtGState by name. The result is a Model since the values may be
  indirect references; it is `undefined` when there is no matching ExtGState.
  */
  getExtGState(name: string): Model;
  toJSON(): {
    ExtGState: any;
    ColorSpace: any;
    Pattern: any;
    Shading: any;
    XObject: any;
    Font: any;
    ProcSet: any;
    Properties: any;
  };
}
/**
The catalog object is what the PDF trailer's `Root` reference points to.

    interface Catalog {
      Type: 'Catalog';
      Pages: IndirectReference; // reference to a {type: 'Pages', ...} object
      Names?: IndirectReference;
      PageMode?: string;
      OpenAction?: IndirectReference;
    }
*/
export declare class Catalog extends Model {
  readonly Pages: Pages;
  toJSON(): {
    Type: string;
    Names: any;
    PageMode: any;
    OpenAction: any;
  };
}
/**
Unlike the other classes here, the Trailer is not a typical extension of
models.Model: it is backed by a collection of PDFObjects rather than by a
single PDFObject.
*/
export declare class Trailer {
  private _pdf;
  objects: Array<DictionaryObject>;
  constructor(_pdf: PDF, objects?: Array<DictionaryObject>);
  /**
  The PDF's trailers are read from newer to older.
  */
  add(object: DictionaryObject): void;
  /**
  The trailers are held from older to newer (the original note calls the list
  `this._objects`), so merging the newer trailers' values over the older
  trailers is straightforward.

  Not as generic as the typical Model#object getter, but similar enough to
  warrant using the same name.
  */
  readonly object: DictionaryObject;
  readonly Size: number;
  /**
  I'm pretty sure the `Root` reference is always a reference.
  */
  readonly Root: Catalog;
  /**
  I'm pretty sure the `Info` reference is also always a reference.
  */
  readonly Info: PDFObject;
  toJSON(): {
    Size: number;
    Root: Catalog;
    Info: PDFObject;
  };
}