/*
Phaneron - Clustered, accelerated and cloud-fit video server, pre-assembled and in kit form.
Copyright (C) 2020 Streampunk Media Ltd.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
https://www.streampunk.media/ mailto:furnace@streampunk.media
14 Ormiscaig, Aultbea, Achnasheen, IV22 2JJ U.K.
*/
import { PackImpl, Interlace } from './packer'
import { KernelParams, OpenCLBuffer } from 'nodencl'
const yuv420pKernel = `
__kernel void read(__global uchar8* restrict inputY,
__global uchar4* restrict inputU,
__global uchar4* restrict inputV,
__global float4* restrict output,
__private unsigned int width,
__constant float4* restrict colMatrix,
__global float* restrict gammaLut,
__constant float4* restrict gamutMatrix) {
bool lastItemOnLine = get_local_id(0) == get_local_size(0) - 1;
// 64 output pixels per workItem = 8 input luma uchar8s per work item, 8 each u & v uchar4s per work item
uint numPixels = lastItemOnLine && (0 != width % 64) ? width % 64 : 64;
uint numLoops = numPixels / 8;
uint remain = numPixels % 8;
uint pitchReads = (width + 7) / 8;
uint inOffY[2];
inOffY[0] = 8 * get_local_id(0) + pitchReads * get_group_id(0) * 2;
inOffY[1] = inOffY[0] + pitchReads;
uint inOffUV = 8 * get_local_id(0) + pitchReads * get_group_id(0);
uint outOff[2];
outOff[0] = 64 * get_local_id(0) + width * get_group_id(0) * 2;
outOff[1] = outOff[0] + width;
float4 colMatR = colMatrix[0];
float4 colMatG = colMatrix[1];
float4 colMatB = colMatrix[2];
// optimise loading of the 3x3 gamut matrix
float4 gamutMat0 = gamutMatrix[0];
float4 gamutMat1 = gamutMatrix[1];
float4 gamutMat2 = gamutMatrix[2];
float3 gamutMatR = (float3)(gamutMat0.s0, gamutMat0.s1, gamutMat0.s2);
float3 gamutMatG = (float3)(gamutMat0.s3, gamutMat1.s0, gamutMat1.s1);
float3 gamutMatB = (float3)(gamutMat1.s2, gamutMat1.s3, gamutMat2.s0);
for (uint i=0; i 0) {
uchar4 u = inputU[inOffUV];
uchar4 v = inputV[inOffUV];
for (uint l=0; l<2; ++l) {
uchar8 y = inputY[inOffY[l]];
uchar4 yuva[6];
yuva[0] = (uchar4)(y.s0, u.s0, v.s0, 1);
yuva[1] = (uchar4)(y.s1, u.s0, v.s0, 1);
if (remain > 2) {
yuva[2] = (uchar4)(y.s2, u.s1, v.s1, 1);
yuva[3] = (uchar4)(y.s3, u.s1, v.s1, 1);
if (remain > 4) {
yuva[4] = (uchar4)(y.s4, u.s2, v.s2, 1);
yuva[5] = (uchar4)(y.s5, u.s2, v.s2, 1);
}
}
for (uint p=0; p 0) {
for (uint l=0; l 2) {
y.s2 = yuv[2].s0;
y.s3 = yuv[3].s0;
u.s1 = yuv[2].s1;
v.s1 = yuv[2].s2;
if (remain > 4) {
y.s4 = yuv[4].s0;
y.s5 = yuv[5].s0;
u.s2 = yuv[4].s1;
v.s2 = yuv[4].s2;
}
}
outputY[outOffY[l]] = y;
if (l == 0) {
outputU[outOffUV] = u;
outputV[outOffUV] = v;
}
}
}
}
`
const getPitch = (width: number): number => width + 7 - ((width - 1) % 8)
const getPitchBytes = (width: number): number => getPitch(width)
export const fillBuf = (buf: Buffer, width: number, height: number): void => {
const lumaPitchBytes = getPitchBytes(width)
const chromaPitchBytes = lumaPitchBytes / 2
let lOff = 0
let uOff = lumaPitchBytes * height
let vOff = uOff + chromaPitchBytes * Math.ceil(height / 2)
buf.fill(16, lOff)
buf.fill(128, uOff) // fills both u and v
let Y0 = 16
let Y1 = 234
const Cb = 128
const Cr = 128
for (let y = 0; y < height; y += 2) {
let xlOff = 0
let xcOff = 0
for (let x = 0; x < width; x += 2) {
buf.writeUInt8(Y0, lOff + xlOff)
buf.writeUInt8(Y0 + 1, lOff + xlOff + 1)
buf.writeUInt8(Y1 + 1, lumaPitchBytes + lOff + xlOff)
buf.writeUInt8(Y1, lumaPitchBytes + lOff + xlOff + 1)
xlOff += 2
buf.writeUInt8(Cb, uOff + xcOff)
buf.writeUInt8(Cr, vOff + xcOff)
xcOff++
Y0 = 234 == Y0 ? 16 : (Y0 += 2)
Y1 = 16 == Y1 ? 234 : (Y1 -= 2)
}
lOff += lumaPitchBytes * 2
uOff += chromaPitchBytes
vOff += chromaPitchBytes
}
}
export const dumpBuf = (
buf: Buffer,
width: number,
height: number,
numLines: number,
lineEnds?: boolean
): void => {
console.log()
const lumaPitchBytes = getPitchBytes(width)
const chromaPitchBytes = lumaPitchBytes / 2
let y0LineOff = 0
let y1LineOff = 0
let uLineOff = lumaPitchBytes * height
let vLineOff = uLineOff + chromaPitchBytes * (height / 2)
const getYHex = (line: number, off: number): string =>
buf.readUInt8((line % 2 ? y1LineOff : y0LineOff) + off).toString(16)
const getUHex = (off: number): string => buf.readUInt8(uLineOff + off / 2).toString(16)
const getVHex = (off: number): string => buf.readUInt8(vLineOff + off / 2).toString(16)
const numPixels = 8
const endOffset = lineEnds ? lumaPitchBytes - numPixels : 0
for (let line = 0; line < numLines; line += 2) {
y0LineOff = lumaPitchBytes * line + endOffset
y1LineOff = lumaPitchBytes * (line + 1) + endOffset
uLineOff = lumaPitchBytes * height + chromaPitchBytes * (line / 2) + endOffset / 2
vLineOff = uLineOff + chromaPitchBytes * (height / 2) + endOffset / 2
for (let l = line; l < line + 2; ++l) {
let s = `Line ${l}:`
for (let p = 0; p < numPixels; p += 2)
s += ` ${getUHex(p)}, ${getYHex(l, p)}, ${getVHex(p)}, ${getYHex(l, p + 1)};`
console.log(s)
}
}
}
// process one image line pair per work group
const pixelsPerWorkItem = 64
export class Reader extends PackImpl {
constructor(width: number, height: number) {
super('yuv420p', width, height, yuv420pKernel, 'read')
this.numBits = 8
this.lumaBlack = 16
this.lumaWhite = 235
this.chromaRange = 224
this.isRGB = false
const lumaBytes = getPitchBytes(this.width) * this.height
this.numBytes = [lumaBytes, lumaBytes / 4, lumaBytes / 4]
this.workItemsPerGroup = Math.ceil(getPitch(this.width) / pixelsPerWorkItem)
// Halve total items as each item processes two lines
this.globalWorkItems = (this.workItemsPerGroup * this.height) / 2
}
getKernelParams(params: KernelParams): KernelParams {
const srcArray: Array = params.sources as Array
if (srcArray.length !== 3)
throw new Error(`Reader for ${this.name} requires 'sources' parameter with 3 OpenCL buffers`)
return {
inputY: srcArray[0],
inputU: srcArray[1],
inputV: srcArray[2],
output: params.dest,
width: this.width,
colMatrix: params.colMatrix,
gammaLut: params.gammaLut,
gamutMatrix: params.gamutMatrix
}
}
}
export class Writer extends PackImpl {
constructor(width: number, height: number, interlaced: boolean) {
super('yuv420p', width, height, yuv420pKernel, 'write')
this.interlaced = interlaced
this.numBits = 8
this.lumaBlack = 16
this.lumaWhite = 235
this.chromaRange = 224
this.isRGB = false
const lumaBytes = getPitchBytes(this.width) * this.height
this.numBytes = [lumaBytes, lumaBytes / 4, lumaBytes / 4]
this.workItemsPerGroup = Math.ceil(getPitch(this.width) / pixelsPerWorkItem)
// Halve total items as each item processes two lines
this.globalWorkItems = (this.workItemsPerGroup * this.height) / 2
}
getKernelParams(params: KernelParams): KernelParams {
const dstArray: Array = params.dests as Array
if (dstArray.length !== 3)
throw new Error(`Writer for ${this.name} requires 'dests' parameter with 3 OpenCL buffers`)
return {
input: params.source,
outputY: dstArray[0],
outputU: dstArray[1],
outputV: dstArray[2],
width: this.width,
interlace: this.interlaced ? (params.interlace as Interlace) : Interlace.Progressive,
colMatrix: params.colMatrix,
gammaLut: params.gammaLut
}
}
}