/*
Phaneron - Clustered, accelerated and cloud-fit video server, pre-assembled and in kit form.
Copyright (C) 2020 Streampunk Media Ltd.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
https://www.streampunk.media/ mailto:furnace@streampunk.media
14 Ormiscaig, Aultbea, Achnasheen, IV22 2JJ U.K.
*/
import { PackImpl, Interlace } from './packer'
import { KernelParams, OpenCLBuffer } from 'nodencl'
// OpenCL C program source for the yuv422p10le packer. The same string is handed to
// PackImpl by both Reader (kernel name 'read') and Writer (kernel name 'write').
//
// The visible `read` kernel takes three separate input planes (Y as ushort8, U and V
// as ushort4), a float4 output, the image width, a colour matrix, a gamma lookup
// table and a gamut matrix. Those argument names match the keys returned by
// Reader.getKernelParams.
//
// NOTE(review): the text of this string looks truncated in places - several `for`
// loop headers are incomplete and the signature of the `write` kernel is not
// visible, although the tail of the string stores to outputY/outputU/outputV.
// Confirm against the upstream source before relying on this program compiling.
const yuv422p10leKernel = `
__kernel void read(__global ushort8* restrict inputY,
__global ushort4* restrict inputU,
__global ushort4* restrict inputV,
__global float4* restrict output,
__private unsigned int width,
__constant float4* restrict colMatrix,
__global float* restrict gammaLut,
__constant float4* restrict gamutMatrix) {
bool lastItemOnLine = get_local_id(0) == get_local_size(0) - 1;
// 64 output pixels per workItem = 8 input luma ushort8s per work item, 8 each u & v ushort4s per work item
uint numPixels = lastItemOnLine && (0 != width % 64) ? width % 64 : 64;
uint numLoops = numPixels / 8;
uint remain = numPixels % 8;
uint pitchReads = (width + 7) / 8;
uint inOff = 8 * get_local_id(0) + pitchReads * get_group_id(0);
uint outOff = width * get_group_id(0) + get_local_id(0) * 64;
float4 colMatR = colMatrix[0];
float4 colMatG = colMatrix[1];
float4 colMatB = colMatrix[2];
// optimise loading of the 3x3 gamut matrix
float4 gamutMat0 = gamutMatrix[0];
float4 gamutMat1 = gamutMatrix[1];
float4 gamutMat2 = gamutMatrix[2];
float3 gamutMatR = (float3)(gamutMat0.s0, gamutMat0.s1, gamutMat0.s2);
float3 gamutMatG = (float3)(gamutMat0.s3, gamutMat1.s0, gamutMat1.s1);
float3 gamutMatB = (float3)(gamutMat1.s2, gamutMat1.s3, gamutMat2.s0);
for (uint i=0; i 0) {
ushort8 y = inputY[inOff];
ushort4 u = inputU[inOff];
ushort4 v = inputV[inOff];
ushort4 yuva[6];
yuva[0] = (ushort4)(y.s0, u.s0, v.s0, 1);
yuva[1] = (ushort4)(y.s1, u.s0, v.s0, 1);
if (remain > 2) {
yuva[2] = (ushort4)(y.s2, u.s1, v.s1, 1);
yuva[3] = (ushort4)(y.s3, u.s1, v.s1, 1);
if (remain > 4) {
yuva[4] = (ushort4)(y.s4, u.s2, v.s2, 1);
yuva[5] = (ushort4)(y.s5, u.s2, v.s2, 1);
}
}
for (uint p=0; p 0) {
ushort8 y = (ushort8)(64, 64, 64, 64, 64, 64, 64, 64);
ushort4 u = (ushort4)(512, 512, 512, 512);
ushort4 v = (ushort4)(512, 512, 512, 512);
ushort3 yuv[6];
for (uint p=0; p 2) {
y.s2 = yuv[2].s0;
y.s3 = yuv[3].s0;
u.s1 = yuv[2].s1;
v.s1 = yuv[2].s2;
if (remain > 4) {
y.s4 = yuv[4].s0;
y.s5 = yuv[5].s0;
u.s1 = yuv[4].s1;
v.s1 = yuv[4].s2;
}
}
outputY[outOff] = y;
outputU[outOff] = u;
outputV[outOff] = v;
}
}
`
/**
 * Length, in samples, of one padded luma line.
 *
 * For positive integer widths this is `width` rounded up to the next multiple of 8
 * (a width that is already a multiple of 8 is returned unchanged). A width of 0
 * yields 8, because `(0 - 1) % 8` is -1 in JavaScript.
 *
 * @param width - visible line width in pixels
 * @returns padded line length in samples
 */
const getPitch = (width: number): number => {
  // position of the last visible sample inside its group of eight
  const lastSampleInGroup = (width - 1) % 8
  return width + 7 - lastSampleInGroup
}
/**
 * Length, in bytes, of one padded luma line: the sample pitch from `getPitch`
 * multiplied by two bytes per sample.
 *
 * @param width - visible line width in pixels
 * @returns padded luma line length in bytes
 */
const getPitchBytes = (width: number): number => {
  const bytesPerSample = 2
  return getPitch(width) * bytesPerSample
}
/**
 * Fills `buf` with a planar test pattern: a full luma plane followed by a U plane
 * and a V plane, each chroma line being half the byte length of a luma line.
 * All samples are written as 16-bit little-endian words.
 *
 * Every plane (line padding included) is first set to luma 64 / chroma 512. The
 * visible part of each line is then overwritten with a luma ramp: each pixel pair
 * gets (Y, Y + 1), Y advances by 2 per pair and wraps from 938 back to 64. The ramp
 * is not reset at the start of a line. Chroma stays at 512 throughout.
 *
 * @param buf - destination buffer; must be large enough for all three planes
 * @param width - visible line width in pixels
 * @param height - number of lines
 */
export const fillBuf = (buf: Buffer, width: number, height: number): void => {
  const lumaStride = getPitchBytes(width)
  const chromaStride = lumaStride / 2
  const lumaPlaneBytes = lumaStride * height
  const chromaPlaneBytes = chromaStride * height
  const uPlane = lumaPlaneBytes
  const vPlane = uPlane + chromaPlaneBytes

  const blackLuma = 64
  const midChroma = 512
  const rampTop = 938

  // paint the complete planes, padding included, with black
  for (let off = 0; off < lumaPlaneBytes; off += 2) buf.writeUInt16LE(blackLuma, off)
  for (let off = 0; off < chromaPlaneBytes; off += 2) buf.writeUInt16LE(midChroma, uPlane + off)
  for (let off = 0; off < chromaPlaneBytes; off += 2) buf.writeUInt16LE(midChroma, vPlane + off)

  // overwrite the visible pixels with the ramp, one pixel pair at a time
  let ramp = blackLuma
  for (let row = 0; row < height; ++row) {
    const lumaRow = row * lumaStride
    const uRow = uPlane + row * chromaStride
    const vRow = vPlane + row * chromaStride
    for (let pair = 0; pair * 2 < width; ++pair) {
      const lumaAt = lumaRow + pair * 4
      const chromaAt = pair * 2
      buf.writeUInt16LE(ramp, lumaAt)
      buf.writeUInt16LE(ramp + 1, lumaAt + 2)
      buf.writeUInt16LE(midChroma, uRow + chromaAt)
      buf.writeUInt16LE(midChroma, vRow + chromaAt)
      ramp = ramp === rampTop ? blackLuma : ramp + 2
    }
  }
}
/**
 * Logs the first `numLines` lines of a planar buffer (luma plane, then U plane,
 * then V plane, 16-bit little-endian samples) as hex to the console.
 *
 * Eight pixels are printed per line as four `U, Y0, V, Y1;` groups, each value
 * zero-padded to three hex digits. A blank line is logged first.
 *
 * The chroma offsets are computed from the start of each chroma plane using the
 * chroma line pitch, matching the layout written by `fillBuf`. Deriving them from
 * the luma line offset, as was done previously, advanced chroma by a full luma
 * pitch per line and over-applied the line-end offset, so chroma was read from the
 * wrong place (or beyond the end of the buffer) for every line after the first.
 *
 * @param buf - buffer holding the three planes
 * @param width - visible line width in pixels
 * @param height - number of lines in each plane
 * @param numLines - number of lines to print, starting from line 0
 * @param lineEnds - when true, print the last eight samples of each padded line
 *   instead of the first eight
 */
export const dumpBuf = (
  buf: Buffer,
  width: number,
  height: number,
  numLines: number,
  lineEnds?: boolean
): void => {
  console.log()

  const lumaPitchBytes = getPitchBytes(width)
  const chromaPitchBytes = lumaPitchBytes / 2
  const uPlaneOff = lumaPitchBytes * height
  const vPlaneOff = uPlaneOff + chromaPitchBytes * height

  const numPixels = 8
  // byte offset of the last `numPixels` luma samples within a padded line
  const endOffset = lineEnds ? lumaPitchBytes - numPixels * 2 : 0

  const readHex = (off: number): string => buf.readUInt16LE(off).toString(16).padStart(3, '0')

  for (let l = 0; l < numLines; ++l) {
    const yLineOff = lumaPitchBytes * l + endOffset
    // chroma lines are half as long as luma lines, so both the line stride and the
    // offset within the line are halved
    const uLineOff = uPlaneOff + chromaPitchBytes * l + endOffset / 2
    const vLineOff = vPlaneOff + chromaPitchBytes * l + endOffset / 2

    let s = `Line ${l}:`
    for (let p = 0; p < numPixels; p += 2) {
      // pixels p and p + 1 share chroma sample p / 2, which sits at byte offset p
      const u = readHex(uLineOff + p)
      const y0 = readHex(yLineOff + p * 2)
      const v = readHex(vLineOff + p)
      const y1 = readHex(yLineOff + (p + 1) * 2)
      s += ` ${u}, ${y0}, ${v}, ${y1};`
    }
    console.log(s)
  }
}
// process one image line per work group
// Number of pixels handled by a single work item. The Reader and Writer constructors
// divide the padded line width by this value (rounding up) to get the number of
// work items in each group.
const pixelsPerWorkItem = 64
/**
 * Packer implementation that runs the `read` kernel of the yuv422p10le program on
 * three planar source buffers (Y, U, V).
 */
export class Reader extends PackImpl {
  /**
   * @param width - visible line width in pixels
   * @param height - number of lines
   */
  constructor(width: number, height: number) {
    super('yuv422p10le', width, height, yuv422p10leKernel, 'read')
    // format description fields (presumably declared on PackImpl)
    this.numBits = 10
    this.lumaBlack = 64
    this.lumaWhite = 940
    this.chromaRange = 896
    this.isRGB = false
    // one byte count per plane: each chroma plane is half the size of the luma plane
    const lumaBytes = getPitchBytes(this.width) * this.height
    this.numBytes = [lumaBytes, lumaBytes / 2, lumaBytes / 2]
    // one work group per line, each work item covering pixelsPerWorkItem pixels
    this.workItemsPerGroup = Math.ceil(getPitch(this.width) / pixelsPerWorkItem)
    this.globalWorkItems = this.workItemsPerGroup * this.height
  }

  /**
   * Maps generic packer parameters onto the argument names of the `read` kernel.
   *
   * @param params - must carry `sources` (array of exactly three OpenCL buffers in
   *   Y, U, V order), plus `dest`, `colMatrix`, `gammaLut` and `gamutMatrix`
   * @returns the kernel arguments keyed by kernel parameter name
   * @throws Error when `sources` is missing, not an array, or not of length 3
   */
  getKernelParams(params: KernelParams): KernelParams {
    const srcArray = params.sources as Array<OpenCLBuffer> | undefined
    // Array.isArray guards against a missing `sources`, which would otherwise fail
    // with an unhelpful TypeError on `.length`
    if (!Array.isArray(srcArray) || srcArray.length !== 3)
      throw new Error(`Reader for ${this.name} requires sources parameter with 3 OpenCL buffers`)
    return {
      inputY: srcArray[0],
      inputU: srcArray[1],
      inputV: srcArray[2],
      output: params.dest,
      width: this.width,
      colMatrix: params.colMatrix,
      gammaLut: params.gammaLut,
      gamutMatrix: params.gamutMatrix
    }
  }
}
/**
 * Packer implementation that runs the `write` kernel of the yuv422p10le program,
 * producing three planar destination buffers (Y, U, V).
 */
export class Writer extends PackImpl {
  /**
   * @param width - visible line width in pixels
   * @param height - number of lines
   * @param interlaced - when true, half as many work items are scheduled and the
   *   caller-supplied interlace mode is forwarded to the kernel
   */
  constructor(width: number, height: number, interlaced: boolean) {
    super('yuv422p10le', width, height, yuv422p10leKernel, 'write')
    this.interlaced = interlaced
    // format description fields (presumably declared on PackImpl)
    this.numBits = 10
    this.lumaBlack = 64
    this.lumaWhite = 940
    this.chromaRange = 896
    this.isRGB = false
    // one byte count per plane: each chroma plane is half the size of the luma plane
    const lumaBytes = getPitchBytes(this.width) * this.height
    this.numBytes = [lumaBytes, lumaBytes / 2, lumaBytes / 2]
    // one work group per line, each work item covering pixelsPerWorkItem pixels
    this.workItemsPerGroup = Math.ceil(getPitch(this.width) / pixelsPerWorkItem)
    this.globalWorkItems = (this.workItemsPerGroup * this.height) / (this.interlaced ? 2 : 1)
  }

  /**
   * Maps generic packer parameters onto the argument names of the `write` kernel.
   *
   * @param params - must carry `dests` (array of exactly three OpenCL buffers in
   *   Y, U, V order), plus `source`, `colMatrix`, `gammaLut` and, for interlaced
   *   writers, `interlace`
   * @returns the kernel arguments keyed by kernel parameter name
   * @throws Error when `dests` is missing, not an array, or not of length 3
   */
  getKernelParams(params: KernelParams): KernelParams {
    const dstArray = params.dests as Array<OpenCLBuffer> | undefined
    // Array.isArray guards against a missing `dests`, which would otherwise fail
    // with an unhelpful TypeError on `.length`
    if (!Array.isArray(dstArray) || dstArray.length !== 3)
      throw new Error(`Writer for ${this.name} requires dests parameter with 3 OpenCL buffers`)
    return {
      input: params.source,
      outputY: dstArray[0],
      outputU: dstArray[1],
      outputV: dstArray[2],
      width: this.width,
      // a progressive writer ignores whatever interlace mode the caller passed
      interlace: this.interlaced ? (params.interlace as Interlace) : Interlace.Progressive,
      colMatrix: params.colMatrix,
      gammaLut: params.gammaLut
    }
  }
}