/*
  Phaneron - Clustered, accelerated and cloud-fit video server, pre-assembled and in kit form.
  Copyright (C) 2020 Streampunk Media Ltd.

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <https://www.gnu.org/licenses/>.

  https://www.streampunk.media/ mailto:furnace@streampunk.media
  14 Ormiscaig, Aultbea, Achnasheen, IV22 2JJ U.K.
*/

import { PackImpl, Interlace } from './packer'
import { KernelParams, OpenCLBuffer } from 'nodencl'

// OpenCL C source handed to PackImpl by both Reader ('read') and Writer ('write').
// NOTE(review): the literal below is reproduced exactly as found. It looks damaged:
// its line breaks are collapsed (so each `//` kernel comment swallows the code after
// it), loop headers such as `for (uint i=0; i 0) {` are not valid OpenCL C, and there
// is no `__kernel void write` entry point even though Writer asks for one. Restore
// the kernel text from a pristine copy before relying on it.
const yuv422p10leKernel = ` __kernel void read(__global ushort8* restrict inputY, __global ushort4* restrict inputU, __global ushort4* restrict inputV, __global float4* restrict output, __private unsigned int width, __constant float4* restrict colMatrix, __global float* restrict gammaLut, __constant float4* restrict gamutMatrix) { bool lastItemOnLine = get_local_id(0) == get_local_size(0) - 1; // 64 output pixels per workItem = 8 input luma ushort8s per work item, 8 each u & v ushort4s per work item uint numPixels = lastItemOnLine && (0 != width % 64) ? 
width % 64 : 64; uint numLoops = numPixels / 8; uint remain = numPixels % 8; uint pitchReads = (width + 7) / 8; uint inOff = 8 * get_local_id(0) + pitchReads * get_group_id(0); uint outOff = width * get_group_id(0) + get_local_id(0) * 64; float4 colMatR = colMatrix[0]; float4 colMatG = colMatrix[1]; float4 colMatB = colMatrix[2]; // optimise loading of the 3x3 gamut matrix float4 gamutMat0 = gamutMatrix[0]; float4 gamutMat1 = gamutMatrix[1]; float4 gamutMat2 = gamutMatrix[2]; float3 gamutMatR = (float3)(gamutMat0.s0, gamutMat0.s1, gamutMat0.s2); float3 gamutMatG = (float3)(gamutMat0.s3, gamutMat1.s0, gamutMat1.s1); float3 gamutMatB = (float3)(gamutMat1.s2, gamutMat1.s3, gamutMat2.s0); for (uint i=0; i 0) { ushort8 y = inputY[inOff]; ushort4 u = inputU[inOff]; ushort4 v = inputV[inOff]; ushort4 yuva[6]; yuva[0] = (ushort4)(y.s0, u.s0, v.s0, 1); yuva[1] = (ushort4)(y.s1, u.s0, v.s0, 1); if (remain > 2) { yuva[2] = (ushort4)(y.s2, u.s1, v.s1, 1); yuva[3] = (ushort4)(y.s3, u.s1, v.s1, 1); if (remain > 4) { yuva[4] = (ushort4)(y.s4, u.s2, v.s2, 1); yuva[5] = (ushort4)(y.s5, u.s2, v.s2, 1); } } for (uint p=0; p 0) { ushort8 y = (ushort8)(64, 64, 64, 64, 64, 64, 64, 64); ushort4 u = (ushort4)(512, 512, 512, 512); ushort4 v = (ushort4)(512, 512, 512, 512); ushort3 yuv[6]; for (uint p=0; p 2) { y.s2 = yuv[2].s0; y.s3 = yuv[3].s0; u.s1 = yuv[2].s1; v.s1 = yuv[2].s2; if (remain > 4) { y.s4 = yuv[4].s0; y.s5 = yuv[5].s0; u.s1 = yuv[4].s1; v.s1 = yuv[4].s2; } } outputY[outOff] = y; outputU[outOff] = u; outputV[outOff] = v; } } `

/** Line pitch in samples: `width` rounded up to the next multiple of 8 (for width >= 1). */
const getPitch = (width: number): number => width + 7 - ((width - 1) % 8)

/** Luma line pitch in bytes: two bytes per 16-bit sample. */
const getPitchBytes = (width: number): number => getPitch(width) * 2

/**
 * Fills `buf` with a planar test pattern: a Y plane followed by a U plane and a V plane.
 *
 * Every sample of all three planes is first set to black (Y = 64, Cb = Cr = 512), padding
 * included. The visible pixels then receive a luma ramp: each pixel pair is written as
 * (Y, Y + 1), after which Y advances by 2 and wraps from 938 back to 64. The ramp carries
 * on across line boundaries. Chroma stays at 512.
 *
 * @param buf - destination buffer; must hold `2 * getPitchBytes(width) * height` bytes
 * @param width - picture width in pixels
 * @param height - picture height in lines
 */
export const fillBuf = (buf: Buffer, width: number, height: number): void => {
  const lumaPitchBytes = getPitchBytes(width)
  // One chroma sample per two luma samples, so a chroma line is half as long.
  const chromaPitchBytes = lumaPitchBytes / 2
  let lOff = 0
  let uOff = lumaPitchBytes * height
  let vOff = uOff + chromaPitchBytes * height

  // Black out all three planes, including the padding beyond `width`.
  for (let i = 0; i < lumaPitchBytes * height; i += 2) buf.writeUInt16LE(64, lOff + i)
  for (let i = 0; i < chromaPitchBytes * height; i += 2) buf.writeUInt16LE(512, uOff + i)
  for (let i = 0; i < chromaPitchBytes * height; i += 2) buf.writeUInt16LE(512, vOff + i)

  let Y = 64
  const Cb = 512
  const Cr = 512
  for (let y = 0; y < height; ++y) {
    let xlOff = 0
    let xcOff = 0
    for (let x = 0; x < width; x += 2) {
      // Two luma samples and one Cb/Cr pair per iteration.
      buf.writeUInt16LE(Y, lOff + xlOff)
      buf.writeUInt16LE(Y + 1, lOff + xlOff + 2)
      xlOff += 4
      buf.writeUInt16LE(Cb, uOff + xcOff)
      buf.writeUInt16LE(Cr, vOff + xcOff)
      xcOff += 2
      Y = Y === 938 ? 64 : Y + 2
    }
    lOff += lumaPitchBytes
    uOff += chromaPitchBytes
    vOff += chromaPitchBytes
  }
}

/**
 * Logs a hex dump of eight pixels from each of the first `numLines` lines of a planar
 * buffer laid out as `fillBuf` writes it (Y plane, then U plane, then V plane).
 *
 * Each line is printed as four `U, Y0, V, Y1;` groups of 3-digit hex values.
 *
 * @param buf - buffer to dump
 * @param width - picture width in pixels
 * @param height - picture height in lines (locates the U and V planes)
 * @param numLines - number of lines to print, starting at line 0
 * @param lineEnds - when true, dump the last eight samples of each line's pitch instead
 *   of the first eight
 */
export const dumpBuf = (
  buf: Buffer,
  width: number,
  height: number,
  numLines: number,
  lineEnds?: boolean
): void => {
  console.log()
  const lumaPitchBytes = getPitchBytes(width)
  const chromaPitchBytes = lumaPitchBytes / 2
  // Byte offsets of the start of the U and V planes.
  const uPlaneOff = lumaPitchBytes * height
  const vPlaneOff = uPlaneOff + chromaPitchBytes * height

  let yLineOff = 0
  let uLineOff = uPlaneOff
  let vLineOff = vPlaneOff

  const readHex = (off: number, numDigits: number): string =>
    buf.readUInt16LE(off).toString(16).padStart(numDigits, '0')
  // `off` is a pixel index: luma has one 2-byte sample per pixel, chroma has one 2-byte
  // sample per pixel pair, so the chroma byte offset for an even pixel equals `off`.
  const getYHex = (off: number): string => readHex(yLineOff + off * 2, 3)
  const getUHex = (off: number): string => readHex(uLineOff + off, 3)
  const getVHex = (off: number): string => readHex(vLineOff + off, 3)

  const numPixels = 8
  const endOffset = lineEnds ? lumaPitchBytes - numPixels * 2 : 0
  for (let l = 0; l < numLines; ++l) {
    // Each plane advances by its own pitch; chroma uses half the luma byte offset.
    yLineOff = lumaPitchBytes * l + endOffset
    uLineOff = uPlaneOff + chromaPitchBytes * l + endOffset / 2
    vLineOff = vPlaneOff + chromaPitchBytes * l + endOffset / 2

    let s = `Line ${l}:`
    for (let p = 0; p < numPixels; p += 2)
      s += ` ${getUHex(p)}, ${getYHex(p)}, ${getVHex(p)}, ${getYHex(p + 1)};`
    console.log(s)
  }
}

// process one image line per work group
const pixelsPerWorkItem = 64

/**
 * Packer implementation that selects the `read` kernel for planar yuv422p10le input.
 *
 * Sets 10-bit levels (luma black 64, luma white 940, chroma range 896) and sizes three
 * planes, with each chroma plane half the size of the luma plane.
 */
export class Reader extends PackImpl {
  constructor(width: number, height: number) {
    super('yuv422p10le', width, height, yuv422p10leKernel, 'read')
    this.numBits = 10
    this.lumaBlack = 64
    this.lumaWhite = 940
    this.chromaRange = 896
    this.isRGB = false
    const lumaBytes = getPitchBytes(this.width) * this.height
    this.numBytes = [lumaBytes, lumaBytes / 2, lumaBytes / 2]
    // Enough 64-pixel work items to cover one padded line, one group per line.
    this.workItemsPerGroup = Math.ceil(getPitch(this.width) / pixelsPerWorkItem)
    this.globalWorkItems = this.workItemsPerGroup * this.height
  }

  /**
   * Maps generic kernel parameters onto the argument names of the `read` kernel.
   *
   * @param params - must carry `sources` (exactly three buffers: Y, U, V), `dest`,
   *   `colMatrix`, `gammaLut` and `gamutMatrix`
   * @returns the parameters keyed by kernel argument name
   * @throws Error when `sources` does not contain exactly three buffers
   */
  getKernelParams(params: KernelParams): KernelParams {
    const srcArray: Array<OpenCLBuffer> = params.sources as Array<OpenCLBuffer>
    if (srcArray.length !== 3)
      throw new Error(`Reader for ${this.name} requires sources parameter with 3 OpenCL buffers`)

    return {
      inputY: srcArray[0],
      inputU: srcArray[1],
      inputV: srcArray[2],
      output: params.dest,
      width: this.width,
      colMatrix: params.colMatrix,
      gammaLut: params.gammaLut,
      gamutMatrix: params.gamutMatrix
    }
  }
}

/**
 * Packer implementation that selects the `write` kernel for planar yuv422p10le output.
 *
 * Uses the same levels and plane sizes as Reader. When `interlaced` is true, half as
 * many global work items are scheduled.
 */
export class Writer extends PackImpl {
  constructor(width: number, height: number, interlaced: boolean) {
    super('yuv422p10le', width, height, yuv422p10leKernel, 'write')
    this.interlaced = interlaced
    this.numBits = 10
    this.lumaBlack = 64
    this.lumaWhite = 940
    this.chromaRange = 896
    this.isRGB = false
    const lumaBytes = getPitchBytes(this.width) * this.height
    this.numBytes = [lumaBytes, lumaBytes / 2, lumaBytes / 2]
    this.workItemsPerGroup = Math.ceil(getPitch(this.width) / pixelsPerWorkItem)
    this.globalWorkItems = (this.workItemsPerGroup * this.height) / (this.interlaced ? 2 : 1)
  }

  /**
   * Maps generic kernel parameters onto the argument names of the `write` kernel.
   *
   * @param params - must carry `source`, `dests` (exactly three buffers: Y, U, V),
   *   `colMatrix` and `gammaLut`; `interlace` is only used by an interlaced writer
   * @returns the parameters keyed by kernel argument name
   * @throws Error when `dests` does not contain exactly three buffers
   */
  getKernelParams(params: KernelParams): KernelParams {
    const dstArray: Array<OpenCLBuffer> = params.dests as Array<OpenCLBuffer>
    if (dstArray.length !== 3)
      throw new Error(`Writer for ${this.name} requires dests parameter with 3 OpenCL buffers`)

    return {
      input: params.source,
      outputY: dstArray[0],
      outputU: dstArray[1],
      outputV: dstArray[2],
      width: this.width,
      // A progressive writer ignores any interlace value supplied by the caller.
      interlace: this.interlaced ? (params.interlace as Interlace) : Interlace.Progressive,
      colMatrix: params.colMatrix,
      gammaLut: params.gammaLut
    }
  }
}