Tensor <--> image - Adding per channel compute for Norm mean & Bias (#14705)

### Description
Enabled the use of per channel Bias and Mean normalization when converting an image <--> tensor.
Added a few bug fixes and updates to the relevant E2E tests.

---------

Co-authored-by: shalvamist <shalva.mist@microsoft.com>
This commit is contained in:
shalvamist 2023-05-01 09:37:50 -07:00 committed by GitHub
parent 0770cf3699
commit c10a6a9d17
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 247 additions and 87 deletions

View file

@ -192,21 +192,26 @@ export class Tensor implements TensorInterface {
if (options.height === undefined || options.width === undefined) {
throw new Error('Image height and width must be defined');
}
if (options.tensorLayout === 'NHWC') {
throw new Error('NHWC Tensor layout is not supported yet');
}
const {height, width} = options;
const norm = options.norm;
let normMean: number;
let normBias: number;
if (norm === undefined || norm.mean === undefined) {
normMean = 255;
const norm = options.norm ?? {mean: 255, bias: 0};
let normMean: [number, number, number, number];
let normBias: [number, number, number, number];
if (typeof (norm.mean) === 'number') {
normMean = [norm.mean, norm.mean, norm.mean, norm.mean];
} else {
normMean = norm.mean;
normMean = [norm.mean![0], norm.mean![1], norm.mean![2], norm.mean![3] ?? 255];
}
if (norm === undefined || norm.bias === undefined) {
normBias = 0;
if (typeof (norm.bias) === 'number') {
normBias = [norm.bias, norm.bias, norm.bias, norm.bias];
} else {
normBias = norm.bias;
normBias = [norm.bias![0], norm.bias![1], norm.bias![2], norm.bias![3] ?? 0];
}
const inputformat = options.bitmapFormat !== undefined ? options.bitmapFormat : 'RGBA';
@ -215,12 +220,12 @@ export class Tensor implements TensorInterface {
const outputformat = options.tensorFormat !== undefined ?
(options.tensorFormat !== undefined ? options.tensorFormat : 'RGB') :
'RGB';
const offset = height * width;
const float32Data = outputformat === 'RGBA' ? new Float32Array(offset * 4) : new Float32Array(offset * 3);
const stride = height * width;
const float32Data = outputformat === 'RGBA' ? new Float32Array(stride * 4) : new Float32Array(stride * 3);
// Default pointer assignments
let step = 4, rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
let rTensorPointer = 0, gTensorPointer = stride, bTensorPointer = stride * 2, aTensorPointer = -1;
// Updating the pointer assignments based on the input image format
if (inputformat === 'RGB') {
@ -233,24 +238,24 @@ export class Tensor implements TensorInterface {
// Updating the pointer assignments based on the output tensor format
if (outputformat === 'RGBA') {
aTensorPointer = offset * 3;
aTensorPointer = stride * 3;
} else if (outputformat === 'RBG') {
rTensorPointer = 0;
bTensorPointer = offset;
gTensorPointer = offset * 2;
bTensorPointer = stride;
gTensorPointer = stride * 2;
} else if (outputformat === 'BGR') {
bTensorPointer = 0;
gTensorPointer = offset;
rTensorPointer = offset * 2;
gTensorPointer = stride;
rTensorPointer = stride * 2;
}
for (let i = 0; i < offset;
for (let i = 0; i < stride;
i++, rImagePointer += step, bImagePointer += step, gImagePointer += step, aImagePointer += step) {
float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias) / normMean;
float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias) / normMean;
float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias) / normMean;
float32Data[rTensorPointer++] = (buffer[rImagePointer] + normBias[0]) / normMean[0];
float32Data[gTensorPointer++] = (buffer[gImagePointer] + normBias[1]) / normMean[1];
float32Data[bTensorPointer++] = (buffer[bImagePointer] + normBias[2]) / normMean[2];
if (aTensorPointer !== -1 && aImagePointer !== -1) {
float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias) / normMean;
float32Data[aTensorPointer++] = (buffer[aImagePointer] + normBias[3]) / normMean[3];
}
}
@ -264,7 +269,7 @@ export class Tensor implements TensorInterface {
static async fromImage(imageData: ImageData, options?: TensorFromImageOptions): Promise<Tensor>;
static async fromImage(imageElement: HTMLImageElement, options?: TensorFromImageOptions): Promise<Tensor>;
static async fromImage(bitmap: ImageBitmap, options: TensorFromImageOptions): Promise<Tensor>;
static async fromImage(url: string, options?: TensorFromImageOptions): Promise<Tensor>;
static async fromImage(urlSource: string, options?: TensorFromImageOptions): Promise<Tensor>;
static async fromImage(image: ImageData|HTMLImageElement|ImageBitmap|string, options?: TensorFromImageOptions):
Promise<Tensor> {
@ -272,10 +277,10 @@ export class Tensor implements TensorInterface {
const isHTMLImageEle = typeof (HTMLImageElement) !== 'undefined' && image instanceof HTMLImageElement;
const isImageDataEle = typeof (ImageData) !== 'undefined' && image instanceof ImageData;
const isImageBitmap = typeof (ImageBitmap) !== 'undefined' && image instanceof ImageBitmap;
const isURL = typeof (String) !== 'undefined' && (image instanceof String || typeof image === 'string');
const isString = typeof image === 'string';
let data: Uint8ClampedArray|undefined;
let tensorConfig: TensorFromImageOptions = {};
let tensorConfig: TensorFromImageOptions = options ?? {};
// filling and checking image configuration options
if (isHTMLImageEle) {
@ -406,7 +411,7 @@ export class Tensor implements TensorInterface {
throw new Error('Can not access image data');
}
} else if (isURL) {
} else if (isString) {
return new Promise((resolve, reject) => {
const canvas = document.createElement('canvas');
const context = canvas.getContext('2d');
@ -415,22 +420,20 @@ export class Tensor implements TensorInterface {
}
const newImage = new Image();
newImage.crossOrigin = 'Anonymous';
newImage.src = image as string;
newImage.src = image;
newImage.onload = () => {
canvas.width = newImage.width;
canvas.height = newImage.height;
context.drawImage(newImage, 0, 0, canvas.width, canvas.height);
const img = context.getImageData(0, 0, canvas.width, canvas.height);
if (options !== undefined) {
// using square brackets to avoid TS error - type 'never'
if (options.height !== undefined && options.height !== canvas.height) {
throw new Error('Image input config height doesn\'t match ImageBitmap height');
throw new Error('Image input config height doesn\'t match height');
} else {
tensorConfig.height = canvas.height;
}
// using square brackets to avoid TS error - type 'never'
if (options.width !== undefined && options.width !== canvas.width) {
throw new Error('Image input config width doesn\'t match ImageBitmap width');
throw new Error('Image input config width doesn\'t match width');
} else {
tensorConfig.width = canvas.width;
}
@ -452,20 +455,141 @@ export class Tensor implements TensorInterface {
}
}
toDataURL(options?: TensorToImageDataOptions): string {
const canvas = document.createElement('canvas');
canvas.width = this.dims[3];
canvas.height = this.dims[2];
const pixels2DContext = canvas.getContext('2d');
if (pixels2DContext != null) {
// Default values for height and width & format
let width: number;
let height: number;
if (options?.tensorLayout !== undefined && options.tensorLayout === 'NHWC') {
width = this.dims[2];
height = this.dims[3];
} else { // Default layout is NCWH
width = this.dims[3];
height = this.dims[2];
}
const inputformat = options?.format !== undefined ? options.format : 'RGB';
const norm = options?.norm;
let normMean: [number, number, number, number];
let normBias: [number, number, number, number];
if (norm === undefined || norm.mean === undefined) {
normMean = [255, 255, 255, 255];
} else {
if (typeof (norm.mean) === 'number') {
normMean = [norm.mean, norm.mean, norm.mean, norm.mean];
} else {
normMean = [norm.mean[0], norm.mean[1], norm.mean[2], 0];
if (norm.mean[3] !== undefined) {
normMean[3] = norm.mean[3];
}
}
}
if (norm === undefined || norm.bias === undefined) {
normBias = [0, 0, 0, 0];
} else {
if (typeof (norm.bias) === 'number') {
normBias = [norm.bias, norm.bias, norm.bias, norm.bias];
} else {
normBias = [norm.bias[0], norm.bias[1], norm.bias[2], 0];
if (norm.bias[3] !== undefined) {
normBias[3] = norm.bias[3];
}
}
}
const stride = height * width;
// Default pointer assignments
let rTensorPointer = 0, gTensorPointer = stride, bTensorPointer = stride * 2, aTensorPointer = -1;
// Updating the pointer assignments based on the input image format
if (inputformat === 'RGBA') {
rTensorPointer = 0;
gTensorPointer = stride;
bTensorPointer = stride * 2;
aTensorPointer = stride * 3;
} else if (inputformat === 'RGB') {
rTensorPointer = 0;
gTensorPointer = stride;
bTensorPointer = stride * 2;
} else if (inputformat === 'RBG') {
rTensorPointer = 0;
bTensorPointer = stride;
gTensorPointer = stride * 2;
}
for (let i = 0; i < height; i++) {
for (let j = 0; j < width; j++) {
const R = ((this.data[rTensorPointer++] as number) - normBias[0]) * normMean[0]; // R value
const G = ((this.data[gTensorPointer++] as number) - normBias[1]) * normMean[1]; // G value
const B = ((this.data[bTensorPointer++] as number) - normBias[2]) * normMean[2]; // B value
const A = aTensorPointer === -1 ?
255 :
((this.data[aTensorPointer++] as number) - normBias[3]) * normMean[3]; // A value
// eslint-disable-next-line @typescript-eslint/restrict-plus-operands
pixels2DContext.fillStyle = 'rgba(' + R + ',' + G + ',' + B + ',' + A + ')';
pixels2DContext.fillRect(j, i, 1, 1);
}
}
return canvas.toDataURL();
} else {
throw new Error('Can not access image data');
}
}
toImageData(options?: TensorToImageDataOptions): ImageData {
const pixels2DContext = document.createElement('canvas').getContext('2d');
let image: ImageData;
if (pixels2DContext != null) {
// Default values for height and width & format
const width = this.dims[3];
const height = this.dims[2];
const channels = this.dims[1];
let width: number;
let height: number;
let channels: number;
if (options?.tensorLayout !== undefined && options.tensorLayout === 'NHWC') {
width = this.dims[2];
height = this.dims[1];
channels = this.dims[3];
} else { // Default layout is NCWH
width = this.dims[3];
height = this.dims[2];
channels = this.dims[1];
}
const inputformat = options !== undefined ? (options.format !== undefined ? options.format : 'RGB') : 'RGB';
const normMean = options !== undefined ? (options.norm?.mean !== undefined ? options.norm.mean : 255) : 255;
const normBias = options !== undefined ? (options.norm?.bias !== undefined ? options.norm.bias : 0) : 0;
const offset = height * width;
const norm = options?.norm;
let normMean: [number, number, number, number];
let normBias: [number, number, number, number];
if (norm === undefined || norm.mean === undefined) {
normMean = [255, 255, 255, 255];
} else {
if (typeof (norm.mean) === 'number') {
normMean = [norm.mean, norm.mean, norm.mean, norm.mean];
} else {
normMean = [norm.mean[0], norm.mean[1], norm.mean[2], 255];
if (norm.mean[3] !== undefined) {
normMean[3] = norm.mean[3];
}
}
}
if (norm === undefined || norm.bias === undefined) {
normBias = [0, 0, 0, 0];
} else {
if (typeof (norm.bias) === 'number') {
normBias = [norm.bias, norm.bias, norm.bias, norm.bias];
} else {
normBias = [norm.bias[0], norm.bias[1], norm.bias[2], 0];
if (norm.bias[3] !== undefined) {
normBias[3] = norm.bias[3];
}
}
}
const stride = height * width;
if (options !== undefined) {
if (options.height !== undefined && options.height !== height) {
throw new Error('Image output config height doesn\'t match tensor height');
@ -482,33 +606,34 @@ export class Tensor implements TensorInterface {
// Default pointer assignments
const step = 4;
let rImagePointer = 0, gImagePointer = 1, bImagePointer = 2, aImagePointer = 3;
let rTensorPointer = 0, gTensorPointer = offset, bTensorPointer = offset * 2, aTensorPointer = -1;
let rTensorPointer = 0, gTensorPointer = stride, bTensorPointer = stride * 2, aTensorPointer = -1;
// Updating the pointer assignments based on the input image format
if (inputformat === 'RGBA') {
rTensorPointer = 0;
gTensorPointer = offset;
bTensorPointer = offset * 2;
aTensorPointer = offset * 3;
gTensorPointer = stride;
bTensorPointer = stride * 2;
aTensorPointer = stride * 3;
} else if (inputformat === 'RGB') {
rTensorPointer = 0;
gTensorPointer = offset;
bTensorPointer = offset * 2;
gTensorPointer = stride;
bTensorPointer = stride * 2;
} else if (inputformat === 'RBG') {
rTensorPointer = 0;
bTensorPointer = offset;
gTensorPointer = offset * 2;
bTensorPointer = stride;
gTensorPointer = stride * 2;
}
image = pixels2DContext.createImageData(width, height);
for (let i = 0; i < height * width;
rImagePointer += step, gImagePointer += step, bImagePointer += step, aImagePointer += step, i++) {
image.data[rImagePointer] = ((this.data[rTensorPointer++] as number) - normBias) * normMean; // R value
image.data[gImagePointer] = ((this.data[gTensorPointer++] as number) - normBias) * normMean; // G value
image.data[bImagePointer] = ((this.data[bTensorPointer++] as number) - normBias) * normMean; // B value
image.data[aImagePointer] =
aTensorPointer === -1 ? 255 : ((this.data[aTensorPointer++] as number) - normBias) * normMean; // A value
image.data[rImagePointer] = ((this.data[rTensorPointer++] as number) - normBias[0]) * normMean[0]; // R value
image.data[gImagePointer] = ((this.data[gTensorPointer++] as number) - normBias[1]) * normMean[1]; // G value
image.data[bImagePointer] = ((this.data[bTensorPointer++] as number) - normBias[2]) * normMean[2]; // B value
image.data[aImagePointer] = aTensorPointer === -1 ?
255 :
((this.data[aTensorPointer++] as number) - normBias[3]) * normMean[3]; // A value
}
} else {

View file

@ -21,6 +21,15 @@ export interface TypedShapeUtils<T extends Tensor.Type> {
// TODO: add more tensor utilities
export interface TypedTensorUtils<T extends Tensor.Type> extends Properties, TypedShapeUtils<T> {
/**
* creates an DataURL instance from tensor
*
* @param options - Interface describing tensor instance - Defaults: RGB, 3 channels, 0-255, NHWC
* 0-255, NHWC
* @returns An DataURL instance which can be used to draw on canvas
*/
toDataURL(options?: TensorToImageDataOptions): string;
/**
* creates an ImageData instance from tensor
*

View file

@ -261,10 +261,12 @@ export interface TensorToImageDataOptions {
width?: number;
/**
* Describes normalization parameters to ImageData conversion from tensor - default values - Bias: 0, Mean: 255
* Supports computation base on single parameter (extended to all channels) up to value per channel
* Example - tesnor.toImageData({norm:{bias:[2/5,3/6,9/17,5/8],mean:[5,6,17,8]}})
*/
norm?: {
bias?: number; // Todo add support - |[number,number,number]|[number,number,number,number];
mean?: number; // Todo add support - |[number,number,number]|[number,number,number,number];
bias?: number|[number, number, number]|[number, number, number, number];
mean?: number | [number, number, number] | [number, number, number, number];
};
}
/**
@ -284,7 +286,7 @@ export interface TensorFromImageOptions {
*/
dataType?: 'float32'|'uint8';
/**
* Tensor channel layout - default is 'NHWC'
* Tensor channel layout - default is 'NCHW' - TODO: add support for 'NHWC'
*/
tensorLayout?: 'NHWC'|'NCHW';
/**
@ -305,10 +307,12 @@ export interface TensorFromImageOptions {
resizedWidth?: number;
/**
* Describes normalization parameters to tensor conversion from image data - default values - Bias: 0, Mean: 255
* Supports computation base on single parameter (extended to all channels) up to value per channel
* Example - Tensor.fromImage(img, {norm:{bias:[2,3,9,5],mean:[5,6,17,8]}});
*/
norm?: {
bias?: number; // Todo add support - |[number,number,number]|[number,number,number,number];
mean?: number; // Todo add support - |[number,number,number]|[number,number,number,number];
bias?: number|[number, number, number]|[number, number, number, number];
mean?: number | [number, number, number] | [number, number, number, number];
};
}
export interface TensorFactory {
@ -335,12 +339,12 @@ export interface TensorFactory {
/**
* create a tensor from image object - HTMLImageElement, ImageData, ImageBitmap, URL
*
* @param url - {string} - Assuming the string is a URL to an image
* @param urlSource - {string} - Assuming the string is a URL to an image or Data URL
* @param options - Optional - Interface describing input image & output tensor -
* Input Defaults: RGBA, 3 channels, 0-255, NHWC - Output Defaults: same as input parameters
* @returns A promise that resolves to a tensor object
*/
fromImage(url: string, options?: TensorFromImageOptions): Promise<Tensor>;
fromImage(urlSource: string, options?: TensorFromImageOptions): Promise<Tensor>;
/**
* create a tensor from image object - HTMLImageElement, ImageData, ImageBitmap, URL

View file

@ -3,63 +3,85 @@
'use strict';
const IMAGE_HEIGHT = 20
const IMAGE_WIDTH = 15
function getRndColor() {
let r = 255*Math.random()|0,
g = 255*Math.random()|0,
b = 255*Math.random()|0;
return 'rgb(' + r + ',' + g + ',' + b + ')';
b = 255*Math.random()|0,
a = 255*Math.random()|0;
return 'rgb(' + r + ',' + g + ',' + b + ',' + a +')';
}
function compareTensors(tensorA, tensorB, msg){
for (let i = 0; i < IMAGE_HEIGHT*IMAGE_WIDTH*3; i++) {
if(tensorA.data[i]!==tensorB.data[i]){
console.log("Element - " + i + " - " + tensorA.data[i] + " - " + tensorB.data[i]);
throw new Error(msg);
}
}
}
it('Browser E2E testing - Tensor <--> Image E2E test', async function () {
// Creating Image HTML Image Element
let img = document.createElement('img');
let img = new Image();
img.crossOrigin = 'Anonymous';
// Creating canvas element
const canvas = document.createElement('canvas');
canvas.height = 200;
canvas.width = 200;
canvas.height = IMAGE_HEIGHT;
canvas.width = IMAGE_WIDTH;
const context = canvas.getContext('2d');
let y, x;
for(y = 0; y < 200; y++) {
for(x = 0; x < 200; x++) {
// Filling the canvas with random data
for(y = 0; y < IMAGE_HEIGHT; y++) {
for(x = 0; x < IMAGE_WIDTH; x++) {
context.fillStyle = getRndColor();
context.fillRect(x, y, 1, 1);
}
}
img = canvas.toDataURL();
// Copying the canavas data to the image
img.src = canvas.toDataURL();
// Image HTML element to tensor API
const inputTensorHTML = await ort.Tensor.fromImage(img);
// Tensor to ImageDAta API
let newImage = inputTensorHTML.toImageData();
// ImageData to tensor API
let inputTensorImageData = await ort.Tensor.fromImage(newImage);
// Testing HTML Image Element --> Tensor --> ImageData --> Tensor
img.onload = async () =>{
// Image HTML element to tensor API - HTML
const inputTensorHTML = await ort.Tensor.fromImage(img, {norm:{bias:[2,3,9,5],mean:[5,6,17,8]}});
// Tensor to ImageDAta API
let newImage = inputTensorHTML.toImageData({norm:{bias:[2/5,3/6,9/17,5/8],mean:[5,6,17,8]}});
// ImageData to tensor API
let inputTensorImageData = await ort.Tensor.fromImage(newImage, options={norm:{bias:[2,3,9,5],mean:[5,6,17,8]}});
for (let i = 0; i < newImage.height*newImage.width*3; i++) {
if(inputTensorImageData.data[i]!==inputTensorHTML.data[i]){
console.log("Element - " + i + " - " + inputTensorHTML.data[i] + " - " + inputTensorImageData.data[i]);
throw new Error('BUG in ImageData & URL');
}
compareTensors(inputTensorHTML,inputTensorImageData,'BUG in HTML image element & ImageData use case');
}
let online = navigator.onLine;
// Copying the canavas data to the image as Data URL
let image = canvas.toDataURL();
// Testing Data URL --> Tensor --> Data URL --> Tensor
// Data URL to tensor API -
const inputTensorDataURL = await ort.Tensor.fromImage(image,{format:'RBG', norm:{bias:[1,10,5,0],mean:[5,7,11,0]}});
// Tensor to ImageDAta API
let newImage = inputTensorDataURL.toDataURL({norm:{bias:[1/5,10/7,5/11,0],mean:[5,7,11,0]}});
// ImageData to tensor API
let inputTensorImageData = await ort.Tensor.fromImage(newImage,{format:'RGB', norm:{bias:[1,10,5,0],mean:[5,7,11,0]}});
compareTensors(inputTensorDataURL,inputTensorImageData,'BUG in ImageData & Data URL use case');
// Testing URL --> Tensor --> ImageData --> Tensor
let online = navigator.onLine;
if(online){
// URL element to tensor API
const inputTensorURL = await ort.Tensor.fromImage('https://media.istockphoto.com/id/172859087/photo/square-eggs.jpg?s=2048x2048&w=is&k=20&c=KiBRyyYaoUUSjcJLBh1-qqVu7LW6UQZBopZdva0f5e4=');
const inputTensorURL = await ort.Tensor.fromImage('https://media.istockphoto.com/id/172859087/photo/square-eggs.jpg?s=2048x2048&w=is&k=20&c=KiBRyyYaoUUSjcJLBh1-qqVu7LW6UQZBopZdva0f5e4=',{norm:{bias:[2,3,9,0],mean:[5,6,17,0]}});
// Tensor to ImageDAta API
newImage = inputTensorURL.toImageData();
let newImage = inputTensorURL.toImageData({format:'RGB',norm:{bias:[2/5,3/6,9/17,0],mean:[5,6,17,0]}});
// ImageData to tensor API
inputTensorImageData = await ort.Tensor.fromImage(newImage);
let inputTensorImageData = await ort.Tensor.fromImage(newImage,{format:'RGB',norm:{bias:[2,3,9,0],mean:[5,6,17,0]}});
for (let i = 0; i < newImage.height*newImage.width*3; i++) {
if(inputTensorURL.data[i]!==inputTensorImageData.data[i]){
console.log("Element - " + i + " - " + inputTensorURL.data[i] + " - " + inputTensorImageData.data[i]);
throw new Error('BUG in ImageData & URL');
}
}
compareTensors(inputTensorURL,inputTensorImageData,'BUG in ImageData & URL');
}else{
console.log("No internet connection - didn't test Image URL to tensor API");
}