[ { "name": "MultiHeadAttention Basic, one head", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 1, 1, 1, 2, 2, 2, 2], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 4], "type": "float32" } ], "outputs": [ { "data": [ 4.973228454589844, 5.973228454589844, 6.973228454589844, 7.973228454589844, 4.999990940093994, 5.999990940093994, 6.999990940093994, 7.999990940093994 ], "dims": [1, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 1, 1, 1, 2, 2, 2, 2], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 4], "type": "float32" } ], "outputs": [ { "data": [ 4.571832656860352, 5.571832656860352, 6.971858501434326, 7.971858501434326, 4.998325824737549, 5.998325824737549, 6.999900817871094, 7.999900817871094 ], "dims": [1, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic with bias", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 1, 1, 1, 2, 2, 2, 2], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 4], "type": "float32" }, { "data": [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4], "dims": [12], "type": "float32" } ], "outputs": [ { "data": [ 5.943336009979248, 
7.94333553314209, 9.999799728393555, 11.999798774719238, 5.9997992515563965, 7.9997992515563965, 10, 11.999999046325684 ], "dims": [1, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention two heads", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ { "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dims": [1, 2, 8], "type": "float32" }, { "data": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4], "dims": [1, 2, 8], "type": "float32" }, { "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dims": [1, 2, 8], "type": "float32" } ], "outputs": [ { "data": [ 8.99963665008545, 9.99963665008545, 10.99963665008545, 11.999635696411133, 13, 14, 15, 16, 9, 10, 11, 12, 13, 14, 15, 16 ], "dims": [1, 2, 8], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention two heads, kv-sequence-length=1", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 2, "type": "int" }], "cases": [ { "name": "T[1]", "inputs": [ { "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dims": [1, 2, 8], "type": "float32" }, { "data": [1, 1, 1, 1, 2, 2, 2, 2], "dims": [1, 1, 8], "type": "float32" }, { "data": [1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 1, 8], "type": "float32" } ], "outputs": [ { "data": [1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 2, 8], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=1 with pastKey and pastValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1], "dims": [1, 1, 1], "type": "float32" }, // K { "data": [2], "dims": [1, 1, 1], "type": "float32" }, // V { "data": [3], "dims": [1, 1, 1], "type": 
"float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // PastKey { "data": [4], "dims": [1, 1, 1, 1], "type": "float32" }, // PastValue { "data": [5], "dims": [1, 1, 1, 1], "type": "float32" } ], "outputs": [ { "data": [3], "dims": [1, 1, 1], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=4 with pastKey and pastValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4], "dims": [1, 1, 4], "type": "float32" }, // K { "data": [5, 6, 7, 8], "dims": [1, 1, 4], "type": "float32" }, // V { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // PastKey { "data": [13, 14, 15, 16], "dims": [1, 1, 1, 4], "type": "float32" }, // PastValue { "data": [17, 18, 19, 20], "dims": [1, 1, 1, 4], "type": "float32" } ], "outputs": [ { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=1 with optional AttentionBias, pastKey, pastValue inputs and optional presentKey, presentValue outputs", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1], "dims": [1, 1, 1], "type": "float32" }, // K { "data": [2], "dims": [1, 1, 1], "type": "float32" }, // V { "data": [3], "dims": [1, 1, 1], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // PastKey { "data": [4], "dims": [1, 1, 1, 1], 
"type": "float32" }, // PastValue { "data": [5], "dims": [1, 1, 1, 1], "type": "float32" } ], "outputs": [ { "data": [4.761593818664551], "dims": [1, 1, 1], "type": "float32" }, { "data": [4, 2], "dims": [1, 1, 2, 1], "type": "float32" }, { "data": [5, 3], "dims": [1, 1, 2, 1], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=4 with attentionBias, pastKey, pastValue inputs and optional presentKey, presentValue outputs", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4], "dims": [1, 1, 4], "type": "float32" }, // K { "data": [5, 6, 7, 8], "dims": [1, 1, 4], "type": "float32" }, // V { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // Past Key { "data": [13, 14, 15, 16], "dims": [1, 1, 1, 4], "type": "float32" }, // Past Value { "data": [17, 18, 19, 20], "dims": [1, 1, 1, 4], "type": "float32" } ], "outputs": [ { "data": [17, 18, 19, 20], "dims": [1, 1, 4], "type": "float32" }, // Present key { "data": [13, 14, 15, 16, 5, 6, 7, 8], "dims": [1, 1, 2, 4], "type": "float32" }, // Present value { "data": [17, 18, 19, 20, 9, 10, 11, 12], "dims": [1, 1, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=1 with attentionBias, pastKey and pastValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1], "dims": [1, 1, 1], "type": "float32" }, // K { "data": [2], "dims": [1, 1, 1], "type": "float32" }, // V { "data": [3], "dims": [1, 1, 1], "type": "float32" }, // Bias { "data": null, "type": "float32" }, 
// Mask { "data": null, "type": "int32" }, // AttentionBias { "data": [10, 20], "dims": [1, 1, 1, 2], "type": "float32" }, // PastKey { "data": [4], "dims": [1, 1, 1, 1], "type": "float32" }, // PastValue { "data": [5], "dims": [1, 1, 1, 1], "type": "float32" } ], "outputs": [ { "data": [3], "dims": [1, 1, 1], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=4 with attentionBias, pastKey and pastValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4], "dims": [1, 1, 4], "type": "float32" }, // K { "data": [5, 6, 7, 8], "dims": [1, 1, 4], "type": "float32" }, // V { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": [100, 200], "dims": [1, 1, 1, 2], "type": "float32" }, // PastKey { "data": [13, 14, 15, 16], "dims": [1, 1, 1, 4], "type": "float32" }, // PastValue { "data": [17, 18, 19, 20], "dims": [1, 1, 1, 4], "type": "float32" } ], "outputs": [ { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, 4 heads and head-size=1 with pastKey, pastValue, presentKey and presentValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 4, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4], "dims": [1, 1, 4], "type": "float32" }, // K { "data": [5, 6, 7, 8], "dims": [1, 1, 4], "type": "float32" }, // V { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // PastKey { "data": [13, 14, 15, 16], "dims": [1, 4, 1, 1], 
"type": "float32" }, // PastValue { "data": [17, 18, 19, 20], "dims": [1, 4, 1, 1], "type": "float32" } ], "outputs": [ { "data": [16.997316360473633, 18, 19, 20], "dims": [1, 1, 4], "type": "float32" }, { "data": [13, 5, 14, 6, 15, 7, 16, 8], "dims": [1, 4, 2, 1], "type": "float32" }, { "data": [17, 9, 18, 10, 19, 11, 20, 12], "dims": [1, 4, 2, 1], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, 4 heads and head-size=4 with pastKey and pastValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 4, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], "dims": [1, 1, 16], "type": "float32" }, // K { "data": [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1], "dims": [1, 1, 16], "type": "float32" }, // V { "data": [2, 4, 8, 16, 1, 3, 9, 27, 1, 2, 4, 8, 16, 32, 64, 128], "dims": [1, 1, 16], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // Past Key { "data": [13, 14, 15, 16, 5, 6, 7, 8, 1, 2, 3, 4, 9, 10, 11, 12], "dims": [1, 4, 1, 4], "type": "float32" }, // Past Value { "data": [17, 18, 19, 20, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8], "dims": [1, 4, 1, 4], "type": "float32" } ], "outputs": [ { "data": [ 16.899608612060547, 17.906301498413086, 18.926380157470703, 19.973230361938477, 1, 3, 9, 27, 1, 2, 4, 8, 5, 6, 7, 8 ], "dims": [1, 1, 16], "type": "float32" }, // Present key { "data": [ 13, 14, 15, 16, 16, 15, 14, 13, 5, 6, 7, 8, 12, 11, 10, 9, 1, 2, 3, 4, 8, 7, 6, 5, 9, 10, 11, 12, 4, 3, 2, 1 ], "dims": [1, 4, 2, 4], "type": "float32" }, // Present value { "data": [ 17, 18, 19, 20, 2, 4, 8, 16, 9, 10, 11, 12, 1, 3, 9, 27, 1, 2, 3, 4, 1, 2, 4, 8, 5, 6, 7, 8, 16, 32, 64, 128 ], "dims": [1, 4, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one 
head and head-size one with attentionBias, pastKey, pastValue, presentKey and presentValue", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1.0], "dims": [1, 1, 1], "type": "float32" }, // K { "data": [2.0], "dims": [1, 1, 1], "type": "float32" }, // V { "data": [3.0], "dims": [1, 1, 1], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": [10, 20], "dims": [1, 1, 1, 2], "type": "float32" }, // PastKey { "data": [4.0], "dims": [1, 1, 1, 1], "type": "float32" }, // PastValue { "data": [5.0], "dims": [1, 1, 1, 1], "type": "float32" } ], "outputs": [ { "data": [3.0006706714630127], "dims": [1, 1, 1], "type": "float32" }, { "data": [4, 2], "dims": [1, 1, 2, 1], "type": "float32" }, { "data": [5, 3], "dims": [1, 1, 2, 1], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=4 with attentionBias, PastKey, PastValue inputs and PresentKey and PresentValue outputs", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4], "dims": [1, 1, 4], "type": "float32" }, // K { "data": [5, 6, 7, 8], "dims": [1, 1, 4], "type": "float32" }, // V { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": [100, 200], "dims": [1, 1, 1, 2], "type": "float32" }, // Past Key { "data": [13, 14, 15, 16], "dims": [1, 1, 1, 4], "type": "float32" }, // Past Value { "data": [17, 18, 19, 20], "dims": [1, 1, 1, 4], "type": "float32" } ], "outputs": [ { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Present key { 
"data": [13, 14, 15, 16, 5, 6, 7, 8], "dims": [1, 1, 2, 4], "type": "float32" }, // Present value { "data": [17, 18, 19, 20, 9, 10, 11, 12], "dims": [1, 1, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size one with pastKey and pastValue; kvBNSH (4-dim Key and Value, 3-dim Q)", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1.0], "dims": [1, 1, 1], "type": "float32" }, // K { "data": [2.0], "dims": [1, 1, 1, 1], "type": "float32" }, // V { "data": [3.0], "dims": [1, 1, 1, 1], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": [10, 20], "dims": [1, 1, 1, 2], "type": "float32" }, // PastKey { "data": [4.0], "dims": [1, 1, 1, 1], "type": "float32" }, // PastValue { "data": [5.0], "dims": [1, 1, 1, 1], "type": "float32" } ], "outputs": [ { "data": [3], "dims": [1, 1, 1], "type": "float32" }, { "data": [4, 2], "dims": [1, 1, 2, 1], "type": "float32" }, { "data": [5, 3], "dims": [1, 1, 2, 1], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size 4 with pastKey and pastValue; Key and Value 4-dims", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1, 2, 3, 4], "dims": [1, 1, 4], "type": "float32" }, // K { "data": [5, 6, 7, 8], "dims": [1, 1, 1, 4], "type": "float32" }, // V { "data": [9, 10, 11, 12], "dims": [1, 1, 1, 4], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": [50, 100], "dims": [1, 1, 1, 2], "type": "float32" }, // PastKey { "data": [13, 14, 15, 16], "dims": [1, 1, 1, 4], "type": "float32" }, 
// PastValue { "data": [17, 18, 19, 20], "dims": [1, 1, 1, 4], "type": "float32" } ], "outputs": [ { "data": [9, 10, 11, 12], "dims": [1, 1, 4], "type": "float32" }, // Present key { "data": [13, 14, 15, 16, 5, 6, 7, 8], "dims": [1, 1, 2, 4], "type": "float32" }, // Present value { "data": [17, 18, 19, 20, 9, 10, 11, 12], "dims": [1, 1, 2, 4], "type": "float32" } ] } ] }, { "name": "MultiHeadAttention Basic, one head and head-size=1 with empty pastKey, pastValue inputs and optional presentKey, presentValue outputs", "operator": "MultiHeadAttention", "opset": { "domain": "com.microsoft", "version": 1 }, "attributes": [{ "name": "num_heads", "data": 1, "type": "int" }], "cases": [ { "name": "T[0]", "inputs": [ // Q { "data": [1], "dims": [1, 1, 1], "type": "float32" }, // K { "data": [2], "dims": [1, 1, 1], "type": "float32" }, // V { "data": [3], "dims": [1, 1, 1], "type": "float32" }, // Bias { "data": null, "type": "float32" }, // Mask { "data": null, "type": "int32" }, // AttentionBias { "data": null, "type": "float32" }, // PastKey { "data": [], "dims": [1, 1, 0, 1], "type": "float32" }, // PastValue { "data": [], "dims": [1, 1, 0, 1], "type": "float32" } ], "outputs": [ { "data": [3], "dims": [1, 1, 1], "type": "float32" }, { "data": [2], "dims": [1, 1, 1, 1], "type": "float32" }, { "data": [3], "dims": [1, 1, 1, 1], "type": "float32" } ] } ] } ]