FiveTech Software tech support forums

by **Antonio Linares** » Mon Apr 17, 2023 1:38 pm

This code was mostly written by GPT4, I just supervised errors and tried to make it work.

I find it quite interesting for educational purposes, getting an idea about how transformers (chatGPT) work:

attention.prg

Code: Select all Expand view: #define MIN_DOUBLE Val( "-1.7976931348623158e+308" ) PROCEDURE Main() LOCAL aTokenIds, aEmbeddings, aInputEmbeddings, aPositionalEncoding, aEncodedEmbeddings, aWQ, aWK, aWV, aSelfAttentionOutput, nHeads, nDim, i nDim := 4 // Set the number of dimensions for the embeddings, weight matrices, etc. aTokenIds := {1, 5, 8, 2} // Example token IDs aEmbeddings := InitializeEmbeddings(10, nDim) // Initialize embeddings for 10 tokens, each with nDim dimensions aInputEmbeddings := GetInputEmbeddings(aTokenIds, aEmbeddings) aPositionalEncoding := GeneratePositionalEncoding(LEN(aTokenIds), nDim) aEncodedEmbeddings := AddPositionalEncoding(aInputEmbeddings, aPositionalEncoding) // Initialize the weight matrices for Query (aWQ), Key (aWK), and Value (aWV) for each head nHeads := 4 aWQ := Array(nHeads) aWK := Array(nHeads) aWV := Array(nHeads) FOR i := 1 TO nHeads aWQ[i] := InitializeEmbeddings(nDim, nDim) aWK[i] := InitializeEmbeddings(nDim, nDim) aWV[i] := InitializeEmbeddings(nDim, nDim) NEXT aSelfAttentionOutput := MultiHeadSelfAttention(aEncodedEmbeddings, aWQ, aWK, aWV, nHeads, nDim) ? "Input Token IDs:", aTokenIds ? "Input Embeddings:", aInputEmbeddings ? "Positional Encoding:", aPositionalEncoding ? "Encoded Embeddings:", aEncodedEmbeddings ? "Multi-Head Self Attention Output:", aSelfAttentionOutput RETURN FUNCTION InitializeEmbeddings(nTokens, nDimensions) LOCAL aEmbeddings, nIndex, nDim aEmbeddings := Array( nTokens, nDimensions ) FOR nIndex := 1 TO nTokens FOR nDim := 1 TO nDimensions aEmbeddings[nIndex][nDim] := (HB_Random() - 0.5) * 2 // Random number between -1 and 1 NEXT NEXT RETURN aEmbeddings FUNCTION GetInputEmbeddings(aTokenIds, aEmbeddings) LOCAL aInputEmbeddings, nIndex aInputEmbeddings := Array( LEN(aTokenIds) ) FOR nIndex := 1 TO LEN(aTokenIds) aInputEmbeddings[nIndex] := aEmbeddings[aTokenIds[nIndex]] NEXT RETURN aInputEmbeddings FUNCTION GeneratePositionalEncoding(nSequenceLength, nDimensions) LOCAL aPositionalEncoding, nIndex, nDim, nPos, nDivTerm aPositionalEncoding := Array( nSequenceLength, nDimensions ) FOR nIndex := 1 TO nSequenceLength nPos := nIndex - 1 FOR nDim := 1 TO nDimensions nDivTerm := 10000 ^ ((nDim - 1) / nDimensions) IF Mod(nDim, 2) == 1 aPositionalEncoding[nIndex][nDim] := Sin(nPos / nDivTerm) ELSE aPositionalEncoding[nIndex][nDim] := Cos(nPos / nDivTerm) ENDIF NEXT NEXT RETURN aPositionalEncoding FUNCTION AddPositionalEncoding(aInputEmbeddings, aPositionalEncoding) LOCAL nIndex, nDim, aEncodedEmbeddings aEncodedEmbeddings := Array(LEN(aInputEmbeddings)) FOR nIndex := 1 TO LEN(aInputEmbeddings) aEncodedEmbeddings[nIndex] := Array(LEN(aInputEmbeddings[nIndex])) FOR nDim := 1 TO LEN(aInputEmbeddings[nIndex]) aEncodedEmbeddings[nIndex][nDim] := aInputEmbeddings[nIndex][nDim] + aPositionalEncoding[nIndex][nDim] NEXT NEXT RETURN aEncodedEmbeddings FUNCTION MultiHeadSelfAttention(aEncodedEmbeddings, aWQ, aWK, aWV, nHeads, nDim ) LOCAL aHeadOutputs, aConcatenatedHeads, nIndex aHeadOutputs := Array(nHeads) // Create an array to store the output of each attention head FOR i := 1 TO nHeads aHeadOutputs[i] := SelfAttention(aEncodedEmbeddings, aWQ[i], aWK[i], aWV[i], nDim, nHeads) NEXT // Concatenate the outputs of all heads aConcatenatedHeads := ConcatenateHeads(aHeadOutputs) RETURN aConcatenatedHeads FUNCTION SelfAttention(aEncodedEmbeddings, aWQ, aWK, aWV, nDim, nHeads) LOCAL aQ, aK, aV, aScores, aSoftmaxScores, aAttentionOutput // Reshape the weight matrices aWQ, aWK, and aWV aWQ := Reshape(aWQ, nDim, nHeads) aWK := Reshape(aWK, nDim, nHeads) aWV := Reshape(aWV, nDim, nHeads) // Compute the Query (Q), Key (K), and Value (V) matrices aQ := MultiplyMatrix(aEncodedEmbeddings, aWQ) aK := MultiplyMatrix(aEncodedEmbeddings, aWK) aV := MultiplyMatrix(aEncodedEmbeddings, aWV) // Transpose the Key (K) matrix aK := TransposeMatrix(aK) // Compute the attention scores aScores := MultiplyMatrix(aQ, aK) // Apply the softmax function to the scores aSoftmaxScores := Softmax(aScores) // Compute the attention output aAttentionOutput := MultiplyMatrix(aSoftmaxScores, aV) RETURN aAttentionOutput FUNCTION SplitMatrix(aMatrix, nSplitSize) LOCAL nRows, nCols, nSplits, aSplitMatrices, i, j, k, aSplitMatrix nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) nSplits := INT(nCols / nSplitSize) // Calculate the number of splits // Initialize the array of split matrices aSplitMatrices := Array(nSplits) // Split the matrix into multiple matrices of size nSplitSize FOR i := 1 TO nSplits aSplitMatrix := Array(nRows) FOR j := 1 TO nRows aSplitMatrix[j] := Array(nSplitSize) FOR k := 1 TO nSplitSize aSplitMatrix[j][k] := aMatrix[j][(i - 1) * nSplitSize + k] NEXT NEXT aSplitMatrices[i] := aSplitMatrix NEXT RETURN aSplitMatrices FUNCTION ConcatenateHeads(aMultiHeadOutput) LOCAL nIndex, nHead, aConcatenated aConcatenated := aMultiHeadOutput[1] FOR nHead := 2 TO LEN(aMultiHeadOutput) aConcatenated := ConcatenateArrays(aConcatenated, aMultiHeadOutput[nHead]) NEXT RETURN aConcatenated FUNCTION ConcatenateArrays(aArray1, aArray2) LOCAL aResult, nIndex aResult := Array(LEN(aArray1) + LEN(aArray2)) FOR nIndex := 1 TO LEN(aArray1) aResult[nIndex] := aArray1[nIndex] NEXT FOR nIndex := 1 TO LEN(aArray2) aResult[LEN(aArray1) + nIndex] := aArray2[nIndex] NEXT RETURN aResult function MultiplyMatrix(m1, m2) local nRows1, nCols1, nRows2, nCols2, i, j, k local result := {} nRows1 := Len(m1) nCols1 := Len(m1[1]) nRows2 := Len(m2) nCols2 := Len(m2[1]) // Initialize result matrix with correct dimensions result := Array(nRows1, nCols2 ) // Calculate result matrix for i := 1 to nRows1 for j := 1 to nCols2 for k := 1 to nCols1 result[i][j] = m1[i][k] * m2[k][j] next next next return result FUNCTION TransposeMatrix(aMatrix) LOCAL nRows, nCols, aResult, i, j nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) aResult := {} FOR i := 1 TO nCols AAdd(aResult, {}) FOR j := 1 TO nRows AAdd(aResult[i], aMatrix[j][i]) NEXT NEXT RETURN aResult FUNCTION NormalizeAndSoftmax(aMatrix) LOCAL nRows, nCols, aResult, i, j, nMax, nSum nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) aResult := {} FOR i := 1 TO nRows AAdd(aResult, {}) nMax := MaxElem(aMatrix[i]) nSum := 0 FOR j := 1 TO nCols aMatrix[i][j] := Exp(aMatrix[i][j] - nMax) nSum += aMatrix[i][j] NEXT FOR j := 1 TO nCols AAdd(aResult[i], aMatrix[i][j] / nSum) NEXT NEXT RETURN aResult FUNCTION MaxElem(aArray) LOCAL nMax, nElem nMax := aArray[1] FOR EACH nElem IN aArray IF nElem > nMax nMax := nElem ENDIF NEXT RETURN nMax FUNCTION ScaledDotProductAttention(aQuery, aKey, aValue) LOCAL nHeads, nBatchSize, nSeqLength, nDimPerHead, aQueryTranspose, aDotProduct, aAttentionScores, aAttentionScoresTranspose, aSoftmaxWeights, aMultiplied, aMultipliedTranspose nHeads := LEN(aQuery) nBatchSize := LEN(aQuery[1]) nSeqLength := LEN(aQuery[1][1]) nDimPerHead := LEN(aQuery[1][1]) // Compute the dot product of the Query and Key matrices aQueryTranspose := TransposeMatrix(aQuery) aDotProduct := MultiplyMatrix(aQueryTranspose, aKey) // Scale the dot product by the square root of the number of dimensions per head aScaledDotProduct := aDotProduct / SQRT(nDimPerHead) // Compute the softmax weights for the attention scores aAttentionScores := TransposeMatrix(aScaledDotProduct) aAttentionScoresTranspose := NormalizeAndSoftmax(aAttentionScores) // Compute the matrix multiplication of the softmax weights and the Value matrix aSoftmaxWeights := TransposeMatrix(aAttentionScoresTranspose) aMultiplied := MultiplyMatrix(aSoftmaxWeights, aValue) aMultipliedTranspose := TransposeMatrix(aMultiplied) // Reshape the output to match the input shape RETURN aMultipliedTranspose FUNCTION ConcatenateMatrices(aMatrices) LOCAL nSplits, nBatchSize, nSeqLength, nDim, aResultMatrix, i, j, k, nOffset, aSubMatrix nSplits := LEN(aMatrices) nBatchSize := LEN(aMatrices[1]) nSeqLength := LEN(aMatrices[1][1]) nDim := 0 // Calculate the total number of dimensions across all splits FOR i := 1 TO nSplits nDim += LEN(aMatrices[i][1][1]) NEXT // Initialize the result matrix with the correct shape aResultMatrix := Array(nBatchSize) FOR i := 1 TO nBatchSize aResultMatrix[i] := Array(nSeqLength) FOR j := 1 TO nSeqLength aResultMatrix[i][j] := Array(nDim) NEXT NEXT // Concatenate the splits along the dimension axis nOffset := 0 FOR i := 1 TO nSplits FOR j := 1 TO nBatchSize FOR k := 1 TO nSeqLength aSubMatrix := aMatrices[i][j][k] FOR n := 1 TO LEN(aSubMatrix) aResultMatrix[j][k][nOffset+n] := aSubMatrix[n] NEXT NEXT NEXT nOffset += LEN(aMatrices[i][1][1]) NEXT RETURN aResultMatrix FUNCTION Reshape(aMatrix, nRows, nCols) LOCAL aResult, i, j, k, l aResult := Array(nRows) k := 1 l := 1 FOR i := 1 TO nRows aResult[i] := Array(nCols) FOR j := 1 TO nCols aResult[i][j] := aMatrix[k][l] l += 1 IF l > LEN(aMatrix[k]) l := 1 k += 1 ENDIF NEXT NEXT RETURN aResult FUNCTION Softmax(aMatrix) LOCAL aResult, nRows, nCols, i, j, nMax, nSum, nExp nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) aResult := Array(nRows) // Create an array to store the result FOR i := 1 TO nRows aResult[i] := Array(nCols) // Create an array for each row of the result nMax := MIN_DOUBLE // Initialize the maximum value for numerical stability FOR j := 1 TO nCols IF aMatrix[i][j] > nMax nMax := aMatrix[i][j] ENDIF NEXT nSum := 0 // Initialize the sum for the softmax function FOR j := 1 TO nCols nExp := EXP(aMatrix[i][j] - nMax) // Calculate the exponent for each entry aResult[i][j] := nExp nSum += nExp NEXT FOR j := 1 TO nCols aResult[i][j] /= nSum // Divide each entry by the sum NEXT NEXT RETURN aResult

by **Jimmy** » Mon Apr 17, 2023 2:06 pm

hi Antonio,

does the CODE really work :?:

i can compile it (with some Warning) but i got "no Display" (and no Error LOG)

by **Antonio Linares** » Mon Apr 17, 2023 3:57 pm

Providing it some sentences. Still needs training...

attention.prg

Code: Select all Expand view: #define MIN_DOUBLE Val( "-1.7976931348623158e+308" ) PROCEDURE Main() LOCAL aSentences, aVocabulary := {}, aTokenIds, aEmbeddings, aInputEmbeddings, aPositionalEncoding, aEncodedEmbeddings, aWQ, aWK, aWV, aSelfAttentionOutput, nHeads, nDim, i, aNextWordEmbedding, aOutput, aNextWordTokens, aNextWords aSentences := {; "The quick brown fox jumps over the lazy dog.",; "I have a pen. I have an apple. Apple pen.",; "A journey of a thousand miles begins with a single step."} aTokenIds := SentencesToTokenIds( aSentences, @aVocabulary ) nDim := 4 // Set the number of dimensions for the embeddings, weight matrices, etc. aEmbeddings := InitializeEmbeddings( Len( aTokenIds ), nDim) // Initialize embeddings for 10 tokens, each with nDim dimensions aInputEmbeddings := GetInputEmbeddings(aTokenIds, aEmbeddings) aPositionalEncoding := GeneratePositionalEncoding(LEN(aTokenIds), nDim) aEncodedEmbeddings := AddPositionalEncoding(aInputEmbeddings, aPositionalEncoding) // Initialize the weight matrices for Query (aWQ), Key (aWK), and Value (aWV) for each head nHeads := 4 aWQ := Array(nHeads) aWK := Array(nHeads) aWV := Array(nHeads) FOR i := 1 TO nHeads aWQ[i] := InitializeEmbeddings(nDim, nDim) aWK[i] := InitializeEmbeddings(nDim, nDim) aWV[i] := InitializeEmbeddings(nDim, nDim) NEXT aSelfAttentionOutput := MultiHeadSelfAttention(aEncodedEmbeddings, aWQ, aWK, aWV, nHeads, nDim) aNextWordEmbedding := InitializeEmbeddings(nDim, nDim) // Initialize output weight matrix for the next word prediction aOutput := GetNextWordPrediction(aSelfAttentionOutput, aNextWordEmbedding) aNextWordTokens := GetTokensFromEmbeddings(aOutput, aEmbeddings) aNextWords := TokensToWords( aNextWordTokens, aVocabulary ) // ? "Input Token IDs:", aTokenIds // ? "Input Embeddings:", aInputEmbeddings // ? "Positional Encoding:", aPositionalEncoding // ? "Encoded Embeddings:", aEncodedEmbeddings // ? "Multi-Head Self Attention Output:", aSelfAttentionOutput ? "Next Word Prediction:", aNextWords RETURN FUNCTION TokensToWords(aTokenIds, aVocabulary) LOCAL aReversedVocabulary, aWords, nTokenId, cWord aWords := {} FOR EACH nTokenId IN aTokenIds if nTokenId <= Len( aVocabulary ) cWord := aVocabulary[ nTokenId ] AAdd(aWords, cWord) endif NEXT RETURN aWords FUNCTION ReverseVocabulary(aVocabulary) LOCAL aReversedVocabulary, nTokenId, cWord aReversedVocabulary := Array( Len( aVocabulary ) ) FOR EACH cWord IN aVocabulary nTokenId := AScan( aVocabulary, cWord ) aReversedVocabulary[nTokenId] := cWord NEXT RETURN aReversedVocabulary FUNCTION GetNextWordPrediction(aSelfAttentionOutput, aNextWordEmbedding) LOCAL aOutput, nRow, nCol, nSum aOutput := Array(LEN(aSelfAttentionOutput)) FOR nRow := 1 TO LEN(aSelfAttentionOutput) aOutput[nRow] := Array(LEN(aNextWordEmbedding)) FOR nCol := 1 TO LEN(aNextWordEmbedding) nSum := 0 FOR i := 1 TO LEN(aSelfAttentionOutput[nRow]) nSum := nSum + aSelfAttentionOutput[nRow][i] * aNextWordEmbedding[i][nCol] NEXT aOutput[nRow][nCol] := nSum NEXT NEXT RETURN aOutput FUNCTION SentencesToTokenIds(aSentences, aVocabulary) LOCAL aTokenIds, aSentence, aTokenId, nToken, cWord, aWords aTokenIds := {} FOR EACH aSentence IN aSentences aWords = hb_ATokens( aSentence ) for each cWord in aWords if AScan( aVocabulary, cWord ) == 0 AAdd( aVocabulary, cWord ) nToken = Len( aVocabulary ) else nToken = AScan( aVocabulary, cWord ) endif AAdd( aTokenIds, nToken ) NEXT NEXT RETURN aTokenIds FUNCTION GetTokensFromEmbeddings(aOutputEmbeddings, aEmbeddings) LOCAL aTokenIds, nIndex, nRow aTokenIds := Array(LEN(aOutputEmbeddings)) FOR nRow := 1 TO LEN(aOutputEmbeddings) nIndex := FindClosestEmbedding(aOutputEmbeddings[nRow], aEmbeddings) aTokenIds[nRow] := nIndex NEXT RETURN aTokenIds FUNCTION FindClosestEmbedding(aTargetEmbedding, aEmbeddings) LOCAL nMinDistance, nDistance, nClosestIndex, nRow nMinDistance := Val( "1e99" ) nClosestIndex := -1 FOR nRow := 1 TO LEN(aEmbeddings) nDistance := CalculateDistance(aTargetEmbedding, aEmbeddings[nRow]) IF nDistance < nMinDistance nMinDistance := nDistance nClosestIndex := nRow ENDIF NEXT RETURN nClosestIndex FUNCTION CalculateDistance(aVector1, aVector2) LOCAL nSum, i nSum := 0 FOR i := 1 TO LEN(aVector1) nSum := nSum + (aVector1[i] - aVector2[i])^2 NEXT RETURN SQRT(nSum) FUNCTION InitializeEmbeddings(nTokens, nDimensions) LOCAL aEmbeddings, nIndex, nDim aEmbeddings := Array( nTokens, nDimensions ) FOR nIndex := 1 TO nTokens FOR nDim := 1 TO nDimensions aEmbeddings[nIndex][nDim] := (HB_Random() - 0.5) * 2 // Random number between -1 and 1 NEXT NEXT RETURN aEmbeddings FUNCTION GetInputEmbeddings(aTokenIds, aEmbeddings) LOCAL aInputEmbeddings, nIndex aInputEmbeddings := Array( LEN(aTokenIds) ) FOR nIndex := 1 TO LEN(aTokenIds) aInputEmbeddings[nIndex] := aEmbeddings[aTokenIds[nIndex]] NEXT RETURN aInputEmbeddings FUNCTION GeneratePositionalEncoding(nSequenceLength, nDimensions) LOCAL aPositionalEncoding, nIndex, nDim, nPos, nDivTerm aPositionalEncoding := Array( nSequenceLength, nDimensions ) FOR nIndex := 1 TO nSequenceLength nPos := nIndex - 1 FOR nDim := 1 TO nDimensions nDivTerm := 10000 ^ ((nDim - 1) / nDimensions) IF Mod(nDim, 2) == 1 aPositionalEncoding[nIndex][nDim] := Sin(nPos / nDivTerm) ELSE aPositionalEncoding[nIndex][nDim] := Cos(nPos / nDivTerm) ENDIF NEXT NEXT RETURN aPositionalEncoding FUNCTION AddPositionalEncoding(aInputEmbeddings, aPositionalEncoding) LOCAL nIndex, nDim, aEncodedEmbeddings aEncodedEmbeddings := Array(LEN(aInputEmbeddings)) FOR nIndex := 1 TO LEN(aInputEmbeddings) aEncodedEmbeddings[nIndex] := Array(LEN(aInputEmbeddings[nIndex])) FOR nDim := 1 TO LEN(aInputEmbeddings[nIndex]) aEncodedEmbeddings[nIndex][nDim] := aInputEmbeddings[nIndex][nDim] + aPositionalEncoding[nIndex][nDim] NEXT NEXT RETURN aEncodedEmbeddings FUNCTION MultiHeadSelfAttention(aEncodedEmbeddings, aWQ, aWK, aWV, nHeads, nDim ) LOCAL aHeadOutputs, aConcatenatedHeads, nIndex aHeadOutputs := Array(nHeads) // Create an array to store the output of each attention head FOR i := 1 TO nHeads aHeadOutputs[i] := SelfAttention(aEncodedEmbeddings, aWQ[i], aWK[i], aWV[i], nDim, nHeads) NEXT // Concatenate the outputs of all heads aConcatenatedHeads := ConcatenateHeads(aHeadOutputs) RETURN aConcatenatedHeads FUNCTION SelfAttention(aEncodedEmbeddings, aWQ, aWK, aWV, nDim, nHeads) LOCAL aQ, aK, aV, aScores, aSoftmaxScores, aAttentionOutput // Reshape the weight matrices aWQ, aWK, and aWV aWQ := Reshape(aWQ, nDim, nHeads) aWK := Reshape(aWK, nDim, nHeads) aWV := Reshape(aWV, nDim, nHeads) // Compute the Query (Q), Key (K), and Value (V) matrices aQ := MultiplyMatrix(aEncodedEmbeddings, aWQ) aK := MultiplyMatrix(aEncodedEmbeddings, aWK) aV := MultiplyMatrix(aEncodedEmbeddings, aWV) // Transpose the Key (K) matrix aK := TransposeMatrix(aK) // Compute the attention scores aScores := MultiplyMatrix(aQ, aK) // Apply the softmax function to the scores aSoftmaxScores := Softmax(aScores) // Compute the attention output aAttentionOutput := MultiplyMatrix(aSoftmaxScores, aV) RETURN aAttentionOutput FUNCTION SplitMatrix(aMatrix, nSplitSize) LOCAL nRows, nCols, nSplits, aSplitMatrices, i, j, k, aSplitMatrix nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) nSplits := INT(nCols / nSplitSize) // Calculate the number of splits // Initialize the array of split matrices aSplitMatrices := Array(nSplits) // Split the matrix into multiple matrices of size nSplitSize FOR i := 1 TO nSplits aSplitMatrix := Array(nRows) FOR j := 1 TO nRows aSplitMatrix[j] := Array(nSplitSize) FOR k := 1 TO nSplitSize aSplitMatrix[j][k] := aMatrix[j][(i - 1) * nSplitSize + k] NEXT NEXT aSplitMatrices[i] := aSplitMatrix NEXT RETURN aSplitMatrices FUNCTION ConcatenateHeads(aMultiHeadOutput) LOCAL nIndex, nHead, aConcatenated aConcatenated := aMultiHeadOutput[1] FOR nHead := 2 TO LEN(aMultiHeadOutput) aConcatenated := ConcatenateArrays(aConcatenated, aMultiHeadOutput[nHead]) NEXT RETURN aConcatenated FUNCTION ConcatenateArrays(aArray1, aArray2) LOCAL aResult, nIndex aResult := Array(LEN(aArray1) + LEN(aArray2)) FOR nIndex := 1 TO LEN(aArray1) aResult[nIndex] := aArray1[nIndex] NEXT FOR nIndex := 1 TO LEN(aArray2) aResult[LEN(aArray1) + nIndex] := aArray2[nIndex] NEXT RETURN aResult function MultiplyMatrix(m1, m2) local nRows1, nCols1, nRows2, nCols2, i, j, k local result := {} nRows1 := Len(m1) nCols1 := Len(m1[1]) nRows2 := Len(m2) nCols2 := Len(m2[1]) // Initialize result matrix with correct dimensions result := Array(nRows1, nCols2 ) // Calculate result matrix for i := 1 to nRows1 for j := 1 to nCols2 for k := 1 to nCols1 result[i][j] = m1[i][k] * m2[k][j] next next next return result FUNCTION TransposeMatrix(aMatrix) LOCAL nRows, nCols, aResult, i, j nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) aResult := {} FOR i := 1 TO nCols AAdd(aResult, {}) FOR j := 1 TO nRows AAdd(aResult[i], aMatrix[j][i]) NEXT NEXT RETURN aResult FUNCTION NormalizeAndSoftmax(aMatrix) LOCAL nRows, nCols, aResult, i, j, nMax, nSum nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) aResult := {} FOR i := 1 TO nRows AAdd(aResult, {}) nMax := MaxElem(aMatrix[i]) nSum := 0 FOR j := 1 TO nCols aMatrix[i][j] := Exp(aMatrix[i][j] - nMax) nSum += aMatrix[i][j] NEXT FOR j := 1 TO nCols AAdd(aResult[i], aMatrix[i][j] / nSum) NEXT NEXT RETURN aResult FUNCTION MaxElem(aArray) LOCAL nMax, nElem nMax := aArray[1] FOR EACH nElem IN aArray IF nElem > nMax nMax := nElem ENDIF NEXT RETURN nMax FUNCTION ScaledDotProductAttention(aQuery, aKey, aValue) LOCAL nHeads, nBatchSize, nSeqLength, nDimPerHead, aQueryTranspose, aDotProduct, aAttentionScores, aAttentionScoresTranspose, aSoftmaxWeights, aMultiplied, aMultipliedTranspose nHeads := LEN(aQuery) nBatchSize := LEN(aQuery[1]) nSeqLength := LEN(aQuery[1][1]) nDimPerHead := LEN(aQuery[1][1]) // Compute the dot product of the Query and Key matrices aQueryTranspose := TransposeMatrix(aQuery) aDotProduct := MultiplyMatrix(aQueryTranspose, aKey) // Scale the dot product by the square root of the number of dimensions per head aScaledDotProduct := aDotProduct / SQRT(nDimPerHead) // Compute the softmax weights for the attention scores aAttentionScores := TransposeMatrix(aScaledDotProduct) aAttentionScoresTranspose := NormalizeAndSoftmax(aAttentionScores) // Compute the matrix multiplication of the softmax weights and the Value matrix aSoftmaxWeights := TransposeMatrix(aAttentionScoresTranspose) aMultiplied := MultiplyMatrix(aSoftmaxWeights, aValue) aMultipliedTranspose := TransposeMatrix(aMultiplied) // Reshape the output to match the input shape RETURN aMultipliedTranspose FUNCTION ConcatenateMatrices(aMatrices) LOCAL nSplits, nBatchSize, nSeqLength, nDim, aResultMatrix, i, j, k, nOffset, aSubMatrix nSplits := LEN(aMatrices) nBatchSize := LEN(aMatrices[1]) nSeqLength := LEN(aMatrices[1][1]) nDim := 0 // Calculate the total number of dimensions across all splits FOR i := 1 TO nSplits nDim += LEN(aMatrices[i][1][1]) NEXT // Initialize the result matrix with the correct shape aResultMatrix := Array(nBatchSize) FOR i := 1 TO nBatchSize aResultMatrix[i] := Array(nSeqLength) FOR j := 1 TO nSeqLength aResultMatrix[i][j] := Array(nDim) NEXT NEXT // Concatenate the splits along the dimension axis nOffset := 0 FOR i := 1 TO nSplits FOR j := 1 TO nBatchSize FOR k := 1 TO nSeqLength aSubMatrix := aMatrices[i][j][k] FOR n := 1 TO LEN(aSubMatrix) aResultMatrix[j][k][nOffset+n] := aSubMatrix[n] NEXT NEXT NEXT nOffset += LEN(aMatrices[i][1][1]) NEXT RETURN aResultMatrix FUNCTION Reshape(aMatrix, nRows, nCols) LOCAL aResult, i, j, k, l aResult := Array(nRows) k := 1 l := 1 FOR i := 1 TO nRows aResult[i] := Array(nCols) FOR j := 1 TO nCols aResult[i][j] := aMatrix[k][l] l += 1 IF l > LEN(aMatrix[k]) l := 1 k += 1 ENDIF NEXT NEXT RETURN aResult FUNCTION Softmax(aMatrix) LOCAL aResult, nRows, nCols, i, j, nMax, nSum, nExp nRows := LEN(aMatrix) nCols := LEN(aMatrix[1]) aResult := Array(nRows) // Create an array to store the result FOR i := 1 TO nRows aResult[i] := Array(nCols) // Create an array for each row of the result nMax := MIN_DOUBLE // Initialize the maximum value for numerical stability FOR j := 1 TO nCols IF aMatrix[i][j] > nMax nMax := aMatrix[i][j] ENDIF NEXT nSum := 0 // Initialize the sum for the softmax function FOR j := 1 TO nCols nExp := EXP(aMatrix[i][j] - nMax) // Calculate the exponent for each entry aResult[i][j] := nExp nSum += nExp NEXT FOR j := 1 TO nCols aResult[i][j] /= nSum // Divide each entry by the sum NEXT NEXT RETURN aResult

by **Antonio Linares** » Mon Apr 17, 2023 3:58 pm

Dear Jimmy,

Please run it from here:
https://www.fivetechsoft.com/counter/modpro.php

I love this (mod_harbour) webapp for testing Harbour code and ideas quickly :-)

by **Jimmy** » Tue Apr 18, 2023 12:18 am

hi Antonio,

Antonio Linares wrote:Please run it from here:
https://www.fivetechsoft.com/counter/modpro.php
I love this (mod_harbour) webapp for testing Harbour code and ideas quickly

WOW
i never have see something like this

FiveTech Software tech support forums

Harbour's multi-head self-attention transformer

Harbour's multi-head self-attention transformer

Re: Harbour's multi-head self-attention transformer

Re: Harbour's multi-head self-attention transformer

Re: Harbour's multi-head self-attention transformer

Re: Harbour's multi-head self-attention transformer

Who is online