FUNCTION Main()
   // Demo driver: builds a random input batch, adds positional encoding,
   // runs it through the encoder stack and prints both tensors.
   LOCAL nBatchSize := 2, nSeqLen := 5, nModelDim := 8, nHeads := 2
   LOCAL nLayers := 2   // number of encoder layers to stack
   LOCAL aInput, aPositionalEncoding, aEncoderOutput

   // Example input
   aInput := GenerateRandomMatrix(nBatchSize, nSeqLen, nModelDim)

   // Positional encoding table, then its sum with the input
   aPositionalEncoding := GeneratePositionalEncoding(nSeqLen, nModelDim)
   aInput := AddPositionalEncoding(aInput, aPositionalEncoding)

   // Build and apply the encoder
   aEncoderOutput := TransformerEncoder(aInput, nHeads, nLayers)

   ? "Input con codificación posicional:"
   PrintMatrix(aInput)
   ? "Salida del Encoder:"
   PrintMatrix(aEncoderOutput)
   RETURN NIL
FUNCTION TransformerEncoder(aInput, nHeads, nLayers)
   // Applies nLayers encoder layers in sequence. Each layer is
   // multi-head attention followed by a feed-forward step, and each
   // sub-step is combined with its own input through AddAndNorm.
   // Returns the output of the last layer (aInput itself when no layer runs).
   LOCAL aState := aInput
   LOCAL aAttended, aProjected
   LOCAL nLayer

   FOR nLayer := 1 TO nLayers
      // Multi-head attention sub-layer
      aAttended := MultiHeadAttention(aState, nHeads)
      aState := AddAndNorm(aState, aAttended)

      // Feed-forward sub-layer
      aProjected := FeedForward(aState)
      aState := AddAndNorm(aState, aProjected)
   NEXT

   RETURN aState
FUNCTION MultiHeadAttention(aInput, nHeads)
   // Multi-head self-attention over aInput, indexed as aInput[batch, position, feature].
   //
   // For each of nHeads heads, fresh random query/key/value weight matrices
   // are generated, aInput is projected through them, attention scores are
   // computed from the query/key projections and applied to the value projection.
   // The per-head results are concatenated along the last dimension and passed
   // through one more freshly generated random matrix.
   //
   // Note: every weight matrix comes from GenerateRandomMatrix on each call,
   // so no weights persist between calls.
   //
   // Returns an array whose last dimension has nModelDim entries.
   LOCAL aOutputs := {}, aFinalOutput, i
   LOCAL nModelDim := Len(aInput[1, 1])
   // Width of each head; Int() truncates when nModelDim is not a multiple of nHeads
   LOCAL nHeadDim := Int(nModelDim / nHeads)
   LOCAL aWq, aWk, aWv, aQ, aK, aV, aAttentionScores, aHeadOutput, aWo

   FOR i := 1 TO nHeads
      aWq := GenerateRandomMatrix(nModelDim, nHeadDim)
      aWk := GenerateRandomMatrix(nModelDim, nHeadDim)
      aWv := GenerateRandomMatrix(nModelDim, nHeadDim)

      aQ := LinearTransformation(aInput, aWq)
      aK := LinearTransformation(aInput, aWk)
      aV := LinearTransformation(aInput, aWv)

      aAttentionScores := CalculateAttentionScores(aQ, aK)
      aHeadOutput := ApplyAttention(aAttentionScores, aV)
      AAdd(aOutputs, aHeadOutput)
   NEXT

   // Merge the heads and apply the output projection
   aFinalOutput := ConcatenateOutputs(aOutputs)
   aWo := GenerateRandomMatrix(nModelDim, nModelDim)
   aFinalOutput := LinearTransformation(aFinalOutput, aWo)

   RETURN aFinalOutput
FUNCTION FeedForward(aInput)
   // Position-wise feed-forward step: linear -> ReLU -> linear.
   //
   // aInput is indexed as aInput[batch, position, feature]. The hidden layer
   // is four times as wide as the feature dimension, and the result has the
   // same feature width as aInput.
   //
   // Note: both weight matrices come from GenerateRandomMatrix on each call,
   // so no weights persist between calls.
   LOCAL nModelDim := Len(aInput[1, 1])
   LOCAL nFfDim := nModelDim * 4   // inner width is typically 4x the model width
   LOCAL aW1 := GenerateRandomMatrix(nModelDim, nFfDim)
   LOCAL aW2 := GenerateRandomMatrix(nFfDim, nModelDim)
   LOCAL aHidden, aOutput

   aHidden := LinearTransformation(aInput, aW1)
   aHidden := ApplyReLU(aHidden)
   aOutput := LinearTransformation(aHidden, aW2)

   RETURN aOutput
FUNCTION AddAndNorm(aInput, aResidual)
   // Element-wise sum of the two arrays followed by layer normalization.
   RETURN LayerNorm(AddMatrices(aInput, aResidual))
FUNCTION LayerNorm(aInput)
   // Normalizes every innermost vector aInput[batch, position] to zero mean
   // and unit variance: (x - mean) / Sqrt(variance + epsilon).
   // Dimensions are taken from the first batch / first position.
   // Returns a new array; aInput is not modified.
   LOCAL nBatches := Len(aInput)
   LOCAL nPositions := Len(aInput[1])
   LOCAL nFeatures := Len(aInput[1, 1])
   LOCAL aNormalized := Array(nBatches, nPositions, nFeatures)
   LOCAL nEps := (1 * 10^-5)   // keeps the denominator away from zero
   LOCAL nB, nP, nF, aVector, nAvg, nDenominator

   FOR nB := 1 TO nBatches
      FOR nP := 1 TO nPositions
         aVector := aInput[nB, nP]
         nAvg := CalcMean(aVector)
         // The denominator is the same for every feature of this vector
         nDenominator := Sqrt(CalcVariance(aVector, nAvg) + nEps)
         FOR nF := 1 TO nFeatures
            aNormalized[nB, nP, nF] := (aVector[nF] - nAvg) / nDenominator
         NEXT
      NEXT
   NEXT

   RETURN aNormalized
FUNCTION GeneratePositionalEncoding(nSeqLen, nModelDim)
   // Builds an nSeqLen x nModelDim table of sinusoidal positional encodings.
   // With zero-based position p and zero-based column c:
   //   even c -> Sin(p / 10000 ** (c / nModelDim))
   //   odd  c -> Cos(p / 10000 ** ((c - 1) / nModelDim))
   LOCAL aTable := Array(nSeqLen, nModelDim)
   LOCAL nRow, nCol, nPosition, nColumn, lEvenColumn, nAngle

   FOR nRow := 1 TO nSeqLen
      nPosition := nRow - 1
      FOR nCol := 1 TO nModelDim
         nColumn := nCol - 1
         lEvenColumn := (nColumn % 2 == 0)
         // An odd column shares the frequency of the even column before it
         IF lEvenColumn
            nAngle := nPosition / (10000 ** (nColumn / nModelDim))
         ELSE
            nAngle := nPosition / (10000 ** ((nColumn - 1) / nModelDim))
         ENDIF
         aTable[nRow, nCol] := iif(lEvenColumn, Sin(nAngle), Cos(nAngle))
      NEXT
   NEXT

   RETURN aTable
FUNCTION AddPositionalEncoding(aInput, aPositionalEncoding)
   // Adds aPositionalEncoding[position, feature] to every batch entry of
   // aInput[batch, position, feature]. Dimensions are taken from the first
   // batch / first position. Returns a new array; the inputs are not modified.
   LOCAL nBatches := Len(aInput)
   LOCAL nPositions := Len(aInput[1])
   LOCAL nFeatures := Len(aInput[1, 1])
   LOCAL aSum := Array(nBatches, nPositions, nFeatures)
   LOCAL nB, nP, nF, aSourceRow, aEncodingRow, aTargetRow

   FOR nB := 1 TO nBatches
      FOR nP := 1 TO nPositions
         aSourceRow := aInput[nB, nP]
         aEncodingRow := aPositionalEncoding[nP]
         aTargetRow := aSum[nB, nP]
         FOR nF := 1 TO nFeatures
            aTargetRow[nF] := aSourceRow[nF] + aEncodingRow[nF]
         NEXT
      NEXT
   NEXT

   RETURN aSum
FUNCTION LinearTransformation(aX, aW)
   // Multiplies every innermost vector of aX by the weight matrix aW.
   //
   // aX is indexed as aX[batch, position, inFeature] and aW as
   // aW[inFeature, outFeature]. The result is indexed as
   // [batch, position, outFeature], where the output width is Len(aW[1]).
   // Dimensions of aX are taken from its first batch / first position.
   //
   // The inner loop index l is declared LOCAL so it does not become an
   // implicitly created memory variable visible outside this function.
   LOCAL aResult, i, j, k, l, nSum
   LOCAL nBatchSize := Len(aX), nSeqLen := Len(aX[1])
   LOCAL nInDim := Len(aX[1, 1]), nOutDim := Len(aW[1])

   aResult := Array(nBatchSize, nSeqLen, nOutDim)
   FOR i := 1 TO nBatchSize
      FOR j := 1 TO nSeqLen
         FOR k := 1 TO nOutDim
            // Dot product of the input vector with column k of aW
            nSum := 0
            FOR l := 1 TO nInDim
               nSum += aX[i, j, l] * aW[l, k]
            NEXT
            aResult[i, j, k] := nSum
         NEXT
      NEXT
   NEXT

   RETURN aResult
FUNCTION CalculateAttentionScores(aQ, aK)
   // Scaled dot-product attention weights.
   // For each batch, entry [row, col] is the dot product of aQ[batch, row]
   // with aK[batch, col], divided by Sqrt of the vector length; the whole
   // array is then passed through ApplySoftmax.
   // Dimensions are taken from the first batch / first position of aQ.
   LOCAL nBatches := Len(aQ)
   LOCAL nPositions := Len(aQ[1])
   LOCAL nDepth := Len(aQ[1, 1])
   LOCAL nScale := Sqrt(nDepth)
   LOCAL aRaw := Array(nBatches, nPositions, nPositions)
   LOCAL nB, nRow, nCol, nD, nDot, aQueryVec, aKeyVec

   FOR nB := 1 TO nBatches
      FOR nRow := 1 TO nPositions
         aQueryVec := aQ[nB, nRow]
         FOR nCol := 1 TO nPositions
            aKeyVec := aK[nB, nCol]
            nDot := 0
            FOR nD := 1 TO nDepth
               nDot += aQueryVec[nD] * aKeyVec[nD]
            NEXT
            aRaw[nB, nRow, nCol] := nDot / nScale
         NEXT
      NEXT
   NEXT

   RETURN ApplySoftmax(aRaw)
FUNCTION ApplyAttention(aScores, aV)
   // Weighted sum of value vectors: for each batch, output[row, feature] is
   // the sum over positions p of aScores[batch, row, p] * aV[batch, p, feature].
   // Dimensions are taken from the first batch of aScores and the first
   // value vector of aV.
   LOCAL nBatches := Len(aScores)
   LOCAL nPositions := Len(aScores[1])
   LOCAL nWidth := Len(aV[1, 1])
   LOCAL aMixed := Array(nBatches, nPositions, nWidth)
   LOCAL nB, nRow, nFeat, nPos, nAcc, aWeights, aValues

   FOR nB := 1 TO nBatches
      aValues := aV[nB]
      FOR nRow := 1 TO nPositions
         aWeights := aScores[nB, nRow]
         FOR nFeat := 1 TO nWidth
            nAcc := 0
            FOR nPos := 1 TO nPositions
               nAcc += aWeights[nPos] * aValues[nPos, nFeat]
            NEXT
            aMixed[nB, nRow, nFeat] := nAcc
         NEXT
      NEXT
   NEXT

   RETURN aMixed
FUNCTION ConcatenateOutputs(aOutputs)
   // Joins the per-head arrays aOutputs[head, batch, position, feature]
   // along the feature dimension, head 1 first. All sizes are taken from
   // the first head / first batch / first position.
   LOCAL nHeads := Len(aOutputs)
   LOCAL nBatches := Len(aOutputs[1])
   LOCAL nPositions := Len(aOutputs[1, 1])
   LOCAL nPerHead := Len(aOutputs[1, 1, 1])
   LOCAL aJoined := Array(nBatches, nPositions, nPerHead * nHeads)
   LOCAL nB, nP, nH, nF, aTarget, aSource

   FOR nB := 1 TO nBatches
      FOR nP := 1 TO nPositions
         aTarget := aJoined[nB, nP]
         FOR nH := 1 TO nHeads
            aSource := aOutputs[nH, nB, nP]
            FOR nF := 1 TO nPerHead
               // Head nH occupies slots (nH-1)*nPerHead+1 .. nH*nPerHead
               aTarget[(nH - 1) * nPerHead + nF] := aSource[nF]
            NEXT
         NEXT
      NEXT
   NEXT

   RETURN aJoined
FUNCTION ApplyReLU(aInput)
   // Returns a deep copy of the three-level array aInput in which every
   // innermost element x has been replaced by Max(0, x).
   LOCAL aClamped := AClone(aInput)
   LOCAL nOuter, nMiddle, nInner, aVector

   FOR nOuter := 1 TO Len(aClamped)
      FOR nMiddle := 1 TO Len(aClamped[nOuter])
         // aVector refers to the row inside the copy, so writes land in aClamped
         aVector := aClamped[nOuter, nMiddle]
         FOR nInner := 1 TO Len(aVector)
            aVector[nInner] := Max(0, aVector[nInner])
         NEXT
      NEXT
   NEXT

   RETURN aClamped
FUNCTION ApplySoftmax(aInput)
   // Softmax over the innermost dimension of the three-level array aInput.
   //
   // Works on a deep copy, so aInput is not modified. Each innermost row is
   // shifted by its maximum before exponentiation, so the largest exponent
   // is Exp(0), and is then divided by the sum of its exponentials.
   //
   // Every row is processed over its own length, so the last dimension
   // does not have to match the second one. Empty rows are left untouched.
   LOCAL aOutput := AClone(aInput)
   LOCAL i, j, k, nMax, nSum, aRow, nRowLen

   FOR i := 1 TO Len(aOutput)
      FOR j := 1 TO Len(aOutput[i])
         // aRow refers to the row inside the copy, so writes land in aOutput
         aRow := aOutput[i, j]
         nRowLen := Len(aRow)
         IF nRowLen == 0
            LOOP
         ENDIF

         nMax := MaxInArray(aRow)
         nSum := 0
         FOR k := 1 TO nRowLen
            aRow[k] := Exp(aRow[k] - nMax)
            nSum += aRow[k]
         NEXT
         // nSum >= 1 here because the maximum element contributes Exp(0)
         FOR k := 1 TO nRowLen
            aRow[k] /= nSum
         NEXT
      NEXT
   NEXT

   RETURN aOutput
FUNCTION AddMatrices(aA, aB)
   // Element-wise sum of two three-level arrays. The shape is taken from aA;
   // aB must have an element at every index that exists in aA.
   // Returns a new array; neither argument is modified.
   LOCAL aTotal := AClone(aA)
   LOCAL nOuter, nMiddle, nInner, aTargetRow, aAddendRow

   FOR nOuter := 1 TO Len(aA)
      FOR nMiddle := 1 TO Len(aA[nOuter])
         aTargetRow := aTotal[nOuter, nMiddle]
         aAddendRow := aB[nOuter, nMiddle]
         FOR nInner := 1 TO Len(aA[nOuter, nMiddle])
            aTargetRow[nInner] += aAddendRow[nInner]
         NEXT
      NEXT
   NEXT

   RETURN aTotal
FUNCTION GenerateRandomMatrix(nDim1, nDim2, nDim3)
   // Creates an nDim1 x nDim2 array, or an nDim1 x nDim2 x nDim3 array when
   // nDim3 is supplied, and fills every element with hb_Random(0, 0.02).
   // Elements are drawn with the last index varying fastest.
   LOCAL aFilled, nOuter, nInner

   IF nDim3 == NIL
      aFilled := Array(nDim1, nDim2)
      FOR nOuter := 1 TO nDim1
         FOR nInner := 1 TO nDim2
            aFilled[nOuter, nInner] := hb_Random(0, 0.02)
         NEXT
      NEXT
      RETURN aFilled
   ENDIF

   // Three dimensions: each outer slot is a two-dimensional random matrix
   aFilled := Array(nDim1)
   FOR nOuter := 1 TO nDim1
      aFilled[nOuter] := GenerateRandomMatrix(nDim2, nDim3)
   NEXT

   RETURN aFilled
FUNCTION CalcMean(aArray)
   // Arithmetic mean of the elements of aArray.
   LOCAL nTotal := 0
   LOCAL nValue

   FOR EACH nValue IN aArray
      nTotal += nValue
   NEXT

   RETURN nTotal / Len(aArray)
FUNCTION CalcVariance(aArray, nMean)
   // Population variance of aArray around the supplied nMean:
   // the average of the squared deviations (divides by Len(aArray)).
   LOCAL nSquares := 0
   LOCAL nValue

   FOR EACH nValue IN aArray
      nSquares += (nValue - nMean) ** 2
   NEXT

   RETURN nSquares / Len(aArray)
FUNCTION MaxInArray(aArray)
   // Largest element of aArray. The first element seeds the search,
   // so aArray must contain at least one element.
   LOCAL nLargest := aArray[1]
   LOCAL nPos := 2
   LOCAL nCount := Len(aArray)

   DO WHILE nPos <= nCount
      IF nLargest < aArray[nPos]
         nLargest := aArray[nPos]
      ENDIF
      nPos++
   ENDDO

   RETURN nLargest
FUNCTION PrintMatrix(aMatrix)
   // Prints a three-level array: a "Batch" header per outer element, then
   // one " Seq" line per second-level element with its values rounded to
   // four decimals.
   LOCAL nBatch, nSeq, nItem, aBatch, aValues

   FOR nBatch := 1 TO Len(aMatrix)
      aBatch := aMatrix[nBatch]
      ? "Batch", nBatch
      FOR nSeq := 1 TO Len(aBatch)
         aValues := aBatch[nSeq]
         ?? " Seq", nSeq, ":"
         FOR nItem := 1 TO Len(aValues)
            ?? Round(aValues[nItem], 4), " "
         NEXT
         // Line break after each sequence row
         ?
      NEXT
      // Extra line break after each batch
      ?
   NEXT

   RETURN NIL