Page 1 of 1

CLASS Transformer

Posted: Sat Jan 11, 2025 9:31 am
by Antonio Linares

Code: Select all | Expand

#include "FiveWin.ch"

PROCEDURE Main()

   local transformer, src, tgt, output, loss, d_output
   local row, diff

   // Build the Transformer: 6 layers, d_model = 512, 8 heads
   transformer := Transformer():New(6, 512, 8)

   // Simulated input and target matrices (10 x 512 random values)
   src := hb_MatrixRandom(10, 512)
   tgt := hb_MatrixRandom(10, 512)

   // Forward pass
   output := transformer:Forward(src, tgt)

   // Error term, computed once and shared by the loss and the backward pass
   d_output := hb_MatrixSubstract(output, tgt)

   // Squared-error loss: sum of the squares of every element of d_output.
   // Summing all entries of d_output * Transpose(d_output) would also add
   // the products between different rows, which is not a squared error.
   loss := 0
   FOR EACH row IN d_output
      FOR EACH diff IN row
         loss += diff * diff
      NEXT
   NEXT

   // Backward pass
   transformer:Backward(d_output)

   ? "Pérdida:", loss

return

// Attention layer class (query/key/value/output projections)
CLASS MultiHeadAttention
   // Layer weights
   VAR WQ
   VAR WK
   VAR WV
   VAR WO

   // Gradients of the weights
   VAR dWQ
   VAR dWK
   VAR dWV
   VAR dWO

   // Configuration values received by New()
   VAR d_model
   VAR n_heads

   METHOD New(d_model, n_heads)
   METHOD ScaledDotProductAttention(Q, K, V)
   METHOD Forward(query, key, value)
   METHOD Backward(d_output, Q, K, V)
ENDCLASS

METHOD New(d_model, n_heads) CLASS MultiHeadAttention

   // Keep the configuration on the instance
   ::n_heads := n_heads
   ::d_model := d_model

   // Random weight matrices, each d_model x d_model.
   // The creation order WQ, WK, WV, WO is kept so the sequence of
   // random draws stays the same.
   ::WQ := hb_MatrixRandom(d_model, d_model)
   ::WK := hb_MatrixRandom(d_model, d_model)
   ::WV := hb_MatrixRandom(d_model, d_model)
   ::WO := hb_MatrixRandom(d_model, d_model)

   // Gradient matrices start filled with zeros
   ::dWO := hb_MatrixZero(d_model, d_model)
   ::dWV := hb_MatrixZero(d_model, d_model)
   ::dWK := hb_MatrixZero(d_model, d_model)
   ::dWQ := hb_MatrixZero(d_model, d_model)

   return Self

METHOD ScaledDotProductAttention(Q, K, V) CLASS MultiHeadAttention

   // Q * Transpose(K), scaled by 1 / Sqrt(d_model)
   local logits := hb_MatrixScale( hb_MatrixMultiply(Q, hb_MatrixTranspose(K)), ;
                                   1 / Sqrt(::d_model) )

   // Row-wise softmax of the scaled scores
   local weights := hb_Softmax(logits)

   // Weighted combination of the rows of V
return hb_MatrixMultiply(weights, V)

METHOD Forward(query, key, value) CLASS MultiHeadAttention

   // Project the three inputs with WQ, WK and WV, then run the scaled attention
   local context := ::ScaledDotProductAttention( ;
                       hb_MatrixMultiply(query, ::WQ), ;
                       hb_MatrixMultiply(key, ::WK), ;
                       hb_MatrixMultiply(value, ::WV) )

   // Final projection with WO
return hb_MatrixMultiply(context, ::WO)

METHOD Backward(d_output, Q, K, V) CLASS MultiHeadAttention

   local dOutT

   // Gradient for WO: Transpose(Q) * d_output
   ::dWO := hb_MatrixMultiply(hb_MatrixTranspose(Q), d_output)

   // Simplified gradients for WQ, WK and WV: Transpose(d_output) times Q, K and V.
   // The transpose is taken once and reused for the three products.
   dOutT := hb_MatrixTranspose(d_output)
   ::dWQ := hb_MatrixMultiply(dOutT, Q)
   ::dWK := hb_MatrixMultiply(dOutT, K)
   ::dWV := hb_MatrixMultiply(dOutT, V)

   // Simplification: a zero matrix is returned, so nothing is propagated further
return hb_MatrixZero(::d_model, ::d_model)

// Transformer class: an array of layers plus the layer count
CLASS Transformer
   VAR layers
   VAR num_layers

   METHOD New(num_layers, d_model, n_heads)
   METHOD Forward(src, tgt)
   METHOD Backward(d_output)
ENDCLASS

METHOD New(num_layers, d_model, n_heads) CLASS Transformer

   local aStack, nLayer

   ::num_layers := num_layers

   // Build the stack in a local array: one MultiHeadAttention per layer
   aStack := Array(num_layers)
   FOR nLayer := 1 TO num_layers
      aStack[nLayer] := MultiHeadAttention():New(d_model, n_heads)
   NEXT

   ::layers := aStack

return Self

METHOD Forward(src, tgt) CLASS Transformer

   local hidden := src
   local nLayer := 0

   // Feed the result of each layer into the next one as query, key and value.
   // tgt is accepted but not used by this method.
   WHILE ++nLayer <= ::num_layers
      hidden := ::layers[nLayer]:Forward(hidden, hidden, hidden)
   ENDDO

return hidden

METHOD Backward(d_output) CLASS Transformer

   local grad := d_output, i

   // Walk the layers from last to first.
   // MultiHeadAttention:Backward is declared as (d_output, Q, K, V). With only
   // three arguments V arrives as NIL and hb_MatrixMultiply raises an error,
   // so the current gradient is passed for all four parameters (the forward
   // activations are not kept by the layers).
   FOR i := ::num_layers TO 1 STEP -1
      grad := ::layers[i]:Backward(grad, grad, grad, grad)
   NEXT

return grad

#pragma BEGINDUMP

#include <hbapi.h>
#include <hbapiitm.h>
#include <hbapierr.h>
#include <math.h>

/* hb_MatrixMultiply( aLeft, aRight ) -> aProduct
 * Matrix product of two arrays of row arrays. The number of columns of the
 * first matrix (taken from its first row) must equal the number of rows of
 * the second one; otherwise an argument error is raised.
 */
HB_FUNC( HB_MATRIXMULTIPLY )
{
   PHB_ITEM pLeft  = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pRight = hb_param( 2, HB_IT_ARRAY );
   PHB_ITEM pFirstRow, pProduct;
   int nLeftRows, nInner, nRightRows, nRightCols;
   int r, c, t;

   if( !pLeft || !pRight )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   /* Shape of the first matrix */
   nLeftRows = hb_arrayLen( pLeft );
   if( nLeftRows == 0 )
   {
      hb_errRT_BASE( EG_ARG, 3012, "First matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   pFirstRow = hb_arrayGetItemPtr( pLeft, 1 );
   if( !( pFirstRow && HB_IS_ARRAY( pFirstRow ) ) )
   {
      hb_errRT_BASE( EG_ARG, 3012, "First matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   nInner = hb_arrayLen( pFirstRow );

   /* Shape of the second matrix */
   nRightRows = hb_arrayLen( pRight );
   if( nRightRows == 0 )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Second matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   pFirstRow = hb_arrayGetItemPtr( pRight, 1 );
   if( !( pFirstRow && HB_IS_ARRAY( pFirstRow ) ) )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Second matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   nRightCols = hb_arrayLen( pFirstRow );

   /* Inner dimensions must agree */
   if( nInner != nRightRows )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Matrix dimensions do not match for multiplication", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   /* Build the result one row at a time (nLeftRows x nRightCols) */
   pProduct = hb_itemArrayNew( nLeftRows );

   for( r = 1; r <= nLeftRows; r++ )
   {
      PHB_ITEM pLeftRow = hb_arrayGetItemPtr( pLeft, r );
      PHB_ITEM pOutRow  = hb_itemArrayNew( nRightCols );

      for( c = 1; c <= nRightCols; c++ )
      {
         double acc = 0.0;

         for( t = 1; t <= nInner; t++ )
         {
            double lhs = hb_arrayGetND( pLeftRow, t );
            double rhs = hb_arrayGetND( hb_arrayGetItemPtr( pRight, t ), c );
            acc += lhs * rhs;
         }
         hb_arraySetND( pOutRow, c, acc );
      }

      hb_arraySet( pProduct, r, pOutRow );
      hb_itemRelease( pOutRow ); /* drop the local reference */
   }

   hb_itemReturnRelease( pProduct );
}

#include <windows.h>

/* hb_MatrixScale( aMatrix, nFactor ) -> aScaled
 * Returns a new matrix whose elements are those of aMatrix multiplied by
 * nFactor. Raises an argument error when the first parameter is not an array.
 */
HB_FUNC( HB_MATRIXSCALE )
{
   PHB_ITEM pSource = hb_param( 1, HB_IT_ARRAY );
   double factor = hb_parnd( 2 );
   PHB_ITEM pScaled;
   HB_SIZE nRowCount, r;

   if( !pSource )
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nRowCount = hb_arrayLen( pSource );
   pScaled = hb_itemArrayNew( nRowCount );

   for( r = 1; r <= nRowCount; r++ )
   {
      PHB_ITEM pSrcRow = hb_arrayGetItemPtr( pSource, r );
      HB_SIZE nWidth = hb_arrayLen( pSrcRow ); /* each row keeps its own length */
      PHB_ITEM pDstRow = hb_itemArrayNew( nWidth );
      HB_SIZE c;

      for( c = 1; c <= nWidth; c++ )
         hb_arraySetND( pDstRow, c, hb_arrayGetND( pSrcRow, c ) * factor );

      hb_arraySet( pScaled, r, pDstRow );
      hb_itemRelease( pDstRow );
   }

   hb_itemReturnRelease( pScaled );
}

/* hb_MatrixTranspose( aMatrix ) -> aTransposed
 * Returns the transpose of aMatrix (array of row arrays). The column count is
 * taken from the first row. Raises an argument error when the parameter is
 * not an array or has no rows.
 */
HB_FUNC( HB_MATRIXTRANSPOSE )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );
   HB_SIZE nRows = pMatrix ? hb_arrayLen( pMatrix ) : 0;

   /* An empty matrix has no first row to measure, so it is rejected here
      instead of passing a NULL item to hb_arrayLen() */
   if( nRows > 0 )
   {
      HB_SIZE nCols = hb_arrayLen( hb_arrayGetItemPtr( pMatrix, 1 ) );
      HB_SIZE i, j;

      PHB_ITEM pMatrixResult = hb_itemArrayNew( nCols ); /* nCols x nRows */

      /* Create the rows of the transposed matrix */
      for( i = 0; i < nCols; i++ )
      {
         PHB_ITEM pNewRow = hb_itemArrayNew( nRows );
         hb_arraySet( pMatrixResult, i + 1, pNewRow );
         hb_itemRelease( pNewRow ); /* hb_arraySet keeps its own copy; release ours to avoid a leak */
      }

      /* Copy element (i, j) of the source into (j, i) of the result */
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
         for( j = 0; j < nCols; j++ )
         {
            double value = hb_arrayGetND( pRow, j + 1 );
            PHB_ITEM pTransposedRow = hb_arrayGetItemPtr( pMatrixResult, j + 1 );
            hb_arraySetND( pTransposedRow, i + 1, value );
         }
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXZERO )
{
   HB_SIZE nRows = hb_parns( 1 ); // Número de filas
   HB_SIZE nCols = hb_parns( 2 ); // Número de columnas

   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows ); // Crear la matriz de nRows filas

      // Inicializar la matriz con ceros
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols ); // Crear una fila con nCols columnas
         for( j = 0; j < nCols; j++ )
         {
            hb_arraySetND( pRow, j + 1, 0.0 ); // Establecer cada elemento a 0.0
         }
         hb_arraySet( pMatrix, i + 1, pRow ); // Añadir la fila a la matriz
         hb_itemRelease( pRow ); // Liberar la fila temporal
      }

      hb_itemReturnRelease( pMatrix ); // Devolver la matriz completa
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXRANDOM )
{
   HB_SIZE nRows = hb_parns( 1 ); // Número de filas
   HB_SIZE nCols = hb_parns( 2 ); // Número de columnas

   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows ); // Crear la matriz de nRows filas

      // Inicializar la matriz con valores aleatorios
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols ); // Crear una fila con nCols columnas
         for( j = 0; j < nCols; j++ )
         {
            double randomValue = (double)rand() / RAND_MAX; // Valor aleatorio entre 0.0 y 1.0
            hb_arraySetND( pRow, j + 1, randomValue );
         }
         hb_arraySet( pMatrix, i + 1, pRow ); // Añadir la fila a la matriz
         hb_itemRelease( pRow ); // Liberar la fila temporal
      }

      hb_itemReturnRelease( pMatrix ); // Devolver la matriz completa
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_SOFTMAX )
{
   PHB_ITEM pValues = hb_param( 1, HB_IT_ARRAY ); // Primer parámetro: array multidimensional de valores

   if( pValues )
   {
      int nRows = hb_arrayLen( pValues ); // Número de filas
      if( nRows > 0 )
      {
         // Asumimos que las filas tienen la misma longitud
         PHB_ITEM pFirstRow = hb_arrayGetItemPtr( pValues, 1 );
         int nCols = hb_arrayLen( pFirstRow ); // Número de columnas (basado en la primera fila)

         PHB_ITEM pResult = hb_itemArrayNew( nRows ); // Array para almacenar los resultados
         int i, j;

         // Recorrer cada fila
         for( i = 0; i < nRows; i++ )
         {
            PHB_ITEM pRow = hb_arrayGetItemPtr( pValues, i + 1 );
            PHB_ITEM pRowResult = hb_itemArrayNew( nCols ); // Fila de resultados para Softmax

            double* expValues = (double*) hb_xgrab( nCols * sizeof(double) );
            double sumExp = 0.0;

            // Calcular e^x para cada elemento de la fila y la suma total
            for( j = 0; j < nCols; j++ )
            {
               double value = hb_arrayGetND( pRow, j + 1 );
               expValues[j] = pow( M_E, value );
               sumExp += expValues[j];
            }

            // Calcular Softmax para la fila dividiendo cada e^x por la suma total
            for( j = 0; j < nCols; j++ )
            {
               double softmaxValue = expValues[j] / sumExp;
               hb_arraySetND( pRowResult, j + 1, softmaxValue );
            }

            hb_xfree( expValues ); // Liberar memoria para los exponentes

            // Guardar la fila de resultados en la matriz resultante
            hb_arraySet( pResult, i + 1, pRowResult );
            hb_itemRelease( pRowResult ); // Liberar la fila de resultados
         }

         hb_itemReturnRelease( pResult ); // Devolver la matriz de resultados
      }
      else
      {
         hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      }
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

/* hb_MatrixSubstract( aLeft, aRight ) -> aDifference
 * Element-wise aLeft - aRight for two matrices (arrays of row arrays).
 * Row counts must match and be non-zero; the column counts, taken from the
 * first rows, must match and be non-zero. Otherwise an argument error is raised.
 */
HB_FUNC( HB_MATRIXSUBSTRACT )
{
   PHB_ITEM pMinuend    = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pSubtrahend = hb_param( 2, HB_IT_ARRAY );
   PHB_ITEM pDifference;
   HB_SIZE nHeight, nWidth, r, c;

   if( !pMinuend || !pSubtrahend )
   {
      /* Invalid arguments */
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nHeight = hb_arrayLen( pMinuend );
   if( nHeight != hb_arrayLen( pSubtrahend ) || nHeight == 0 )
   {
      /* Row counts differ (or there are no rows) */
      hb_errRT_BASE( EG_ARG, 3012, "Row dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nWidth = hb_arrayLen( hb_arrayGetItemPtr( pMinuend, 1 ) );
   if( nWidth != hb_arrayLen( hb_arrayGetItemPtr( pSubtrahend, 1 ) ) || nWidth == 0 )
   {
      /* Column counts differ (or there are no columns) */
      hb_errRT_BASE( EG_ARG, 3012, "Column dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   pDifference = hb_itemArrayNew( nHeight );

   for( r = 1; r <= nHeight; r++ )
   {
      PHB_ITEM pLeftRow  = hb_arrayGetItemPtr( pMinuend, r );
      PHB_ITEM pRightRow = hb_arrayGetItemPtr( pSubtrahend, r );
      PHB_ITEM pOutRow   = hb_itemArrayNew( nWidth );

      for( c = 1; c <= nWidth; c++ )
         hb_arraySetND( pOutRow, c, hb_arrayGetND( pLeftRow, c ) - hb_arrayGetND( pRightRow, c ) );

      hb_arraySet( pDifference, r, pOutRow );
      hb_itemRelease( pOutRow ); /* release the temporary row */
   }

   hb_itemReturnRelease( pDifference );
}

/* hb_MatrixSum( aMatrix ) -> nTotal
 * Adds up every element of every row of aMatrix and returns the total.
 * Raises an argument error when the parameter is not an array or is empty.
 */
HB_FUNC( HB_MATRIXSUM )
{
   PHB_ITEM pSource = hb_param( 1, HB_IT_ARRAY );
   double total = 0.0;
   int nRowCount, r, c;

   if( !pSource )
   {
      /* Invalid argument */
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameter", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nRowCount = hb_arrayLen( pSource );
   if( nRowCount <= 0 )
   {
      /* Empty matrix */
      hb_errRT_BASE( EG_ARG, 3012, "Empty matrix", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   for( r = 1; r <= nRowCount; r++ )
   {
      PHB_ITEM pRow = hb_arrayGetItemPtr( pSource, r );
      int nWidth = hb_arrayLen( pRow ); /* each row is measured on its own */

      for( c = 1; c <= nWidth; c++ )
         total += hb_arrayGetND( pRow, c );
   }

   hb_retnd( total );
}

#pragma ENDDUMP

Re: CLASS Transformer

Posted: Sat Jan 11, 2025 8:06 pm
by Antonio Linares

Code: Select all | Expand

#include "FiveWin.ch"

// Entry point: runs the training loop of the Transformer on random data
PROCEDURE Main()
   LOCAL d_model := 128
   LOCAL n_heads := 4
   LOCAL num_layers := 1
   LOCAL learning_rate := 0.01
   LOCAL max_epochs := 100
   LOCAL n_seq := 10   // number of rows in src and tgt
   LOCAL output, loss, d_output, epoch
   LOCAL row, diff

   // Random input and target matrices (n_seq x d_model)
   LOCAL src := hb_MatrixRandom(n_seq, d_model)
   LOCAL tgt := hb_MatrixRandom(n_seq, d_model)

   // Create the Transformer instance
   LOCAL transformer := Transformer():New(num_layers, d_model, n_heads)

   FOR epoch := 1 TO max_epochs
      output := transformer:Forward(src, tgt)

      // Output gradient, computed once per epoch and reused for the loss
      d_output := hb_MatrixSubstract(output, tgt)

      // Mean squared error: sum of the squared elements of d_output divided
      // by the element count. Summing all entries of
      // d_output * Transpose(d_output) would also add the products between
      // different rows, which is not a squared error.
      loss := 0
      FOR EACH row IN d_output
         FOR EACH diff IN row
            loss += diff * diff
         NEXT
      NEXT
      loss := loss / (n_seq * d_model)

      // Backpropagation and weight update
      transformer:Backward(d_output)
      ActualizarPesos(transformer, learning_rate)

      // Report the loss of this epoch
      ? "Época:", epoch, "Pérdida:", loss
   NEXT
RETURN

// MultiHeadAttention class: projection weights and their gradients
CLASS MultiHeadAttention
   // Weights
   VAR WQ
   VAR WK
   VAR WV
   VAR WO

   // Gradients of the weights
   VAR dWQ
   VAR dWK
   VAR dWV
   VAR dWO

   METHOD New(d_model, n_heads)
   METHOD Forward(Q, K, V)
   METHOD Backward(d_output) VIRTUAL
ENDCLASS

METHOD New(d_model, n_heads) CLASS MultiHeadAttention
   // Random weights are scaled by 1 / Sqrt(d_model)
   LOCAL scale := 1.0 / Sqrt(d_model)

   // n_heads is accepted but not used by this method
   ::WQ := hb_MatrixScale( hb_MatrixRandom(d_model, d_model), scale )
   ::WK := hb_MatrixScale( hb_MatrixRandom(d_model, d_model), scale )
   ::WV := hb_MatrixScale( hb_MatrixRandom(d_model, d_model), scale )
   ::WO := hb_MatrixScale( hb_MatrixRandom(d_model, d_model), scale )

   // Gradients start at zero. The explicit assignment operator ":=" is used,
   // matching the weight assignments above ("=" only assigns at statement level).
   ::dWQ := hb_MatrixZero(d_model, d_model)  // Gradient for WQ
   ::dWK := hb_MatrixZero(d_model, d_model)  // Gradient for WK
   ::dWV := hb_MatrixZero(d_model, d_model)  // Gradient for WV
   ::dWO := hb_MatrixZero(d_model, d_model)  // Gradient for WO

RETURN Self

METHOD Forward(Q, K, V) CLASS MultiHeadAttention
      // Project the inputs with the layer weights
      LOCAL Q_proj := hb_MatrixMultiply(Q, ::WQ)
      LOCAL K_proj := hb_MatrixMultiply(K, ::WK)
      LOCAL V_proj := hb_MatrixMultiply(V, ::WV)
      // Scores: Q_proj * Transpose(K_proj) divided by Sqrt of the projected width
      LOCAL scores := hb_MatrixDiv( hb_MatrixMultiply(Q_proj, hb_MatrixTranspose(K_proj)), Sqrt(Len(Q_proj[1])) )
      LOCAL attention := Softmax(scores)
      // Apply the output projection WO: New() creates it and ActualizarPesos()
      // updates it, but without this product it never influences the output.
      RETURN hb_MatrixMultiply(hb_MatrixMultiply(attention, V_proj), ::WO)

// Transformer class: holds the array of layers
CLASS Transformer
   DATA layers

   METHOD New(num_layers, d_model, n_heads)
   METHOD Forward(src, tgt)
   METHOD Backward(d_output)
ENDCLASS

METHOD New(num_layers, d_model, n_heads) CLASS Transformer
   local nLayer := 0

   // One MultiHeadAttention instance per layer
   ::layers := Array(num_layers)
   WHILE ++nLayer <= num_layers
      ::layers[nLayer] := MultiHeadAttention():New(d_model, n_heads)
   ENDDO
RETURN Self

METHOD Forward(src, tgt) CLASS Transformer
   LOCAL hidden := src
   LOCAL oLayer

   // Each layer receives the previous result as Q, K and V.
   // tgt is accepted but not used by this method.
   FOR EACH oLayer IN ::layers
      hidden := oLayer:Forward(hidden, hidden, hidden)
   NEXT
RETURN hidden

METHOD Backward(d_output) CLASS Transformer
   local oLayer

   // Visit the layers from last to first, handing each the same d_output
   FOR EACH oLayer IN ::layers DESCEND
      oLayer:Backward(d_output)
   NEXT
RETURN NIL

// Row-wise softmax; the row maximum is subtracted before Exp() for numerical stability
FUNCTION Softmax(matrix)
   LOCAL nRowCount := Len(matrix)
   LOCAL nWidth := Len(matrix[1])
   LOCAL aProbs := Array(nRowCount, nWidth)
   LOCAL r, c, nPeak, nTotal, aRow

   FOR r := 1 TO nRowCount
      aRow := matrix[r]
      nPeak := hb_ArrayMax(aRow)
      nTotal := 0

      // First pass: store the shifted exponentials and accumulate their sum
      FOR c := 1 TO nWidth
         aProbs[r][c] := Exp(aRow[c] - nPeak)
         nTotal += aProbs[r][c]
      NEXT

      // Second pass: divide every exponential by the row sum
      FOR c := 1 TO nWidth
         aProbs[r][c] := aProbs[r][c] / nTotal
      NEXT
   NEXT

RETURN aProbs

// Updates the weights of every layer: W := W - learning_rate * dW
FUNCTION ActualizarPesos(transformer, learning_rate)
   local oLayer

   FOR EACH oLayer IN transformer:layers
      oLayer:WQ := hb_MatrixSubstract(oLayer:WQ, hb_MatrixScale(oLayer:dWQ, learning_rate))
      oLayer:WK := hb_MatrixSubstract(oLayer:WK, hb_MatrixScale(oLayer:dWK, learning_rate))
      oLayer:WV := hb_MatrixSubstract(oLayer:WV, hb_MatrixScale(oLayer:dWV, learning_rate))
      oLayer:WO := hb_MatrixSubstract(oLayer:WO, hb_MatrixScale(oLayer:dWO, learning_rate))
   NEXT
RETURN nil

// Returns the largest element of aArray.
// Returns NIL when aArray is not an array or has no elements.
FUNCTION hb_ArrayMax(aArray)
   LOCAL nBest, nPos

   // Reject anything that is not an array
   IF !( ValType(aArray) == "A" )
      RETURN NIL
   ENDIF

   // An empty array has no maximum
   IF Len(aArray) == 0
      RETURN NIL
   ENDIF

   // Start from the first element and keep the larger value while scanning
   nBest := aArray[1]
   FOR nPos := 2 TO Len(aArray)
      IF aArray[nPos] > nBest
         nBest := aArray[nPos]
      ENDIF
   NEXT

RETURN nBest

#pragma BEGINDUMP

#include <hbapi.h>
#include <hbapiitm.h>
#include <hbapierr.h>
#include <math.h>

/* hb_MatrixMultiply( aLeft, aRight ) -> aProduct
 * Matrix product of two arrays of row arrays. The number of columns of the
 * first matrix (taken from its first row) must equal the number of rows of
 * the second one; otherwise an argument error is raised.
 */
HB_FUNC( HB_MATRIXMULTIPLY )
{
   PHB_ITEM pLeft  = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pRight = hb_param( 2, HB_IT_ARRAY );
   PHB_ITEM pFirstRow, pProduct;
   int nLeftRows, nInner, nRightRows, nRightCols;
   int r, c, t;

   if( !pLeft || !pRight )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   /* Shape of the first matrix */
   nLeftRows = hb_arrayLen( pLeft );
   if( nLeftRows == 0 )
   {
      hb_errRT_BASE( EG_ARG, 3012, "First matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   pFirstRow = hb_arrayGetItemPtr( pLeft, 1 );
   if( !( pFirstRow && HB_IS_ARRAY( pFirstRow ) ) )
   {
      hb_errRT_BASE( EG_ARG, 3012, "First matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   nInner = hb_arrayLen( pFirstRow );

   /* Shape of the second matrix */
   nRightRows = hb_arrayLen( pRight );
   if( nRightRows == 0 )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Second matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   pFirstRow = hb_arrayGetItemPtr( pRight, 1 );
   if( !( pFirstRow && HB_IS_ARRAY( pFirstRow ) ) )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Second matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }
   nRightCols = hb_arrayLen( pFirstRow );

   /* Inner dimensions must agree */
   if( nInner != nRightRows )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Matrix dimensions do not match for multiplication", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   /* Build the result one row at a time (nLeftRows x nRightCols) */
   pProduct = hb_itemArrayNew( nLeftRows );

   for( r = 1; r <= nLeftRows; r++ )
   {
      PHB_ITEM pLeftRow = hb_arrayGetItemPtr( pLeft, r );
      PHB_ITEM pOutRow  = hb_itemArrayNew( nRightCols );

      for( c = 1; c <= nRightCols; c++ )
      {
         double acc = 0.0;

         for( t = 1; t <= nInner; t++ )
         {
            double lhs = hb_arrayGetND( pLeftRow, t );
            double rhs = hb_arrayGetND( hb_arrayGetItemPtr( pRight, t ), c );
            acc += lhs * rhs;
         }
         hb_arraySetND( pOutRow, c, acc );
      }

      hb_arraySet( pProduct, r, pOutRow );
      hb_itemRelease( pOutRow ); /* drop the local reference */
   }

   hb_itemReturnRelease( pProduct );
}

/* hb_MatrixScale( aMatrix, nFactor ) -> aScaled
 * Returns a new matrix whose elements are those of aMatrix multiplied by
 * nFactor. Raises an argument error when the first parameter is not an array.
 */
HB_FUNC( HB_MATRIXSCALE )
{
   PHB_ITEM pSource = hb_param( 1, HB_IT_ARRAY );
   double factor = hb_parnd( 2 );
   PHB_ITEM pScaled;
   HB_SIZE nRowCount, r;

   if( !pSource )
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nRowCount = hb_arrayLen( pSource );
   pScaled = hb_itemArrayNew( nRowCount );

   for( r = 1; r <= nRowCount; r++ )
   {
      PHB_ITEM pSrcRow = hb_arrayGetItemPtr( pSource, r );
      HB_SIZE nWidth = hb_arrayLen( pSrcRow ); /* each row keeps its own length */
      PHB_ITEM pDstRow = hb_itemArrayNew( nWidth );
      HB_SIZE c;

      for( c = 1; c <= nWidth; c++ )
         hb_arraySetND( pDstRow, c, hb_arrayGetND( pSrcRow, c ) * factor );

      hb_arraySet( pScaled, r, pDstRow );
      hb_itemRelease( pDstRow );
   }

   hb_itemReturnRelease( pScaled );
}

HB_FUNC( HB_MATRIXDIV )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY ); // Primer parámetro: matriz a escalar
   double scale = hb_parnd( 2 );                 // Segundo parámetro: escalar

   if( pMatrix )
   {
      HB_SIZE nRows = hb_arrayLen( pMatrix );
      HB_SIZE i, j;
      PHB_ITEM pMatrixResult = hb_itemArrayNew( nRows );

      // Copiar y escalar los datos
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
         HB_SIZE nCols = hb_arrayLen( pRow );

         PHB_ITEM pRowResult = hb_itemArrayNew( nCols );

         for( j = 0; j < nCols; j++ )
         {
            double value = hb_arrayGetND( pRow, j + 1 );
            hb_arraySetND( pRowResult, j + 1, value / scale );
         }

         hb_arraySet( pMatrixResult, i + 1, pRowResult );
         hb_itemRelease( pRowResult );
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

/* hb_MatrixTranspose( aMatrix ) -> aTransposed
 * Returns the transpose of aMatrix (array of row arrays). The column count is
 * taken from the first row. Raises an argument error when the parameter is
 * not an array or has no rows.
 */
HB_FUNC( HB_MATRIXTRANSPOSE )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );
   HB_SIZE nRows = pMatrix ? hb_arrayLen( pMatrix ) : 0;

   /* An empty matrix has no first row to measure, so it is rejected here
      instead of passing a NULL item to hb_arrayLen() */
   if( nRows > 0 )
   {
      HB_SIZE nCols = hb_arrayLen( hb_arrayGetItemPtr( pMatrix, 1 ) );
      HB_SIZE i, j;

      PHB_ITEM pMatrixResult = hb_itemArrayNew( nCols ); /* nCols x nRows */

      /* Create the rows of the transposed matrix */
      for( i = 0; i < nCols; i++ )
      {
         PHB_ITEM pNewRow = hb_itemArrayNew( nRows );
         hb_arraySet( pMatrixResult, i + 1, pNewRow );
         hb_itemRelease( pNewRow ); /* hb_arraySet keeps its own copy; release ours to avoid a leak */
      }

      /* Copy element (i, j) of the source into (j, i) of the result */
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
         for( j = 0; j < nCols; j++ )
         {
            double value = hb_arrayGetND( pRow, j + 1 );
            PHB_ITEM pTransposedRow = hb_arrayGetItemPtr( pMatrixResult, j + 1 );
            hb_arraySetND( pTransposedRow, i + 1, value );
         }
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXZERO )
{
   HB_SIZE nRows = hb_parns( 1 ); // Número de filas
   HB_SIZE nCols = hb_parns( 2 ); // Número de columnas

   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows ); // Crear la matriz de nRows filas

      // Inicializar la matriz con ceros
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols ); // Crear una fila con nCols columnas
         for( j = 0; j < nCols; j++ )
         {
            hb_arraySetND( pRow, j + 1, 0.0 ); // Establecer cada elemento a 0.0
         }
         hb_arraySet( pMatrix, i + 1, pRow ); // Añadir la fila a la matriz
         hb_itemRelease( pRow ); // Liberar la fila temporal
      }

      hb_itemReturnRelease( pMatrix ); // Devolver la matriz completa
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXRANDOM )
{
   HB_SIZE nRows = hb_parns( 1 ); // Número de filas
   HB_SIZE nCols = hb_parns( 2 ); // Número de columnas

   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows ); // Crear la matriz de nRows filas

      // Inicializar la matriz con valores aleatorios
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols ); // Crear una fila con nCols columnas
         for( j = 0; j < nCols; j++ )
         {
            double randomValue = (double)rand() / RAND_MAX; // Valor aleatorio entre 0.0 y 1.0
            hb_arraySetND( pRow, j + 1, randomValue );
         }
         hb_arraySet( pMatrix, i + 1, pRow ); // Añadir la fila a la matriz
         hb_itemRelease( pRow ); // Liberar la fila temporal
      }

      hb_itemReturnRelease( pMatrix ); // Devolver la matriz completa
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_SOFTMAX )
{
   PHB_ITEM pValues = hb_param( 1, HB_IT_ARRAY ); // Primer parámetro: array multidimensional de valores

   if( pValues )
   {
      int nRows = hb_arrayLen( pValues ); // Número de filas
      if( nRows > 0 )
      {
         // Asumimos que las filas tienen la misma longitud
         PHB_ITEM pFirstRow = hb_arrayGetItemPtr( pValues, 1 );
         int nCols = hb_arrayLen( pFirstRow ); // Número de columnas (basado en la primera fila)

         PHB_ITEM pResult = hb_itemArrayNew( nRows ); // Array para almacenar los resultados
         int i, j;

         // Recorrer cada fila
         for( i = 0; i < nRows; i++ )
         {
            PHB_ITEM pRow = hb_arrayGetItemPtr( pValues, i + 1 );
            PHB_ITEM pRowResult = hb_itemArrayNew( nCols ); // Fila de resultados para Softmax

            double* expValues = (double*) hb_xgrab( nCols * sizeof(double) );
            double sumExp = 0.0;

            // Calcular e^x para cada elemento de la fila y la suma total
            for( j = 0; j < nCols; j++ )
            {
               double value = hb_arrayGetND( pRow, j + 1 );
               expValues[j] = pow( M_E, value );
               sumExp += expValues[j];
            }

            // Calcular Softmax para la fila dividiendo cada e^x por la suma total
            for( j = 0; j < nCols; j++ )
            {
               double softmaxValue = expValues[j] / sumExp;
               hb_arraySetND( pRowResult, j + 1, softmaxValue );
            }

            hb_xfree( expValues ); // Liberar memoria para los exponentes

            // Guardar la fila de resultados en la matriz resultante
            hb_arraySet( pResult, i + 1, pRowResult );
            hb_itemRelease( pRowResult ); // Liberar la fila de resultados
         }

         hb_itemReturnRelease( pResult ); // Devolver la matriz de resultados
      }
      else
      {
         hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      }
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

/* hb_MatrixSubstract( aLeft, aRight ) -> aDifference
 * Element-wise aLeft - aRight for two matrices (arrays of row arrays).
 * Row counts must match and be non-zero; the column counts, taken from the
 * first rows, must match and be non-zero. Otherwise an argument error is raised.
 */
HB_FUNC( HB_MATRIXSUBSTRACT )
{
   PHB_ITEM pMinuend    = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pSubtrahend = hb_param( 2, HB_IT_ARRAY );
   PHB_ITEM pDifference;
   HB_SIZE nHeight, nWidth, r, c;

   if( !pMinuend || !pSubtrahend )
   {
      /* Invalid arguments */
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nHeight = hb_arrayLen( pMinuend );
   if( nHeight != hb_arrayLen( pSubtrahend ) || nHeight == 0 )
   {
      /* Row counts differ (or there are no rows) */
      hb_errRT_BASE( EG_ARG, 3012, "Row dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nWidth = hb_arrayLen( hb_arrayGetItemPtr( pMinuend, 1 ) );
   if( nWidth != hb_arrayLen( hb_arrayGetItemPtr( pSubtrahend, 1 ) ) || nWidth == 0 )
   {
      /* Column counts differ (or there are no columns) */
      hb_errRT_BASE( EG_ARG, 3012, "Column dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   pDifference = hb_itemArrayNew( nHeight );

   for( r = 1; r <= nHeight; r++ )
   {
      PHB_ITEM pLeftRow  = hb_arrayGetItemPtr( pMinuend, r );
      PHB_ITEM pRightRow = hb_arrayGetItemPtr( pSubtrahend, r );
      PHB_ITEM pOutRow   = hb_itemArrayNew( nWidth );

      for( c = 1; c <= nWidth; c++ )
         hb_arraySetND( pOutRow, c, hb_arrayGetND( pLeftRow, c ) - hb_arrayGetND( pRightRow, c ) );

      hb_arraySet( pDifference, r, pOutRow );
      hb_itemRelease( pOutRow ); /* release the temporary row */
   }

   hb_itemReturnRelease( pDifference );
}

/* hb_MatrixSum( aMatrix ) -> nTotal
 * Adds up every element of every row of aMatrix and returns the total.
 * Raises an argument error when the parameter is not an array or is empty.
 */
HB_FUNC( HB_MATRIXSUM )
{
   PHB_ITEM pSource = hb_param( 1, HB_IT_ARRAY );
   double total = 0.0;
   int nRowCount, r, c;

   if( !pSource )
   {
      /* Invalid argument */
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameter", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nRowCount = hb_arrayLen( pSource );
   if( nRowCount <= 0 )
   {
      /* Empty matrix */
      hb_errRT_BASE( EG_ARG, 3012, "Empty matrix", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   for( r = 1; r <= nRowCount; r++ )
   {
      PHB_ITEM pRow = hb_arrayGetItemPtr( pSource, r );
      int nWidth = hb_arrayLen( pRow ); /* each row is measured on its own */

      for( c = 1; c <= nWidth; c++ )
         total += hb_arrayGetND( pRow, c );
   }

   hb_retnd( total );
}

#pragma ENDDUMP

Re: CLASS Transformer

Posted: Sat Jan 11, 2025 8:23 pm
by Antonio Linares

Code: Select all | Expand

#include "FiveWin.ch"

// Entry point: trains the Transformer on random data and prints the loss of
// every epoch.
//
// A random source/target pair of seq_len x d_model values is generated once.
// Each epoch runs a forward pass, measures the mean squared error against the
// target, back-propagates the error (output - tgt) and applies one
// gradient-descent step through ActualizarPesos().
PROCEDURE Main()
   LOCAL d_model := 128
   LOCAL n_heads := 4
   LOCAL num_layers := 1
   LOCAL learning_rate := 0.001
   LOCAL max_epochs := 100
   LOCAL seq_len := 10
   LOCAL output, loss, d_output, epoch

   // Random input and target matrices (seq_len rows of d_model values)
   LOCAL src := hb_MatrixRandom(seq_len, d_model)
   LOCAL tgt := hb_MatrixRandom(seq_len, d_model)

   // Model under training
   LOCAL transformer := Transformer():New(num_layers, d_model, n_heads)

   FOR epoch := 1 TO max_epochs
      output := transformer:Forward(src, tgt)

      // Error matrix; computed once and reused for both the loss and the
      // gradient fed to the backward pass.
      d_output := hb_MatrixSubstract(output, tgt)

      // Mean squared error. Summing every entry of (E * E') would also add
      // the cross products between different rows, so the squares are
      // accumulated element by element instead.
      loss := MatrixSumSquares(d_output) / (seq_len * d_model)

      // Back-propagation and weight update. The constant factor 2/N of the
      // exact MSE gradient is left out; it only rescales the learning rate.
      transformer:Backward(d_output)
      ActualizarPesos(transformer, learning_rate)

      // Report the loss of this epoch
      ? "Época:", epoch, "Pérdida:", loss
   NEXT
RETURN

// Returns the sum of the squares of all elements of aMatrix (array of rows).
STATIC FUNCTION MatrixSumSquares(aMatrix)
   LOCAL nTotal := 0
   LOCAL aRow, nValue

   FOR EACH aRow IN aMatrix
      FOR EACH nValue IN aRow
         nTotal += nValue * nValue
      NEXT
   NEXT
RETURN nTotal

// Scaled dot-product attention layer with learnable projections.
//
// Forward computes
//    Q' = Q*WQ,  K' = K*WK,  V' = V*WV
//    P  = Softmax( Q' * K'^T / Sqrt(d_k) )        (row-wise softmax)
//    out = (P * V') * WO
// and caches every intermediate value so Backward() can compute the exact
// gradients of those operations.
//
// NOTE: n_heads is stored but the projections are not split into heads; the
// layer behaves as a single attention head spanning the whole d_model.
CLASS MultiHeadAttention
   DATA d_model, n_heads
   DATA WQ, WK, WV, WO            // projection weights (d_model x d_model)
   DATA dWQ, dWK, dWV, dWO        // gradients of the weights, same shapes
   // Cache filled by Forward() and read by Backward()
   DATA Q_in, K_in, V_in          // raw inputs received by Forward()
   DATA Q_proj
   DATA K_proj
   DATA V_proj
   DATA attention_scores          // scaled scores before softmax
   DATA attention_probs           // softmax of attention_scores
   DATA attn_output               // attention_probs * V_proj, before WO

   METHOD New(d_model, n_heads)
   METHOD Forward(Q, K, V)
   METHOD Backward(d_output)
   METHOD InitGradients()
ENDCLASS

// Creates the layer: stores the dimensions, draws random weights scaled by
// 1/Sqrt(d_model) and zeroes the gradients. Returns Self.
METHOD New(d_model, n_heads) CLASS MultiHeadAttention
   LOCAL scale := 1.0 / Sqrt(d_model)

   ::d_model := d_model
   ::n_heads := n_heads

   // Weight matrices
   ::WQ := hb_MatrixScale(hb_MatrixRandom(d_model, d_model), scale)
   ::WK := hb_MatrixScale(hb_MatrixRandom(d_model, d_model), scale)
   ::WV := hb_MatrixScale(hb_MatrixRandom(d_model, d_model), scale)
   ::WO := hb_MatrixScale(hb_MatrixRandom(d_model, d_model), scale)

   // Gradient matrices
   ::InitGradients()

RETURN Self

// Resets the four weight gradients to d_model x d_model zero matrices.
METHOD InitGradients() CLASS MultiHeadAttention
   ::dWQ := hb_MatrixZero(::d_model, ::d_model)
   ::dWK := hb_MatrixZero(::d_model, ::d_model)
   ::dWV := hb_MatrixZero(::d_model, ::d_model)
   ::dWO := hb_MatrixZero(::d_model, ::d_model)
RETURN NIL

// Forward pass. Q, K and V are matrices (arrays of rows) with d_model columns.
// Returns a matrix with one row per row of Q and d_model columns.
METHOD Forward(Q, K, V) CLASS MultiHeadAttention

   // The raw inputs are needed to compute the weight gradients later
   ::Q_in := Q
   ::K_in := K
   ::V_in := V

   // Projected inputs
   ::Q_proj := hb_MatrixMultiply(Q, ::WQ)
   ::K_proj := hb_MatrixMultiply(K, ::WK)
   ::V_proj := hb_MatrixMultiply(V, ::WV)

   // Attention scores, scaled by the square root of the projection width
   ::attention_scores := hb_MatrixDiv(hb_MatrixMultiply(::Q_proj, hb_MatrixTranspose(::K_proj)), ;
                                      Sqrt(Len(::Q_proj[1])))

   // Row-wise softmax turns the scores into attention probabilities
   ::attention_probs := Softmax(::attention_scores)

   // Weighted mix of the values, then the output projection
   ::attn_output := hb_MatrixMultiply(::attention_probs, ::V_proj)

RETURN hb_MatrixMultiply(::attn_output, ::WO)

// Backward pass. d_output is the gradient of the loss with respect to the
// value returned by the last Forward() call (same shape).
//
// Overwrites dWQ, dWK, dWV and dWO with the gradients of this pass (they are
// not accumulated across calls) and returns the gradient with respect to the
// layer input. The three input gradients (for Q, K and V) are summed, which is
// the correct input gradient when Forward() received the same matrix for all
// three arguments, as Transformer:Forward() does.
// Forward() must have been called first; otherwise the cache is empty.
METHOD Backward(d_output) CLASS MultiHeadAttention
   LOCAL d_attn, d_probs, d_scores
   LOCAL d_Q_proj, d_K_proj, d_V_proj, d_input
   LOCAL scale := 1.0 / Sqrt(Len(::Q_proj[1]))

   // out = attn_output * WO
   ::dWO  := hb_MatrixMultiply(hb_MatrixTranspose(::attn_output), d_output)
   d_attn := hb_MatrixMultiply(d_output, hb_MatrixTranspose(::WO))

   // attn_output = attention_probs * V_proj
   d_V_proj := hb_MatrixMultiply(hb_MatrixTranspose(::attention_probs), d_attn)
   d_probs  := hb_MatrixMultiply(d_attn, hb_MatrixTranspose(::V_proj))

   // attention_probs = Softmax(scores), scores = Q_proj * K_proj^T * scale
   d_scores := hb_MatrixScale(MHA_SoftmaxGrad(::attention_probs, d_probs), scale)

   d_Q_proj := hb_MatrixMultiply(d_scores, ::K_proj)
   d_K_proj := hb_MatrixMultiply(hb_MatrixTranspose(d_scores), ::Q_proj)

   // X_proj = X_in * WX  =>  dWX = X_in^T * dX_proj
   ::dWQ := hb_MatrixMultiply(hb_MatrixTranspose(::Q_in), d_Q_proj)
   ::dWK := hb_MatrixMultiply(hb_MatrixTranspose(::K_in), d_K_proj)
   ::dWV := hb_MatrixMultiply(hb_MatrixTranspose(::V_in), d_V_proj)

   // Gradient with respect to the inputs: dX_in = dX_proj * WX^T
   d_input := MHA_MatrixAdd(hb_MatrixMultiply(d_Q_proj, hb_MatrixTranspose(::WQ)), ;
                            hb_MatrixMultiply(d_K_proj, hb_MatrixTranspose(::WK)))
   d_input := MHA_MatrixAdd(d_input, hb_MatrixMultiply(d_V_proj, hb_MatrixTranspose(::WV)))

RETURN d_input

// Gradient of a row-wise softmax: for every row,
//    dS[j] = P[j] * ( dP[j] - Sum_k( dP[k] * P[k] ) )
// probs is the softmax output, d_probs the gradient with respect to it.
STATIC FUNCTION MHA_SoftmaxGrad(probs, d_probs)
   LOCAL rows := Len(probs)
   LOCAL cols := Len(probs[1])
   LOCAL result := Array(rows, cols)
   LOCAL i, j, weighted

   FOR i := 1 TO rows
      weighted := 0
      FOR j := 1 TO cols
         weighted += d_probs[i][j] * probs[i][j]
      NEXT
      FOR j := 1 TO cols
         result[i][j] := probs[i][j] * (d_probs[i][j] - weighted)
      NEXT
   NEXT
RETURN result

// Element-wise sum of two matrices of identical shape; returns a new matrix.
STATIC FUNCTION MHA_MatrixAdd(a, b)
   LOCAL rows := Len(a)
   LOCAL cols := Len(a[1])
   LOCAL result := Array(rows, cols)
   LOCAL i, j

   FOR i := 1 TO rows
      FOR j := 1 TO cols
         result[i][j] := a[i][j] + b[i][j]
      NEXT
   NEXT
RETURN result

// Transformer model: a stack of MultiHeadAttention layers applied in sequence.
CLASS Transformer
   VAR layers   // array of MultiHeadAttention objects, in forward order

   METHOD New(num_layers, d_model, n_heads)
   METHOD Forward(src, tgt)
   METHOD Backward(d_output)
ENDCLASS

// Builds num_layers attention layers, each created with d_model and n_heads.
// Returns Self.
METHOD New(num_layers, d_model, n_heads) CLASS Transformer
   LOCAL i
   ::layers := Array(num_layers)
   FOR i := 1 TO num_layers
      ::layers[i] := MultiHeadAttention():New(d_model, n_heads)
   NEXT
RETURN Self

// Runs src through every layer as self-attention (the running output is used
// as query, key and value) and returns the output of the last layer.
// tgt is accepted for interface compatibility but is not used here.
METHOD Forward(src, tgt) CLASS Transformer
   LOCAL output := src, i
   FOR i := 1 TO Len(::layers)
      output := ::layers[i]:Forward(output, output, output)
   NEXT
RETURN output

// Back-propagates d_output (gradient of the loss with respect to the model
// output) through the layers in reverse order. Each layer receives the
// gradient returned by the layer that follows it, so that with several layers
// the earlier ones are not all fed the loss gradient directly.
// Returns NIL.
METHOD Backward(d_output) CLASS Transformer
   LOCAL grad := d_output
   LOCAL i
   FOR i := Len(::layers) TO 1 STEP -1
      grad := ::layers[i]:Backward(grad)
   NEXT
RETURN NIL

// Row-wise softmax with numerical stabilisation.
//
// matrix is an array of rows; the column count is taken from the first row.
// For each row the maximum value is subtracted before exponentiating, and the
// exponentials are divided by their sum. Returns a new matrix of the same
// dimensions.
FUNCTION Softmax(matrix)
   LOCAL nRowCount := Len(matrix)
   LOCAL nColCount := Len(matrix[1])
   LOCAL aProbs := Array(nRowCount, nColCount)
   LOCAL nRow, nCol, nPeak, nTotal, aExp

   FOR nRow := 1 TO nRowCount
      // Largest value of the row, used as the shift for stability
      nPeak := hb_ArrayMax(matrix[nRow])
      nTotal := 0
      aExp := Array(nColCount)

      // First pass: shifted exponentials and their running total
      FOR nCol := 1 TO nColCount
         aExp[nCol] := Exp(matrix[nRow][nCol] - nPeak)
         nTotal += aExp[nCol]
      NEXT

      // Second pass: normalise
      FOR nCol := 1 TO nColCount
         aProbs[nRow][nCol] := aExp[nCol] / nTotal
      NEXT
   NEXT

RETURN aProbs

// Applies one plain gradient-descent step to every layer of the Transformer:
//    W := W - learning_rate * dW     for WQ, WK, WV and WO
//
// transformer   : object exposing a "layers" array whose elements carry the
//                 weights WQ/WK/WV/WO and the gradients dWQ/dWK/dWV/dWO
// learning_rate : numeric step size
// Returns NIL.
//
// The XBrowser() inspection of dWQ that used to run here was a debugging aid;
// it opened a dialog for every layer on every epoch, so it has been removed.
FUNCTION ActualizarPesos(transformer, learning_rate)
   LOCAL layer

   FOR EACH layer IN transformer:layers
      layer:WQ := hb_MatrixSubstract(layer:WQ, hb_MatrixScale(layer:dWQ, learning_rate))
      layer:WK := hb_MatrixSubstract(layer:WK, hb_MatrixScale(layer:dWK, learning_rate))
      layer:WV := hb_MatrixSubstract(layer:WV, hb_MatrixScale(layer:dWV, learning_rate))
      layer:WO := hb_MatrixSubstract(layer:WO, hb_MatrixScale(layer:dWO, learning_rate))
   NEXT
RETURN NIL

// Returns the largest element of aArray.
// Returns NIL when aArray is not an array or when it is empty.
FUNCTION hb_ArrayMax(aArray)
   LOCAL xBest      // stays NIL until the first element is seen
   LOCAL nPos, nCount

   // Anything other than an array has no maximum
   IF !( ValType(aArray) == "A" )
      RETURN NIL
   ENDIF

   nCount := Len(aArray)

   IF nCount >= 1
      // Seed with the first element, then keep the larger of each comparison
      xBest := aArray[1]
      FOR nPos := 2 TO nCount
         IF aArray[nPos] > xBest
            xBest := aArray[nPos]
         ENDIF
      NEXT
   ENDIF

RETURN xBest

#pragma BEGINDUMP

#include <hbapi.h>
#include <hbapiitm.h>
#include <hbapierr.h>
#include <math.h>

HB_FUNC( HB_MATRIXMULTIPLY )
{
   PHB_ITEM pMatrix1 = hb_param( 1, HB_IT_ARRAY ); // Primera matriz
   PHB_ITEM pMatrix2 = hb_param( 2, HB_IT_ARRAY ); // Segunda matriz

   if( pMatrix1 && pMatrix2 )
   {
      // Dimensiones de la primera matriz
      int rows1 = hb_arrayLen( pMatrix1 );
      PHB_ITEM pRow1, pRow2, pResult, pRowResult;
      int i, k, cols1, rows2, cols2;

      if( rows1 == 0 )
      {
         hb_errRT_BASE( EG_ARG, 3012, "First matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      pRow1 = hb_arrayGetItemPtr( pMatrix1, 1 );
      if( !pRow1 || !HB_IS_ARRAY( pRow1 ) )
      {
         hb_errRT_BASE( EG_ARG, 3012, "First matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      cols1 = hb_arrayLen( pRow1 );

      // Dimensiones de la segunda matriz
      rows2 = hb_arrayLen( pMatrix2 );
      if( rows2 == 0 )
      {
         hb_errRT_BASE( EG_ARG, 3012, "Second matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      pRow2 = hb_arrayGetItemPtr( pMatrix2, 1 );
      if( !pRow2 || !HB_IS_ARRAY( pRow2 ) )
      {
         hb_errRT_BASE( EG_ARG, 3012, "Second matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      cols2 = hb_arrayLen( pRow2 );

      // Validar compatibilidad para la multiplicación (cols1 debe ser igual a rows2)
      if( cols1 != rows2 )
      {
         hb_errRT_BASE( EG_ARG, 3012, "Matrix dimensions do not match for multiplication", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }

      // Crear la matriz de resultado (rows1 x cols2)
      pResult = hb_itemArrayNew( rows1 );

      for( i = 0; i < rows1; i++ )
      {
         PHB_ITEM pRowResult = hb_itemArrayNew( cols2 );
         hb_arraySet( pResult, i + 1, pRowResult );
         hb_itemRelease( pRowResult ); // Liberar referencia local
      }

      // Realizar la multiplicación de matrices
      for( i = 0; i < rows1; i++ )
      {
         PHB_ITEM pRowA = hb_arrayGetItemPtr( pMatrix1, i + 1 );
         int j;

         for( j = 0; j < cols2; j++ )
         {
            double sum = 0.0;
            for( k = 0; k < cols1; k++ )
            {
               double a = hb_arrayGetND( pRowA, k + 1 );
               PHB_ITEM pRowB = hb_arrayGetItemPtr( pMatrix2, k + 1 );
               double b = hb_arrayGetND( pRowB, j + 1 );
               sum += a * b;
            }
            
            pRowResult = hb_arrayGetItemPtr( pResult, i + 1 );
            hb_arraySetND( pRowResult, j + 1, sum );
         }
      }

      // Devolver la matriz de resultado
      hb_itemReturnRelease( pResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

/*
 * HB_MATRIXSCALE( aMatrix, nFactor ) -> aScaled
 *
 * Returns a new matrix (array of row arrays) in which every element of
 * aMatrix has been multiplied by nFactor. Each output row has the length of
 * the corresponding input row.
 *
 * Raises an EG_ARG runtime error when the first parameter is not an array.
 */
HB_FUNC( HB_MATRIXSCALE )
{
   PHB_ITEM pSource = hb_param( 1, HB_IT_ARRAY ); /* matrix to scale */
   double dFactor = hb_parnd( 2 );                /* scalar multiplier */
   PHB_ITEM pScaled;
   HB_SIZE nHeight, nRow;

   if( ! pSource )
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nHeight = hb_arrayLen( pSource );
   pScaled = hb_itemArrayNew( nHeight );

   /* Build each scaled row separately and attach it to the result */
   for( nRow = 1; nRow <= nHeight; nRow++ )
   {
      PHB_ITEM pLineIn = hb_arrayGetItemPtr( pSource, nRow );
      HB_SIZE nWidth = hb_arrayLen( pLineIn );
      PHB_ITEM pLineOut = hb_itemArrayNew( nWidth );
      HB_SIZE nCol;

      for( nCol = 1; nCol <= nWidth; nCol++ )
         hb_arraySetND( pLineOut, nCol, hb_arrayGetND( pLineIn, nCol ) * dFactor );

      hb_arraySet( pScaled, nRow, pLineOut );
      hb_itemRelease( pLineOut ); /* the result keeps its own reference */
   }

   hb_itemReturnRelease( pScaled );
}

HB_FUNC( HB_MATRIXDIV )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY ); // Primer parámetro: matriz a escalar
   double scale = hb_parnd( 2 );                 // Segundo parámetro: escalar

   if( pMatrix )
   {
      HB_SIZE nRows = hb_arrayLen( pMatrix );
      HB_SIZE i, j;
      PHB_ITEM pMatrixResult = hb_itemArrayNew( nRows );

      // Copiar y escalar los datos
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
         HB_SIZE nCols = hb_arrayLen( pRow );

         PHB_ITEM pRowResult = hb_itemArrayNew( nCols );

         for( j = 0; j < nCols; j++ )
         {
            double value = hb_arrayGetND( pRow, j + 1 );
            hb_arraySetND( pRowResult, j + 1, value / scale );
         }

         hb_arraySet( pMatrixResult, i + 1, pRowResult );
         hb_itemRelease( pRowResult );
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

/*
 * HB_MATRIXTRANSPOSE( aMatrix ) -> aTransposed
 *
 * Returns the transpose of a matrix stored as an array of row arrays. The
 * column count is taken from the first row; the result has that many rows,
 * each as long as the number of rows of aMatrix.
 *
 * Raises an EG_ARG runtime error when the parameter is not an array, is
 * empty, or its first element is not an array.
 */
HB_FUNC( HB_MATRIXTRANSPOSE )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY ); /* matrix to transpose */
   /* hb_arrayGetItemPtr() yields NULL for an empty array, so the first row is
      checked before it is used to size the result. */
   PHB_ITEM pFirstRow = pMatrix ? hb_arrayGetItemPtr( pMatrix, 1 ) : NULL;

   if( pFirstRow && HB_IS_ARRAY( pFirstRow ) )
   {
      HB_SIZE nRows = hb_arrayLen( pMatrix );
      HB_SIZE nCols = hb_arrayLen( pFirstRow );
      HB_SIZE i, j;

      PHB_ITEM pMatrixResult = hb_itemArrayNew( nCols ); /* nCols x nRows */

      /* Create the rows of the transposed matrix. hb_arraySet() stores its
         own reference, so the temporary row item must be released here. */
      for( j = 0; j < nCols; j++ )
      {
         PHB_ITEM pNewRow = hb_itemArrayNew( nRows );
         hb_arraySet( pMatrixResult, j + 1, pNewRow );
         hb_itemRelease( pNewRow );
      }

      /* Copy element ( i, j ) of the source into ( j, i ) of the result */
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );

         for( j = 0; j < nCols; j++ )
         {
            PHB_ITEM pTransposedRow = hb_arrayGetItemPtr( pMatrixResult, j + 1 );
            hb_arraySetND( pTransposedRow, i + 1, hb_arrayGetND( pRow, j + 1 ) );
         }
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXZERO )
{
   HB_SIZE nRows = hb_parns( 1 ); // Número de filas
   HB_SIZE nCols = hb_parns( 2 ); // Número de columnas

   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows ); // Crear la matriz de nRows filas

      // Inicializar la matriz con ceros
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols ); // Crear una fila con nCols columnas
         for( j = 0; j < nCols; j++ )
         {
            hb_arraySetND( pRow, j + 1, 0.0 ); // Establecer cada elemento a 0.0
         }
         hb_arraySet( pMatrix, i + 1, pRow ); // Añadir la fila a la matriz
         hb_itemRelease( pRow ); // Liberar la fila temporal
      }

      hb_itemReturnRelease( pMatrix ); // Devolver la matriz completa
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXRANDOM )
{
   HB_SIZE nRows = hb_parns( 1 ); // Número de filas
   HB_SIZE nCols = hb_parns( 2 ); // Número de columnas

   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows ); // Crear la matriz de nRows filas

      // Inicializar la matriz con valores aleatorios
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols ); // Crear una fila con nCols columnas
         for( j = 0; j < nCols; j++ )
         {
            double randomValue = (double)rand() / RAND_MAX; // Valor aleatorio entre 0.0 y 1.0
            hb_arraySetND( pRow, j + 1, randomValue );
         }
         hb_arraySet( pMatrix, i + 1, pRow ); // Añadir la fila a la matriz
         hb_itemRelease( pRow ); // Liberar la fila temporal
      }

      hb_itemReturnRelease( pMatrix ); // Devolver la matriz completa
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_SOFTMAX )
{
   PHB_ITEM pValues = hb_param( 1, HB_IT_ARRAY ); // Primer parámetro: array multidimensional de valores

   if( pValues )
   {
      int nRows = hb_arrayLen( pValues ); // Número de filas
      if( nRows > 0 )
      {
         // Asumimos que las filas tienen la misma longitud
         PHB_ITEM pFirstRow = hb_arrayGetItemPtr( pValues, 1 );
         int nCols = hb_arrayLen( pFirstRow ); // Número de columnas (basado en la primera fila)

         PHB_ITEM pResult = hb_itemArrayNew( nRows ); // Array para almacenar los resultados
         int i, j;

         // Recorrer cada fila
         for( i = 0; i < nRows; i++ )
         {
            PHB_ITEM pRow = hb_arrayGetItemPtr( pValues, i + 1 );
            PHB_ITEM pRowResult = hb_itemArrayNew( nCols ); // Fila de resultados para Softmax

            double* expValues = (double*) hb_xgrab( nCols * sizeof(double) );
            double sumExp = 0.0;

            // Calcular e^x para cada elemento de la fila y la suma total
            for( j = 0; j < nCols; j++ )
            {
               double value = hb_arrayGetND( pRow, j + 1 );
               expValues[j] = pow( M_E, value );
               sumExp += expValues[j];
            }

            // Calcular Softmax para la fila dividiendo cada e^x por la suma total
            for( j = 0; j < nCols; j++ )
            {
               double softmaxValue = expValues[j] / sumExp;
               hb_arraySetND( pRowResult, j + 1, softmaxValue );
            }

            hb_xfree( expValues ); // Liberar memoria para los exponentes

            // Guardar la fila de resultados en la matriz resultante
            hb_arraySet( pResult, i + 1, pRowResult );
            hb_itemRelease( pRowResult ); // Liberar la fila de resultados
         }

         hb_itemReturnRelease( pResult ); // Devolver la matriz de resultados
      }
      else
      {
         hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      }
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

/*
 * HB_MATRIXSUBSTRACT( aMatrix1, aMatrix2 ) -> aDifference
 *
 * Element-wise difference aMatrix1 - aMatrix2 of two matrices stored as
 * arrays of row arrays. The row counts must be equal and non-zero, and the
 * lengths of the two first rows must be equal and non-zero; the result has
 * those dimensions.
 *
 * Raises an EG_ARG runtime error when a parameter is not an array
 * ("Invalid parameters"), when the row check fails ("Row dimensions do not
 * match") or when the column check fails ("Column dimensions do not match").
 */
HB_FUNC( HB_MATRIXSUBSTRACT )
{
   PHB_ITEM pLeft = hb_param( 1, HB_IT_ARRAY );  /* minuend matrix */
   PHB_ITEM pRight = hb_param( 2, HB_IT_ARRAY ); /* subtrahend matrix */
   PHB_ITEM pDiff;
   HB_SIZE nHeightL, nHeightR, nWidthL, nWidthR, nRow, nCol;

   if( ! pLeft || ! pRight )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   nHeightL = hb_arrayLen( pLeft );
   nHeightR = hb_arrayLen( pRight );

   if( nHeightL != nHeightR || nHeightL == 0 )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Row dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   /* Widths are taken from the first row of each matrix */
   nWidthL = hb_arrayLen( hb_arrayGetItemPtr( pLeft, 1 ) );
   nWidthR = hb_arrayLen( hb_arrayGetItemPtr( pRight, 1 ) );

   if( nWidthL != nWidthR || nWidthL == 0 )
   {
      hb_errRT_BASE( EG_ARG, 3012, "Column dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   pDiff = hb_itemArrayNew( nHeightL );

   for( nRow = 1; nRow <= nHeightL; nRow++ )
   {
      PHB_ITEM pLineL = hb_arrayGetItemPtr( pLeft, nRow );
      PHB_ITEM pLineR = hb_arrayGetItemPtr( pRight, nRow );
      PHB_ITEM pLineOut = hb_itemArrayNew( nWidthL );

      for( nCol = 1; nCol <= nWidthL; nCol++ )
         hb_arraySetND( pLineOut, nCol,
                        hb_arrayGetND( pLineL, nCol ) - hb_arrayGetND( pLineR, nCol ) );

      hb_arraySet( pDiff, nRow, pLineOut );
      hb_itemRelease( pLineOut ); /* the result keeps its own reference */
   }

   hb_itemReturnRelease( pDiff );
}

/*
 * HB_MATRIXSUM( aMatrix ) -> nTotal
 *
 * Returns the sum of all elements of a matrix stored as an array of row
 * arrays. Each row is walked over its own length.
 *
 * Raises an EG_ARG runtime error when the parameter is not an array
 * ("Invalid parameter") or contains no rows ("Empty matrix").
 */
HB_FUNC( HB_MATRIXSUM )
{
   PHB_ITEM pInput = hb_param( 1, HB_IT_ARRAY ); /* matrix to add up */
   int nLines = 0;

   if( pInput )
      nLines = hb_arrayLen( pInput );

   if( ! pInput )
   {
      /* Not an array at all */
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameter", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
   else if( nLines <= 0 )
   {
      /* An array, but without rows */
      hb_errRT_BASE( EG_ARG, 3012, "Empty matrix", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
   else
   {
      double dAccum = 0.0;
      int nLine = 0;

      while( nLine < nLines )
      {
         PHB_ITEM pLine = hb_arrayGetItemPtr( pInput, nLine + 1 );
         int nCells = hb_arrayLen( pLine );
         int nCell = 0;

         /* Single running accumulator, row by row and left to right */
         while( nCell < nCells )
         {
            dAccum += hb_arrayGetND( pLine, nCell + 1 );
            ++nCell;
         }

         ++nLine;
      }

      hb_retnd( dAccum );
   }
}

#pragma ENDDUMP