#include "FiveWin.ch"
// Demo driver: chains MatMul() and ReLU() over small literal matrices
// and prints the final matrix with hb_ValToExp().
function Main()

   // 4x5 input matrix
   local aSamples := { { 1, 1, 1, 1,  1 },;
                       { 1, 0, 0, 1, -1 },;
                       { 1, 1, 1, 1,  2 },;
                       { 0, 1, 0, 0,  1 } }

   // 5x5 matrix the input is right-multiplied by
   local aMixer := { { 1, 1, 0, 1, 0 },;
                     { 1, 0, 1, 1, 0 },;
                     { 0, 0, 1, 1, 0 },;
                     { 0, 1, 0, 0, 1 },;
                     { 0, 1, 1, 0, 1 } }

   // 3x5 and 4x4 matrices applied from the left in the two stages below.
   // NOTE(review): the last column of each multiplies the appended row of
   // ones, so it presumably acts as a bias term - confirm.
   local aStage1 := { { 1, 0,  0, 1,  3 },;
                      { 0, 1,  1, 1,  4 },;
                      { 1, 1, -1, 1, -1 } }
   local aStage2 := { {  1,  0,  1, -1 },;
                      { -1,  0,  1,  7 },;
                      {  1, -1,  0,  2 },;
                      {  0,  1, -1, -1 } }

   local aMixed, aHidden, aOutput

   // 4x5 * 5x5 -> 4x5, then grow to 5x5 with a row of ones
   aMixed := MatMul( aSamples, aMixer )
   AAdd( aMixed, { 1, 1, 1, 1, 1 } )

   // First stage: 3x5 * 5x5 -> 3x5, rectified, then grown to 4x5
   aHidden := ReLU( MatMul( aStage1, aMixed ) )
   AAdd( aHidden, { 1, 1, 1, 1, 1 } )

   // Second stage: 4x4 * 4x5 -> 4x5, rectified and printed
   aOutput := ReLU( MatMul( aStage2, aHidden ) )
   ? hb_ValToExp( aOutput )

return nil
#pragma BEGINDUMP
#include <hbapi.h>
#include <hbapiitm.h>
#include <hbapierr.h>
#define UNROLL_FACTOR 4
/*
 * MATMUL( aMatrix1, aMatrix2 ) --> aProduct
 *
 * Multiplies two matrices given as Harbour arrays of row arrays and
 * returns a new array of rows holding the product as doubles.
 *
 * The column counts are taken from the first row of each matrix.
 * aMatrix1 is treated as nRowsA x nColsA and aMatrix2 as nColsA x nColsB;
 * the result is nRowsA x nColsB.
 *
 * An argument error (EG_ARG, 3012) is raised when either parameter is
 * not an array, when either matrix has no rows or an empty / non-array
 * first row, or when the row count of aMatrix2 differs from the column
 * count of aMatrix1.
 */
HB_FUNC( MATMUL )
{
   PHB_ITEM pMatrix1 = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pMatrix2 = hb_param( 2, HB_IT_ARRAY );
   HB_SIZE nRowsA = pMatrix1 ? hb_arrayLen( pMatrix1 ) : 0;
   HB_SIZE nRowsB = pMatrix2 ? hb_arrayLen( pMatrix2 ) : 0;
   /* Only look at the first row when there is one. */
   HB_SIZE nColsA = nRowsA ? hb_arrayLen( hb_arrayGetItemPtr( pMatrix1, 1 ) ) : 0;
   HB_SIZE nColsB = nRowsB ? hb_arrayLen( hb_arrayGetItemPtr( pMatrix2, 1 ) ) : 0;

   /* Requiring non-zero dimensions keeps every hb_xgrab() size below
      non-zero; requiring nRowsB == nColsA guarantees that every row of
      aMatrix2 read below actually exists. */
   if( nRowsA > 0 && nColsA > 0 && nColsB > 0 && nRowsB == nColsA )
   {
      PHB_ITEM pMatrixResult = hb_itemArrayNew( nRowsA );
      HB_SIZE i, j, k;

      /* Flat row-major copies of the operands and of the result */
      double * pDataA = ( double * ) hb_xgrab( nRowsA * nColsA * sizeof( double ) );
      double * pDataB = ( double * ) hb_xgrab( nColsA * nColsB * sizeof( double ) );
      double * pDataResult = ( double * ) hb_xgrab( nRowsA * nColsB * sizeof( double ) );

      /* Copy both Harbour matrices into the flat buffers */
      for( i = 0; i < nRowsA; i++ )
      {
         PHB_ITEM pRowA = hb_arrayGetItemPtr( pMatrix1, i + 1 );

         for( j = 0; j < nColsA; j++ )
            pDataA[ i * nColsA + j ] = hb_arrayGetND( pRowA, j + 1 );
      }

      for( i = 0; i < nColsA; i++ )
      {
         PHB_ITEM pRowB = hb_arrayGetItemPtr( pMatrix2, i + 1 );

         for( j = 0; j < nColsB; j++ )
            pDataB[ i * nColsB + j ] = hb_arrayGetND( pRowB, j + 1 );
      }

      /* Multiply: result[i][j] = sum over k of A[i][k] * B[k][j] */
      for( i = 0; i < nRowsA; i++ )
      {
         for( j = 0; j < nColsB; j++ )
         {
            double sum = 0.0;

            /* Unrolled part. The body below is written for groups of
               four, matching UNROLL_FACTOR. The bound is expressed as an
               addition because HB_SIZE is unsigned: the former
               "nColsA - (UNROLL_FACTOR-1)" wrapped around for nColsA < 3
               and made the loop read far outside both buffers. */
            for( k = 0; k + UNROLL_FACTOR <= nColsA; k += UNROLL_FACTOR )
            {
               sum += pDataA[ i * nColsA + k ]     * pDataB[ k * nColsB + j ]
                    + pDataA[ i * nColsA + k + 1 ] * pDataB[ ( k + 1 ) * nColsB + j ]
                    + pDataA[ i * nColsA + k + 2 ] * pDataB[ ( k + 2 ) * nColsB + j ]
                    + pDataA[ i * nColsA + k + 3 ] * pDataB[ ( k + 3 ) * nColsB + j ];
            }

            /* Tail: the nColsA % UNROLL_FACTOR leftover terms */
            for( ; k < nColsA; k++ )
               sum += pDataA[ i * nColsA + k ] * pDataB[ k * nColsB + j ];

            pDataResult[ i * nColsB + j ] = sum;
         }
      }

      /* Build the Harbour result: one new row array per result row */
      for( i = 0; i < nRowsA; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nColsB );

         for( j = 0; j < nColsB; j++ )
            hb_arraySetND( pRow, j + 1, pDataResult[ i * nColsB + j ] );

         hb_arraySet( pMatrixResult, i + 1, pRow );
         hb_itemRelease( pRow );
      }

      hb_xfree( pDataA );
      hb_xfree( pDataB );
      hb_xfree( pDataResult );

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}
/*
 * RELU( aMatrix ) --> aRectified
 *
 * Returns a new matrix (array of row arrays) of the same shape as
 * aMatrix in which every element x is replaced by x when x > 0.0 and
 * by 0.0 otherwise. The column count is taken from the first row.
 *
 * An argument error (EG_ARG, 3012) is raised when the parameter is not
 * an array, has no rows, or has an empty / non-array first row.
 */
HB_FUNC( RELU )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );
   HB_SIZE nRows = pMatrix ? hb_arrayLen( pMatrix ) : 0;
   /* Only look at the first row when there is one. */
   HB_SIZE nCols = nRows ? hb_arrayLen( hb_arrayGetItemPtr( pMatrix, 1 ) ) : 0;

   /* Non-zero dimensions also keep the hb_xgrab() size below non-zero. */
   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE nTotal = nRows * nCols;
      PHB_ITEM pOutput = hb_itemArrayNew( nRows );
      HB_SIZE i, j;
      double * pData = ( double * ) hb_xgrab( nTotal * sizeof( double ) );

      /* Copy the matrix into a flat row-major buffer */
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );

         for( j = 0; j < nCols; j++ )
            pData[ i * nCols + j ] = hb_arrayGetND( pRow, j + 1 );
      }

      /* Unrolled part. The body below is written for groups of four,
         matching UNROLL_FACTOR. It must stop while a whole group still
         fits: the former bound "i < nRows * nCols" let the last group
         write up to three doubles past the end of pData whenever the
         element count was not a multiple of four (e.g. a 3x5 matrix). */
      for( i = 0; i + UNROLL_FACTOR <= nTotal; i += UNROLL_FACTOR )
      {
         pData[ i ]     = ( pData[ i ]     > 0.0 ) ? pData[ i ]     : 0.0;
         pData[ i + 1 ] = ( pData[ i + 1 ] > 0.0 ) ? pData[ i + 1 ] : 0.0;
         pData[ i + 2 ] = ( pData[ i + 2 ] > 0.0 ) ? pData[ i + 2 ] : 0.0;
         pData[ i + 3 ] = ( pData[ i + 3 ] > 0.0 ) ? pData[ i + 3 ] : 0.0;
      }

      /* Tail: the nTotal % UNROLL_FACTOR leftover elements */
      for( ; i < nTotal; i++ )
         pData[ i ] = ( pData[ i ] > 0.0 ) ? pData[ i ] : 0.0;

      /* Build the Harbour result: one new row array per input row */
      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols );

         for( j = 0; j < nCols; j++ )
            hb_arraySetND( pRow, j + 1, pData[ i * nCols + j ] );

         hb_arraySet( pOutput, i + 1, pRow );
         hb_itemRelease( pRow );
      }

      hb_xfree( pData );

      hb_itemReturnRelease( pOutput );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}
#pragma ENDDUMP