/*
 * Imports up to 10000 word vectors from a word2vec binary model into
 * embeddings.dbf (indexed on WORD) and prints the cosine similarity
 * between a few sample words.
 *
 * Returns nil.
 */
function Main()

   local cModel := "GoogleNews-vectors-negative300.bin"
   local hFile := FOpen( cModel )   // default open mode is read-only
   local cBuffer := Chr( 0 ), cWord, n
   local cEmbeddings := Space( 1200 )
   local cVectors1, cVectors2, cVectors3

   // FOpen() returns -1 on failure. Without this check every FRead() below
   // returns 0 and the header loop would never terminate.
   if hFile == -1
      ? "Cannot open " + cModel + ", error:", FError()
      return nil
   endif

   SET DECIMALS TO 10

   DbCreate( "embeddings.dbf", { { "WORD", "C", 50, 0 },;
                                 { "VECTORS", "C", 1200, 0 } } )
   USE embeddings
   INDEX ON field->Word TO "words"
   SET INDEX TO words

   // Echo the header line; stop at the newline or at end of file.
   while FRead( hFile, @cBuffer, 1 ) == 1
      ?? cBuffer
      if cBuffer == Chr( 10 )
         exit
      endif
   end

   // A record is appended only once both its word and its full 1200 byte
   // vector block have been read, so the table never holds partial rows.
   for n := 1 to 10000
      cWord := ReadWord( hFile )
      if Len( cWord ) == 0 .or. FRead( hFile, @cEmbeddings, 1200 ) < 1200
         ? "End of file"
         exit
      endif
      APPEND BLANK
      field->Word := cWord
      field->Vectors := cEmbeddings
   next

   FClose( hFile )

   cVectors1 := FetchVectors( "man" )
   cVectors2 := FetchVectors( "woman" )
   if cVectors1 != nil .and. cVectors2 != nil
      ? CosineSim( cVectors1, cVectors2 )
   endif

   cVectors3 := FetchVectors( "child" )
   if cVectors1 != nil .and. cVectors3 != nil
      ? CosineSim( cVectors1, cVectors3 )
   endif

   // DbEval( { || If( Empty( field->Vectors ), Alert( "empty" ),) } )

   USE

return nil

/*
 * Reads one space-terminated word from hFile.
 * The terminating space is consumed but not returned. Returns "" when the
 * end of the file is reached before any character of a word.
 */
static function ReadWord( hFile )

   local cChar := Chr( 0 ), cWord := ""

   while FRead( hFile, @cChar, 1 ) == 1 .and. !( cChar == Chr( 32 ) )
      // Some word2vec writers emit a newline after each vector block;
      // drop it so it cannot become the first character of the key.
      if cChar == Chr( 10 ) .and. Len( cWord ) == 0
         loop
      endif
      cWord += cChar
   end

return cWord

/*
 * Looks up cWord in the current work area using an exact, full-width key.
 * Prints the stored word and returns its VECTORS field, or prints a
 * message and returns nil when the word is not in the table.
 */
static function FetchVectors( cWord )

   // Pad to the field width so that e.g. "man" cannot match a longer key
   // that merely starts with the same letters.
   if dbSeek( PadR( cWord, Len( field->Word ) ) )
      ? field->Word
      return field->Vectors
   endif

   ? cWord + " not found"

return nil
#pragma BEGINDUMP
#include <math.h>
#include <stdint.h>
#include <string.h>
#include <hbapi.h>
/*
 * Bit-field view of a 32-bit value split into 23 + 8 + 1 bits.
 * The widths match the IEEE-754 single-precision layout (23-bit fraction,
 * 8-bit exponent, 1-bit sign).
 * NOTE(review): bit-field allocation order is implementation-defined in C,
 * so the fields line up with a real float only on compilers that allocate
 * from the least significant bit upwards - confirm for the target compiler.
 */
struct st_ieee
{
unsigned int uiMantissa:23;   /* low 23 bits: fraction */
unsigned int uiExponent:8;    /* next 8 bits: biased exponent */
unsigned int uiSign:1;        /* top bit: sign */
};
/*
 * Reinterprets the low 32 bits of ulValue as a single-precision float.
 * The bit pattern is preserved; no numeric conversion takes place.
 */
float ULToFloat( unsigned long ulValue )
{
   /* unsigned long can be wider than float (8 bytes on LP64 targets).
      Narrow to 32 bits first and copy with memcpy: storing through a
      casted unsigned long pointer would overrun the 4-byte destination
      and break the strict aliasing rules. */
   uint32_t u32Bits = ( uint32_t ) ulValue;
   float fValue;

   memcpy( &fValue, &u32Bits, sizeof( fValue ) );

   return fValue;
}
/*
 * BIN2D( cBinary ) -> nValue
 * Decodes the first 4 bytes of cBinary as a native single-precision float
 * and returns it as a Harbour numeric.
 * Returns 0 when the argument is not a string or is shorter than 4 bytes.
 */
HB_FUNC( BIN2D )
{
   const char * pData = hb_parc( 1 );   /* NULL when arg 1 is not a string */
   float fValue = 0.0f;

   /* Copy straight into a float so that exactly 4 bytes are read and
      written regardless of how wide unsigned long is on this platform. */
   if( pData && hb_parclen( 1 ) >= sizeof( fValue ) )
      memcpy( &fValue, pData, sizeof( fValue ) );

   hb_retnd( fValue );
}
float cosine_similarity( unsigned long * vec1, unsigned long * vec2, int size )
{
float dot_product = 0.0, norm_a = 0.0, norm_b = 0.0;
for( int i = 0; i < size; i++ )
{
float fl1, fl2;
dot_product += ( fl1 = ULToFloat( vec1[ i ] ) ) * ( fl2 = ULToFloat( vec2[ i ] ) );
norm_a += fl1 * fl1;
norm_b += fl2 * fl2;
}
return ( float ) ( dot_product / ( sqrt( norm_a ) * sqrt( norm_b ) ) );
}
/*
 * COSINESIM( cVectors1, cVectors2 ) -> nSimilarity
 * Cosine similarity of two strings that each hold 300 packed 32-bit floats.
 * Returns 0 when either argument is not a string or is too short to hold
 * 300 floats.
 */
HB_FUNC( COSINESIM )
{
   enum { VECTOR_DIM = 300 };

   const char * pVec1 = hb_parc( 1 );   /* NULL when the arg is not a string */
   const char * pVec2 = hb_parc( 2 );

   /* Validate before handing the buffers over: a missing or short string
      would otherwise be read past its end. */
   if( pVec1 && pVec2 &&
       hb_parclen( 1 ) >= VECTOR_DIM * sizeof( float ) &&
       hb_parclen( 2 ) >= VECTOR_DIM * sizeof( float ) )
      hb_retnd( cosine_similarity( ( unsigned long * ) pVec1, ( unsigned long * ) pVec2, VECTOR_DIM ) );
   else
      hb_retnd( 0.0 );
}
#pragma ENDDUMP