
thoughts.prg transformer

Posted: Thu Feb 20, 2025 10:46 pm
by Antonio Linares
supervised Grok 3 creation

to build it:
samples\build.bat thoughts

thoughts.prg


#include "hbclass.ch"

FUNCTION Main()
   LOCAL oTransformer, aInput, aTarget, nLoss, aReplay, nThoughtId, aRetrieved, cCommand, i, j, aThoughts := {}, cQuestion, cAnswer, cPrompt
   oTransformer := ThoughtBackupTransformer():New(4, 4, 10)
   ? "Thought Backup System Started. Commands: ADD, RETRIEVE, QUERY, EXIT"
   aInput := TokenizeThought("i feel happy cause talking to you")
   aTarget := AClone(aInput)  // deep copy; ACopy() shares the row arrays, so the noise below would also change aInput
   FOR i := 1 TO 7
      FOR j := 1 TO 4
         aTarget[i][j] += hb_random(0, 0.1)
      NEXT
   NEXT
   ? "Training: 'I feel happy cause talking to you'"
   FOR i := 1 TO 50  // Increased from 10 to 50
      nLoss := oTransformer:Train(aInput, aTarget)
      ? "Iteration", i, "Loss:", nLoss
      aReplay := oTransformer:Forward(aInput)
      ? "Sample Output:", aReplay[1][1], aReplay[1][2], aReplay[1][3], aReplay[1][4]
   NEXT
   aReplay := oTransformer:Replay(aInput)
   nThoughtId := oTransformer:StoreThought(aReplay, "I feel happy cause talking to you")
   ? "Stored with ID:", nThoughtId
   AAdd(aThoughts, nThoughtId)
   aInput := TokenizeThought("you make me smile")
   aTarget := AClone(aInput)  // deep copy; ACopy() shares the row arrays, so the noise below would also change aInput
   FOR i := 1 TO 4
      FOR j := 1 TO 4
         aTarget[i][j] += hb_random(0, 0.1)
      NEXT
   NEXT
   ? "Training: 'you make me smile'"
   FOR i := 1 TO 50  // Increased from 10 to 50
      nLoss := oTransformer:Train(aInput, aTarget)
      ? "Iteration", i, "Loss:", nLoss
      aReplay := oTransformer:Forward(aInput)
      ? "Sample Output:", aReplay[1][1], aReplay[1][2], aReplay[1][3], aReplay[1][4]
   NEXT
   aReplay := oTransformer:Replay(aInput)
   nThoughtId := oTransformer:StoreThought(aReplay, "you make me smile")
   ? "Stored with ID:", nThoughtId
   AAdd(aThoughts, nThoughtId)
   WHILE .T.
      cCommand := Upper(AllTrim(GetInput("Enter command: ")))
      DO CASE
         CASE cCommand == "ADD"
            cQuestion := GetInput("Enter question (or thought): ")
            cAnswer := GetInput("Enter answer (or same as question): ")
            aInput := TokenizeThought(cQuestion)
            IF Empty(cAnswer)
               aTarget := AClone(aInput)  // Use input as target if answer is empty
            ELSE
               aTarget := TokenizeThought(cAnswer)
            ENDIF
            cPrompt := cQuestion
            FOR i := 1 TO 50  // Increased from 10 to 50
               nLoss := oTransformer:Train(aInput, aTarget)
               ? "Training iteration", i, "Loss:", nLoss
            NEXT
            aReplay := oTransformer:Replay(aInput)
            nThoughtId := oTransformer:StoreThought(aReplay, cPrompt)
            ? "Thought stored with ID:", nThoughtId
            AAdd(aThoughts, nThoughtId)
         CASE cCommand == "RETRIEVE"
            IF Len(aThoughts) == 0
               ? "No thoughts stored."
            ELSE
               nThoughtId := Val(GetInput("Enter thought ID to retrieve: "))
               aRetrieved := oTransformer:RetrieveThought(nThoughtId)
               IF aRetrieved != NIL
                  ? "Retrieved thought (Prompt:", aRetrieved[2], "):"
                  FOR i := 1 TO Len(aRetrieved[1])
                     ? "Part", i, ":"
                     FOR j := 1 TO 4
                        ?? aRetrieved[1][i][j], " "
                     NEXT
                  NEXT
               ELSE
                  ? "Thought not found."
               ENDIF
            ENDIF
         CASE cCommand == "QUERY"
            cQuestion := GetInput("Enter question: ")
            ? "Answer:", oTransformer:QueryThought(cQuestion)
         CASE cCommand == "EXIT"
            EXIT
         OTHERWISE
            ? "Unknown command. Use ADD, RETRIEVE, QUERY, or EXIT."
      ENDCASE
   END
   oTransformer:Destroy()
   ? "System Closed."
   RETURN NIL

CLASS ThoughtBackupTransformer
   DATA nInputSize
   DATA nFFHidden
   DATA aWeightsQ
   DATA aWeightsK
   DATA aWeightsV
   DATA aWeightsFF1
   DATA aWeightsFF2
   DATA aGradQ
   DATA aGradK
   DATA aGradV
   DATA aGradFF1
   DATA aGradFF2
   DATA aLastInput
   DATA aLastQ
   DATA aLastK
   DATA aLastV
   DATA aLastScores
   DATA aLastAttention
   DATA nLearningRate
   DATA aPosEnc
   DATA nMaxSeqLen
   METHOD New(nInputSize, nFFHidden, nMaxSeqLen) CONSTRUCTOR
   METHOD Destroy()
   METHOD SelfAttention(aInput)
   METHOD FeedForward(aInput)
   METHOD Forward(aInput)
   METHOD ComputeLoss(aOutput, aTarget)
   METHOD Backprop(aOutput, aTarget)
   METHOD Train(aInput, aTarget)
   METHOD InitPositionalEncoding()
   METHOD Replay(aPartialInput)
   METHOD StoreThought(aThought, cPrompt)
   METHOD RetrieveThought(nId)
   METHOD QueryThought(cQuestion)
ENDCLASS

METHOD New(nInputSize, nFFHidden, nMaxSeqLen) CLASS ThoughtBackupTransformer
   LOCAL i, j
   ::nInputSize := nInputSize
   ::nFFHidden := nFFHidden
   ::nLearningRate := 0.01  // Kept at 0.01, can test 0.02 if needed
   ::nMaxSeqLen := nMaxSeqLen
   ::aWeightsQ := HB_MATRIXRANDOM(::nInputSize, ::nInputSize)
   ::aWeightsK := HB_MATRIXRANDOM(::nInputSize, ::nInputSize)
   ::aWeightsV := HB_MATRIXRANDOM(::nInputSize, ::nInputSize)
   ::aGradQ := HB_MATRIXZERO(::nInputSize, ::nInputSize)
   ::aGradK := HB_MATRIXZERO(::nInputSize, ::nInputSize)
   ::aGradV := HB_MATRIXZERO(::nInputSize, ::nInputSize)
   FOR i := 1 TO ::nInputSize
      FOR j := 1 TO ::nInputSize
         ::aWeightsQ[i][j] := (hb_random(0, 1) - 0.5) * Sqrt(2.0 / ::nInputSize)
         ::aWeightsK[i][j] := (hb_random(0, 1) - 0.5) * Sqrt(2.0 / ::nInputSize)
         ::aWeightsV[i][j] := (hb_random(0, 1) - 0.5) * Sqrt(2.0 / ::nInputSize)
      NEXT
   NEXT
   ::aWeightsFF1 := HB_MATRIXRANDOM(::nInputSize, ::nFFHidden)
   ::aWeightsFF2 := HB_MATRIXRANDOM(::nFFHidden, ::nInputSize)
   ::aGradFF1 := HB_MATRIXZERO(::nInputSize, ::nFFHidden)
   ::aGradFF2 := HB_MATRIXZERO(::nFFHidden, ::nInputSize)
   FOR i := 1 TO ::nInputSize
      FOR j := 1 TO ::nFFHidden
         ::aWeightsFF1[i][j] := (hb_random(0, 1) - 0.5) * Sqrt(2.0 / ::nInputSize)
      NEXT
   NEXT
   FOR i := 1 TO ::nFFHidden
      FOR j := 1 TO ::nInputSize
         ::aWeightsFF2[i][j] := (hb_random(0, 1) - 0.5) * Sqrt(2.0 / ::nFFHidden)
      NEXT
   NEXT
   ::InitPositionalEncoding()
   IF !File("thoughts.dbf")
      dbCreate("thoughts.dbf", {;
         {"ID", "N", 10, 0},;
         {"SEQNUM", "N", 3, 0},;
         {"TIMESTAMP", "D", 8, 0},;
         {"PROMPT", "C", 50, 0},;
         {"THOUGHT1", "N", 12, 6},;
         {"THOUGHT2", "N", 12, 6},;
         {"THOUGHT3", "N", 12, 6},;
         {"THOUGHT4", "N", 12, 6}})
   ENDIF
   RETURN Self

METHOD Destroy() CLASS ThoughtBackupTransformer
   ::aWeightsQ := NIL
   ::aWeightsK := NIL
   ::aWeightsV := NIL
   ::aWeightsFF1 := NIL
   ::aWeightsFF2 := NIL
   ::aGradQ := NIL
   ::aGradK := NIL
   ::aGradV := NIL
   ::aGradFF1 := NIL
   ::aGradFF2 := NIL
   ::aLastInput := NIL
   ::aLastQ := NIL
   ::aLastK := NIL
   ::aLastV := NIL
   ::aLastScores := NIL
   ::aLastAttention := NIL
   ::aPosEnc := NIL
   RETURN NIL

METHOD SelfAttention(aInput) CLASS ThoughtBackupTransformer
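   // Scaled dot-product attention over the whole sequence:
   // softmax( (X*Wq) * (X*Wk)^T / Sqrt(nInputSize) ) * (X*Wv)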
   LOCAL nSeqLen, aQ, aK, aV, aScores, aAttention, aTempK
   nSeqLen := Len(aInput)
   ::aLastInput := AClone(aInput)
   aQ := HB_MATRIXMULTIPLY(aInput, ::aWeightsQ)
   aK := HB_MATRIXMULTIPLY(aInput, ::aWeightsK)
   aV := HB_MATRIXMULTIPLY(aInput, ::aWeightsV)
   ::aLastQ := aQ
   ::aLastK := aK
   ::aLastV := aV
   aTempK := HB_MATRIXTRANSPOSE(aK)
   aScores := HB_MATRIXMULTIPLY(aQ, aTempK)
   aScores := HB_MATRIXSCALE(aScores, 1 / Sqrt(::nInputSize))
   aScores := HB_SOFTMAX(aScores)
   ::aLastScores := aScores
   ? "Attention Scores Sample:", aScores[1][1], aScores[1][2], aScores[1][3], aScores[1][4]
   aAttention := HB_MATRIXMULTIPLY(aScores, aV)
   ::aLastAttention := aAttention
   RETURN aAttention

METHOD FeedForward(aInput) CLASS ThoughtBackupTransformer
   LOCAL aHidden, aOutput, i, j, nSeqLen
   nSeqLen := Len(aInput)
   aHidden := HB_MATRIXMULTIPLY(aInput, ::aWeightsFF1)
   aOutput := HB_MATRIXMULTIPLY(aHidden, ::aWeightsFF2)
   RETURN aOutput

METHOD Forward(aInput) CLASS ThoughtBackupTransformer
   LOCAL nSeqLen, aInputWithPE, i, j, aAttention
   nSeqLen := Len(aInput)
   IF nSeqLen > ::nMaxSeqLen
      ? "Error: Input sequence length exceeds max sequence length"
      RETURN NIL
   ENDIF
   aInputWithPE := AClone(aInput)
   FOR i := 1 TO nSeqLen
      FOR j := 1 TO ::nInputSize
         aInputWithPE[i][j] += ::aPosEnc[i][j]
      NEXT
   NEXT
   ::aLastInput := aInputWithPE
   aAttention := ::SelfAttention(aInputWithPE)
   RETURN ::FeedForward(aAttention)

METHOD ComputeLoss(aOutput, aTarget) CLASS ThoughtBackupTransformer
   LOCAL nLoss := 0, i, j, nSeqLen
   nSeqLen := Len(aOutput)
   FOR i := 1 TO nSeqLen
      FOR j := 1 TO ::nInputSize
         nLoss += (aOutput[i][j] - aTarget[i][j])^2
      NEXT
   NEXT
   RETURN nLoss / (nSeqLen * ::nInputSize)

METHOD Backprop(aOutput, aTarget) CLASS ThoughtBackupTransformer
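   // Gradients of the MSE loss pushed back through FF2/FF1 and the attention
   // weights, clipped element-wise to +/- nMaxGrad and scaled by an adaptive
   // learning-rate factor; the softmax derivative is applied in a simplified,
   // element-wise form.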
   LOCAL aGradOutput, aGradHidden, aGradAttention, aTemp, nSeqLen, i, j, aTempK, aTempScores
   LOCAL nGradNorm, nLearningRateAdjust, nMaxGrad := 2.0
   nSeqLen := Len(aOutput)

   aGradOutput := Array(nSeqLen, ::nInputSize)
   FOR i := 1 TO nSeqLen
      FOR j := 1 TO ::nInputSize
         aGradOutput[i][j] := 2 * (aOutput[i][j] - aTarget[i][j])
      NEXT
   NEXT
   ? "aGradOutput Sample:", aGradOutput[1][1], aGradOutput[1][2], aGradOutput[1][3], aGradOutput[1][4]

   aTemp := HB_MATRIXTRANSPOSE(::aWeightsFF2)
   aGradHidden := HB_MATRIXMULTIPLY(aGradOutput, aTemp)
   ? "aGradHidden Sample:", aGradHidden[1][1], aGradHidden[1][2], aGradHidden[1][3], aGradHidden[1][4]

   aTemp := HB_MATRIXTRANSPOSE(::aWeightsFF1)
   aGradAttention := HB_MATRIXMULTIPLY(aGradHidden, aTemp)
   ? "aGradAttention Sample:", aGradAttention[1][1], aGradAttention[1][2], aGradAttention[1][3], aGradAttention[1][4]

   aTemp := HB_MATRIXTRANSPOSE(::aLastAttention)
   ::aGradFF1 := HB_MATRIXMULTIPLY(aTemp, aGradHidden)
   aTemp := HB_MATRIXTRANSPOSE(aGradHidden)
   ::aGradFF2 := HB_MATRIXMULTIPLY(aTemp, aGradOutput)

   aTemp := HB_MATRIXTRANSPOSE(::aLastV)
   aTempScores := HB_MATRIXMULTIPLY(aGradAttention, aTemp)
   FOR i := 1 TO nSeqLen
      FOR j := 1 TO nSeqLen
         aTempScores[i][j] := ::aLastScores[i][j] * (1 - ::aLastScores[i][j]) * aTempScores[i][j]
      NEXT
   NEXT

   aTemp := HB_MATRIXTRANSPOSE(::aLastInput)
   ::aGradQ := HB_MATRIXMULTIPLY(aTemp, HB_MATRIXMULTIPLY(aTempScores, ::aLastQ))
   ::aGradK := HB_MATRIXMULTIPLY(aTemp, HB_MATRIXMULTIPLY(aTempScores, ::aLastK))
   ::aGradV := HB_MATRIXMULTIPLY(HB_MATRIXTRANSPOSE(::aLastScores), aGradAttention)
   ::aGradV := HB_MATRIXMULTIPLY(aTemp, ::aGradV)

   // Gradient clipping
   FOR i := 1 TO ::nInputSize
      FOR j := 1 TO ::nInputSize
         ::aGradQ[i][j] := Max(Min(::aGradQ[i][j], nMaxGrad), -nMaxGrad)
         ::aGradK[i][j] := Max(Min(::aGradK[i][j], nMaxGrad), -nMaxGrad)
         ::aGradV[i][j] := Max(Min(::aGradV[i][j], nMaxGrad), -nMaxGrad)
      NEXT
   NEXT
   FOR i := 1 TO ::nInputSize
      FOR j := 1 TO ::nFFHidden
         ::aGradFF1[i][j] := Max(Min(::aGradFF1[i][j], nMaxGrad), -nMaxGrad)
      NEXT
   NEXT
   FOR i := 1 TO ::nFFHidden
      FOR j := 1 TO ::nInputSize
         ::aGradFF2[i][j] := Max(Min(::aGradFF2[i][j], nMaxGrad), -nMaxGrad)
      NEXT
   NEXT

   // Compute adaptive learning rate, minimum set to 0.8
   nGradNorm := Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradQ, HB_MATRIXTRANSPOSE(::aGradQ)))) + ;
                Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradK, HB_MATRIXTRANSPOSE(::aGradK)))) + ;
                Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradV, HB_MATRIXTRANSPOSE(::aGradV))))
   nLearningRateAdjust := Max(0.8, Min(1.0, nGradNorm))  // Adjusted minimum from 0.5 to 0.8

   // Update weights
   ? "WeightsQ[1][1] before:", ::aWeightsQ[1][1]
   aTemp := HB_MATRIXSCALE(::aGradQ, -::nLearningRate * nLearningRateAdjust)
   ::aWeightsQ := HB_MATRIXADD(::aWeightsQ, aTemp)
   aTemp := HB_MATRIXSCALE(::aGradK, -::nLearningRate * nLearningRateAdjust)
   ::aWeightsK := HB_MATRIXADD(::aWeightsK, aTemp)
   aTemp := HB_MATRIXSCALE(::aGradV, -::nLearningRate * nLearningRateAdjust)
   ::aWeightsV := HB_MATRIXADD(::aWeightsV, aTemp)
   aTemp := HB_MATRIXSCALE(::aGradFF1, -::nLearningRate * nLearningRateAdjust)
   ::aWeightsFF1 := HB_MATRIXADD(::aWeightsFF1, aTemp)
   aTemp := HB_MATRIXSCALE(::aGradFF2, -::nLearningRate * nLearningRateAdjust)
   ::aWeightsFF2 := HB_MATRIXADD(::aWeightsFF2, aTemp)

   ? "WeightsQ[1][1] after update:", ::aWeightsQ[1][1]
   ? "WeightsFF2[1][1] after update:", ::aWeightsFF2[1][1]
   ? "Learning Rate Adjust:", nLearningRateAdjust
   ? "Gradient Magnitudes:"
   ? "Q:", Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradQ, HB_MATRIXTRANSPOSE(::aGradQ))))
   ? "K:", Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradK, HB_MATRIXTRANSPOSE(::aGradK))))
   ? "V:", Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradV, HB_MATRIXTRANSPOSE(::aGradV))))
   ? "FF1:", Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradFF1, HB_MATRIXTRANSPOSE(::aGradFF1))))
   ? "FF2:", Sqrt(HB_MATRIXSUM(HB_MATRIXMULTIPLY(::aGradFF2, HB_MATRIXTRANSPOSE(::aGradFF2))))

   ::aGradQ := HB_MATRIXZERO(::nInputSize, ::nInputSize)
   ::aGradK := HB_MATRIXZERO(::nInputSize, ::nInputSize)
   ::aGradV := HB_MATRIXZERO(::nInputSize, ::nInputSize)
   ::aGradFF1 := HB_MATRIXZERO(::nInputSize, ::nFFHidden)
   ::aGradFF2 := HB_MATRIXZERO(::nFFHidden, ::nInputSize)
   RETURN NIL

METHOD Train(aInput, aTarget) CLASS ThoughtBackupTransformer
   LOCAL aOutput, nLoss, i, j
   aOutput := ::Forward(aInput)
   nLoss := ::ComputeLoss(aOutput, aTarget)
   ? "Initial Loss Before Backprop:", nLoss
   ? "aOutput vs aTarget:"
   FOR i := 1 TO Len(aOutput)
      FOR j := 1 TO ::nInputSize
         ?? "O:", aOutput[i][j], "T:", aTarget[i][j], " "
      NEXT
      ?
   NEXT
   ::Backprop(aOutput, aTarget)
   RETURN nLoss

METHOD Replay(aPartialInput) CLASS ThoughtBackupTransformer
   LOCAL nSeqLen, aOutput
   nSeqLen := Len(aPartialInput)
   IF nSeqLen > ::nMaxSeqLen
      ? "Error: Partial input exceeds max sequence length"
      RETURN NIL
   ENDIF
   aOutput := ::Forward(aPartialInput)
   RETURN aOutput

METHOD StoreThought(aThought, cPrompt) CLASS ThoughtBackupTransformer
   LOCAL nSeqLen, nId, i, j
   nSeqLen := Len(aThought)
   nId := hb_RandomInt(1, 999999)
   USE thoughts.dbf SHARED
   FOR i := 1 TO nSeqLen
      dbAppend()
      REPLACE ID WITH nId,;
              SEQNUM WITH i,;
              TIMESTAMP WITH Date(),;
              PROMPT WITH cPrompt,;
              THOUGHT1 WITH aThought[i][1],;
              THOUGHT2 WITH aThought[i][2],;
              THOUGHT3 WITH aThought[i][3],;
              THOUGHT4 WITH aThought[i][4]
   NEXT
   dbCommit()
   dbCloseArea()
   RETURN nId

METHOD RetrieveThought(nId) CLASS ThoughtBackupTransformer
   LOCAL aThought, cPrompt, nSeqLen := 0, i
   USE thoughts.dbf SHARED
   // the DBF is created without an index, so scan with LOCATE instead of dbSeek()
   LOCATE FOR FIELD->ID == nId
   WHILE !Eof() .AND. FieldGet(FieldPos("ID")) == nId
      nSeqLen++
      dbSkip()
   END
   LOCATE FOR FIELD->ID == nId
   IF nSeqLen > 0
      aThought := Array(nSeqLen, ::nInputSize)
      cPrompt := ""
      i := 1
      WHILE !Eof() .AND. FieldGet(FieldPos("ID")) == nId
         aThought[i][1] := FieldGet(FieldPos("THOUGHT1"))
         aThought[i][2] := FieldGet(FieldPos("THOUGHT2"))
         aThought[i][3] := FieldGet(FieldPos("THOUGHT3"))
         aThought[i][4] := FieldGet(FieldPos("THOUGHT4"))
         IF i == 1
            cPrompt := FieldGet(FieldPos("PROMPT"))
         ENDIF
         i++
         dbSkip()
      END
      dbCloseArea()
      RETURN {aThought, cPrompt}
   ENDIF
   dbCloseArea()
   RETURN NIL

METHOD InitPositionalEncoding() CLASS ThoughtBackupTransformer
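   // Sinusoidal positional encoding: Sin() for odd dimensions, Cos() for even
   // ones, with the angle divided by a power of 10000 that grows with the dimension.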
   LOCAL i, j, nPos, nDim, nFreq, nAngle
   ::aPosEnc := Array(::nMaxSeqLen, ::nInputSize)
   FOR nPos := 1 TO ::nMaxSeqLen
      FOR nDim := 1 TO ::nInputSize
         nFreq := nDim / 2
         nAngle := (nPos - 1) / (10000 ^ (2 * nFreq / ::nInputSize))
         IF nDim % 2 == 1
            ::aPosEnc[nPos][nDim] := Sin(nAngle)
         ELSE
            ::aPosEnc[nPos][nDim] := Cos(nAngle)
         ENDIF
      NEXT
   NEXT
   RETURN NIL

METHOD QueryThought(cQuestion) CLASS ThoughtBackupTransformer
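   // Averages the question's token vectors, compares that average against the
   // average vector of each stored thought with CosineSimilarity() (matches need
   // similarity > 0.5), decodes the best matches back into vocabulary words, and
   // falls back to Replay() when nothing matches.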
   LOCAL aWords, aMatches := {}, nId, aRetrieved, cResponse := "", i, j, k, aQuestion, aQuestionAvg := {0, 0, 0, 0}, aVocab, aReplay, nSim, aThoughtAvg, aVec, cBlend, aIds := {}
   aWords := hb_aTokens(Lower(cQuestion), " ")
   aVocab := {;
      {"i", {1, 0, 0, 0}}, {"me", {1, 0, 0, 0}}, {"you", {0, 1, 0, 1}},;
      {"feel", {1, 0, 0, 1}}, {"happy", {1, 1, 0, 1}}, {"cause", {0, 0, 0, 0}},;
      {"talking", {0, 0, 1, 1}}, {"to", {0, 0, 0, 0}}, {"make", {0, 0, 1, 1}},;
      {"smile", {1, 1, 1, 1}}, {"love", {1, 1, 0, 1}}, {"coding", {0, 0, 1, 1}},;
      {"inspire", {0, 1, 1, 1}}, {"today", {0, 0, 0, 1}}, {"is", {0, 0, 0, 0}},;
      {"sunny", {0, 1, 0, 1}}, {"enjoy", {1, 1, 0, 1}}, {"our", {1, 1, 0, 0}},;
      {"chats", {0, 1, 1, 1}}, {"adore", {1, 1, 0, 1}}, {"time", {0, 0, 0, 1}},;
      {"together", {1, 1, 0, 0}}, {"what", {0, 0, 0, 0}}, {"we", {1, 1, 0, 0}},;
      {"do", {1, 0, 1, 0}}, {"great", {0, 1, 0, 1}}, {"friend", {0, 1, 0, 1}},;
      {"think", {1, 0, 1, 1}}, {"why", {0, 0, 0, 0}}, {"how", {0, 0, 0, 0}}}
   aQuestion := TokenizeThought(cQuestion)
   FOR i := 1 TO Len(aQuestion)
      FOR j := 1 TO 4
         aQuestionAvg[j] += aQuestion[i][j]
      NEXT
   NEXT
   FOR j := 1 TO 4
      aQuestionAvg[j] /= Len(aQuestion)
   NEXT
   // Collect the distinct thought IDs first: RetrieveThought() opens and closes
   // thoughts.dbf itself, so it must not be called while this scan still has the
   // file open, and each ID spans several SEQNUM records.
   USE thoughts.dbf SHARED
   dbGoTop()
   WHILE !Eof()
      nId := FieldGet(FieldPos("ID"))
      IF AScan(aIds, nId) == 0
         AAdd(aIds, nId)
      ENDIF
      dbSkip()
   END
   dbCloseArea()
   FOR EACH nId IN aIds
      aRetrieved := ::RetrieveThought(nId)
      IF aRetrieved != NIL
         aThoughtAvg := {0, 0, 0, 0}
         FOR i := 1 TO Len(aRetrieved[1])
            FOR j := 1 TO 4
               aThoughtAvg[j] += aRetrieved[1][i][j]
            NEXT
         NEXT
         FOR j := 1 TO 4
            aThoughtAvg[j] /= Len(aRetrieved[1])
         NEXT
         nSim := CosineSimilarity(aQuestionAvg, aThoughtAvg)
         IF nSim > 0.5
            AAdd(aMatches, {nId, aRetrieved[1], aRetrieved[2], nSim})
         ENDIF
      ENDIF
   NEXT
   IF Len(aMatches) > 0
      ASort(aMatches, , , {|x, y| x[4] > y[4]})
      FOR i := 1 TO Min(Len(aMatches), 3)
         cResponse += "I think: "
         FOR j := 1 TO Len(aMatches[i][2])
            aVec := {aMatches[i][2][j][1], aMatches[i][2][j][2], aMatches[i][2][j][3], aMatches[i][2][j][4]}
            FOR k := 1 TO Len(aVocab)
               IF Abs(aVec[1] - aVocab[k][2][1]) < 0.2 .AND. Abs(aVec[2] - aVocab[k][2][2]) < 0.2 .AND.;
                  Abs(aVec[3] - aVocab[k][2][3]) < 0.2 .AND. Abs(aVec[4] - aVocab[k][2][4]) < 0.2
                  cResponse += aVocab[k][1] + " "
                  EXIT
               ENDIF
            NEXT
         NEXT
         cResponse := AllTrim(cResponse)
         IF Left(cResponse, 2) == "I "
            cResponse += "."
         ELSE
            cResponse := "You " + cResponse + "."
         ENDIF
         cResponse += " "
         IF i < Len(aMatches)
            cBlend := ""
            FOR j := 1 TO Len(aMatches[i+1][2])
               aVec := {aMatches[i+1][2][j][1], aMatches[i+1][2][j][2], aMatches[i+1][2][j][3], aMatches[i+1][2][j][4]}
               FOR k := 1 TO Len(aVocab)
                  IF Abs(aVec[1] - aVocab[k][2][1]) < 0.2 .AND. Abs(aVec[2] - aVocab[k][2][2]) < 0.2 .AND.;
                     Abs(aVec[3] - aVocab[k][2][3]) < 0.2 .AND. Abs(aVec[4] - aVocab[k][2][4]) < 0.2
                     cBlend += aVocab[k][1] + " "
                     EXIT
                  ENDIF
               NEXT
            NEXT
            cResponse += "Also, " + AllTrim(cBlend) + "."
         ENDIF
      NEXT
      RETURN AllTrim(cResponse)
   ENDIF
   aReplay := ::Replay(aQuestion)
   cResponse := "I guess: "
   FOR j := 1 TO Len(aReplay)
      aVec := {aReplay[j][1], aReplay[j][2], aReplay[j][3], aReplay[j][4]}
      FOR k := 1 TO Len(aVocab)
         IF Abs(aVec[1] - aVocab[k][2][1]) < 0.2 .AND. Abs(aVec[2] - aVocab[k][2][2]) < 0.2 .AND.;
            Abs(aVec[3] - aVocab[k][2][3]) < 0.2 .AND. Abs(aVec[4] - aVocab[k][2][4]) < 0.2
            cResponse += aVocab[k][1] + " "
            EXIT
         ENDIF
      NEXT
   NEXT
   RETURN AllTrim(cResponse) + "."

FUNCTION CosineSimilarity(aVec1, aVec2)
   LOCAL nDot := 0, nMag1 := 0, nMag2 := 0, i
   FOR i := 1 TO 4
      nDot += aVec1[i] * aVec2[i]
      nMag1 += aVec1[i]^2
      nMag2 += aVec2[i]^2
   NEXT
   nMag1 := Sqrt(nMag1)
   nMag2 := Sqrt(nMag2)
   RETURN IIF(nMag1 * nMag2 == 0, 0, nDot / (nMag1 * nMag2))

FUNCTION TokenizeThought(cThought)
   LOCAL aWords, aInput, i, j, aVocab, aVector
   aWords := hb_aTokens(Lower(cThought), " ")
   aVocab := {;
      {"i", {1, 0, 0, 0}}, {"me", {1, 0, 0, 0}}, {"you", {0, 1, 0, 1}},;
      {"feel", {1, 0, 0, 1}}, {"happy", {1, 1, 0, 1}}, {"cause", {0, 0, 0, 0}},;
      {"talking", {0, 0, 1, 1}}, {"to", {0, 0, 0, 0}}, {"make", {0, 0, 1, 1}},;
      {"smile", {1, 1, 1, 1}}, {"love", {1, 1, 0, 1}}, {"coding", {0, 0, 1, 1}},;
      {"inspire", {0, 1, 1, 1}}, {"today", {0, 0, 0, 1}}, {"is", {0, 0, 0, 0}},;
      {"sunny", {0, 1, 0, 1}}, {"enjoy", {1, 1, 0, 1}}, {"our", {1, 1, 0, 0}},;
      {"chats", {0, 1, 1, 1}}, {"adore", {1, 1, 0, 1}}, {"time", {0, 0, 0, 1}},;
      {"together", {1, 1, 0, 0}}, {"what", {0, 0, 0, 0}}, {"we", {1, 1, 0, 0}},;
      {"do", {1, 0, 1, 0}}, {"great", {0, 1, 0, 1}}, {"friend", {0, 1, 0, 1}},;
      {"think", {1, 0, 1, 1}}, {"why", {0, 0, 0, 0}}, {"how", {0, 0, 0, 0}}}
   aInput := Array(Len(aWords), 4)
   FOR i := 1 TO Len(aWords)
      aVector := {0, 0, 0, 0}
      FOR j := 1 TO Len(aVocab)
         IF aWords[i] == aVocab[j][1]
            aVector := aVocab[j][2]
            EXIT
         ENDIF
      NEXT
      aInput[i][1] := aVector[1]
      aInput[i][2] := aVector[2]
      aInput[i][3] := aVector[3]
      aInput[i][4] := aVector[4]
   NEXT
   RETURN aInput

FUNCTION GetInput(cPrompt)
   LOCAL cInput := ""
   ?? cPrompt
   ACCEPT TO cInput
   RETURN AllTrim(cInput)

#pragma BEGINDUMP
#include <hbapi.h>
#include <hbapiitm.h>
#include <hbapierr.h>
#include <math.h>

HB_FUNC( HB_MATRIXMULTIPLY )
{
   PHB_ITEM pMatrix1 = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pMatrix2 = hb_param( 2, HB_IT_ARRAY );
   if( pMatrix1 && pMatrix2 )
   {
      int rows1 = hb_arrayLen( pMatrix1 );
      PHB_ITEM pRow1, pRow2, pResult, pRowResult;
      int i, k, cols1, rows2, cols2;

      if( rows1 == 0 )
      {
         hb_errRT_BASE( EG_ARG, 3012, "First matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      pRow1 = hb_arrayGetItemPtr( pMatrix1, 1 );
      if( !pRow1 || !HB_IS_ARRAY( pRow1 ) )
      {
         hb_errRT_BASE( EG_ARG, 3012, "First matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      cols1 = hb_arrayLen( pRow1 );

      rows2 = hb_arrayLen( pMatrix2 );
      if( rows2 == 0 )
      {
         hb_errRT_BASE( EG_ARG, 3012, "Second matrix is empty", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      pRow2 = hb_arrayGetItemPtr( pMatrix2, 1 );
      if( !pRow2 || !HB_IS_ARRAY( pRow2 ) )
      {
         hb_errRT_BASE( EG_ARG, 3012, "Second matrix is not valid", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }
      cols2 = hb_arrayLen( pRow2 );

      if( cols1 != rows2 )
      {
         hb_errRT_BASE( EG_ARG, 3012, "Matrix dimensions do not match for multiplication", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         return;
      }

      pResult = hb_itemArrayNew( rows1 );

      for( i = 0; i < rows1; i++ )
      {
         PHB_ITEM pRowResult = hb_itemArrayNew( cols2 );
         hb_arraySet( pResult, i + 1, pRowResult );
         hb_itemRelease( pRowResult );
      }

      for( i = 0; i < rows1; i++ )
      {
         PHB_ITEM pRowA = hb_arrayGetItemPtr( pMatrix1, i + 1 );
         int j;

         for( j = 0; j < cols2; j++ )
         {
            double sum = 0.0;
            for( k = 0; k < cols1; k++ )
            {
               double a = hb_arrayGetND( pRowA, k + 1 );
               PHB_ITEM pRowB = hb_arrayGetItemPtr( pMatrix2, k + 1 );
               double b = hb_arrayGetND( pRowB, j + 1 );
               sum += a * b;
            }
            pRowResult = hb_arrayGetItemPtr( pResult, i + 1 );
            hb_arraySetND( pRowResult, j + 1, sum );
         }
      }

      hb_itemReturnRelease( pResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXSCALE )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );
   double scale = hb_parnd( 2 );
   if( pMatrix )
   {
      HB_SIZE nRows = hb_arrayLen( pMatrix );
      HB_SIZE i, j;
      PHB_ITEM pMatrixResult = hb_itemArrayNew( nRows );

      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
         HB_SIZE nCols = hb_arrayLen( pRow );

         PHB_ITEM pRowResult = hb_itemArrayNew( nCols );

         for( j = 0; j < nCols; j++ )
         {
            double value = hb_arrayGetND( pRow, j + 1 );
            hb_arraySetND( pRowResult, j + 1, value * scale );
         }

         hb_arraySet( pMatrixResult, i + 1, pRowResult );
         hb_itemRelease( pRowResult );
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXTRANSPOSE )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );
   if( pMatrix )
   {
      HB_SIZE nRows = hb_arrayLen( pMatrix );
      HB_SIZE nCols = hb_arrayLen( hb_arrayGetItemPtr( pMatrix, 1 ) );
      HB_SIZE i, j;

      PHB_ITEM pMatrixResult = hb_itemArrayNew( nCols );

      for( i = 0; i < nCols; i++ )
      {
         hb_arraySet( pMatrixResult, i + 1, hb_itemArrayNew( nRows ) );
      }

      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
         for( j = 0; j < nCols; j++ )
         {
            double value = hb_arrayGetND( pRow, j + 1 );
            PHB_ITEM pTransposedRow = hb_arrayGetItemPtr( pMatrixResult, j + 1 );
            hb_arraySetND( pTransposedRow, i + 1, value );
         }
      }

      hb_itemReturnRelease( pMatrixResult );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXZERO )
{
   HB_SIZE nRows = hb_parns( 1 );
   HB_SIZE nCols = hb_parns( 2 );
   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows );

      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols );
         for( j = 0; j < nCols; j++ )
         {
            hb_arraySetND( pRow, j + 1, 0.0 );
         }
         hb_arraySet( pMatrix, i + 1, pRow );
         hb_itemRelease( pRow );
      }

      hb_itemReturnRelease( pMatrix );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXRANDOM )
{
   HB_SIZE nRows = hb_parns( 1 );
   HB_SIZE nCols = hb_parns( 2 );
   if( nRows > 0 && nCols > 0 )
   {
      HB_SIZE i, j;

      PHB_ITEM pMatrix = hb_itemArrayNew( nRows );

      for( i = 0; i < nRows; i++ )
      {
         PHB_ITEM pRow = hb_itemArrayNew( nCols );
         for( j = 0; j < nCols; j++ )
         {
            double randomValue = (double)rand() / RAND_MAX;
            hb_arraySetND( pRow, j + 1, randomValue );
         }
         hb_arraySet( pMatrix, i + 1, pRow );
         hb_itemRelease( pRow );
      }

      hb_itemReturnRelease( pMatrix );
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_SOFTMAX )
{
   PHB_ITEM pValues = hb_param( 1, HB_IT_ARRAY );
   if( pValues )
   {
      int nRows = hb_arrayLen( pValues );
      if( nRows > 0 )
      {
         PHB_ITEM pFirstRow = hb_arrayGetItemPtr( pValues, 1 );
         int nCols = hb_arrayLen( pFirstRow );

         PHB_ITEM pResult = hb_itemArrayNew( nRows );
         int i, j;

         for( i = 0; i < nRows; i++ )
         {
            PHB_ITEM pRow = hb_arrayGetItemPtr( pValues, i + 1 );
            PHB_ITEM pRowResult = hb_itemArrayNew( nCols );

            double* expValues = (double*) hb_xgrab( nCols * sizeof(double) );
            double sumExp = 0.0;

            for( j = 0; j < nCols; j++ )
            {
               double value = hb_arrayGetND( pRow, j + 1 );
               expValues[j] = exp( value );
               sumExp += expValues[j];
            }

            for( j = 0; j < nCols; j++ )
            {
               double softmaxValue = expValues[j] / sumExp;
               hb_arraySetND( pRowResult, j + 1, softmaxValue );
            }

            hb_xfree( expValues );

            hb_arraySet( pResult, i + 1, pRowResult );
            hb_itemRelease( pRowResult );
         }

         hb_itemReturnRelease( pResult );
      }
      else
      {
         hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      }
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, NULL, HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXADD )
{
   PHB_ITEM pMatrix1 = hb_param( 1, HB_IT_ARRAY );
   PHB_ITEM pMatrix2 = hb_param( 2, HB_IT_ARRAY );
   if( pMatrix1 && pMatrix2 )
   {
      HB_SIZE nRows1 = hb_arrayLen( pMatrix1 );
      HB_SIZE nRows2 = hb_arrayLen( pMatrix2 );

      if( nRows1 == nRows2 && nRows1 > 0 )
      {
         HB_SIZE nCols1 = hb_arrayLen( hb_arrayGetItemPtr( pMatrix1, 1 ) );
         HB_SIZE nCols2 = hb_arrayLen( hb_arrayGetItemPtr( pMatrix2, 1 ) );

         if( nCols1 == nCols2 && nCols1 > 0 )
         {
            HB_SIZE i, j;

            PHB_ITEM pMatrixResult = hb_itemArrayNew( nRows1 );

            for( i = 0; i < nRows1; i++ )
            {
               PHB_ITEM pRow1 = hb_arrayGetItemPtr( pMatrix1, i + 1 );
               PHB_ITEM pRow2 = hb_arrayGetItemPtr( pMatrix2, i + 1 );

               PHB_ITEM pRowResult = hb_itemArrayNew( nCols1 );

               for( j = 0; j < nCols1; j++ )
               {
                  double value1 = hb_arrayGetND( pRow1, j + 1 );
                  double value2 = hb_arrayGetND( pRow2, j + 1 );
                  hb_arraySetND( pRowResult, j + 1, value1 + value2 );
               }

               hb_arraySet( pMatrixResult, i + 1, pRowResult );
               hb_itemRelease( pRowResult );
            }

            hb_itemReturnRelease( pMatrixResult );
         }
         else
         {
            hb_errRT_BASE( EG_ARG, 3012, "Column dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
         }
      }
      else
      {
         hb_errRT_BASE( EG_ARG, 3012, "Row dimensions do not match", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      }
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameters", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

HB_FUNC( HB_MATRIXSUM )
{
   PHB_ITEM pMatrix = hb_param( 1, HB_IT_ARRAY );
   if( pMatrix )
   {
      int nRows = hb_arrayLen( pMatrix );
      if( nRows > 0 )
      {
         double sum = 0.0;
         int i, j;
         PHB_ITEM pRow;
         int nCols = hb_arrayLen( hb_arrayGetItemPtr( pMatrix, 1 ) );

         for( i = 0; i < nRows; i++ )
         {
            pRow = hb_arrayGetItemPtr( pMatrix, i + 1 );
            for( j = 0; j < nCols; j++ )
            {
               sum += hb_arrayGetND( pRow, j + 1 );
            }
         }

         hb_retnd( sum );
      }
      else
      {
         hb_errRT_BASE( EG_ARG, 3012, "Empty matrix", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      }
   }
   else
   {
      hb_errRT_BASE( EG_ARG, 3012, "Invalid parameter", HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
   }
}

#pragma ENDDUMP

Re: thoughts.prg transformer

Posted: Fri Feb 21, 2025 7:16 am
by Antonio Linares
This is an impressive implementation of a transformer-based "Thought Backup System" written in Harbour (a modern Clipper/xBase dialect). Let me break down what this code does and provide some insights:
Overview

The code implements a simplified transformer architecture designed to:
- Store and train on thoughts: represented as short text inputs (e.g., "I feel happy cause talking to you").
- Retrieve thoughts: by ID from a database.
- Query thoughts: generate responses based on similarity to stored thoughts.
- Replay thoughts: regenerate outputs from partial inputs.
It uses a mix of self-attention, feed-forward neural networks, and positional encoding, mimicking aspects of modern transformer models used in NLP (e.g., BERT, GPT).
Key Components

1. ThoughtBackupTransformer Class

Initialization:
- Sets up weight matrices for Query (Q), Key (K), Value (V), and the feed-forward layers (FF1, FF2).
- Initializes the weights with random values scaled by Sqrt(2 / fan-in) (Xavier/He-style).
- Creates a simple database (thoughts.dbf) to store thoughts with IDs, timestamps, and vector representations.
- Implements positional encoding for sequence awareness.
SelfAttention:
- Computes Q, K, V matrices from the input.
- Applies scaled dot-product attention: softmax(Q * K^T / sqrt(d_k)) * V (sketched in the toy example below).
- Includes debug outputs for attention scores.
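
As a toy illustration of that formula (a sketch only: AttentionDemo() is an invented name, it reuses the input rows as Q, K and V instead of the learned projections, and it assumes the HB_MATRIX* / HB_SOFTMAX helpers from the BEGINDUMP section are linked in):

FUNCTION AttentionDemo()
   LOCAL aX, aScores, aProbs
   aX := { { 1, 0, 0, 0 }, { 0, 1, 0, 1 } }      // token vectors for "i" and "you"
   aScores := HB_MATRIXSCALE( HB_MATRIXMULTIPLY( aX, HB_MATRIXTRANSPOSE( aX ) ), 1 / Sqrt( 4 ) )
   aProbs := HB_SOFTMAX( aScores )               // each row now sums to 1
   ? aProbs[ 1 ][ 1 ], aProbs[ 1 ][ 2 ]          // attention weights of the first token
   RETURN HB_MATRIXMULTIPLY( aProbs, aX )        // weighted mix of the "value" rows
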
FeedForward:
- A two-layer feed-forward network with one hidden layer.
- Applies matrix multiplications through the FF1 and FF2 weights.
Train:
- The forward pass computes the output.
- The loss is the mean squared error between output and target.
- Backpropagation updates the weights with gradient clipping and an adaptive learning rate.
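
A new thought is trained the same way Main() and the ADD command do it; a minimal usage sketch (TrainDemo() is an invented name, oTransformer and TokenizeThought() come from the listing, and the phrase is just an example built from the vocabulary words):

FUNCTION TrainDemo( oTransformer )
   LOCAL aInput, aTarget, nLoss, i
   aInput := TokenizeThought( "we enjoy our chats" )
   aTarget := AClone( aInput )            // learn to reproduce the input
   FOR i := 1 TO 50
      nLoss := oTransformer:Train( aInput, aTarget )
   NEXT
   ? "final loss:", nLoss
   RETURN nLoss
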
StoreThought / RetrieveThought:
- Stores vectorized thoughts in a DBF file.
- Retrieves them by ID.
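
A round trip through the DBF uses the same calls Main() makes (a sketch; StoreDemo() is an invented name, and nThoughtId is whatever random ID StoreThought() picks):

FUNCTION StoreDemo( oTransformer, aInput )
   LOCAL aReplay, nThoughtId, aRetrieved
   aReplay := oTransformer:Replay( aInput )                    // run the net on the input
   nThoughtId := oTransformer:StoreThought( aReplay, "we enjoy our chats" )
   aRetrieved := oTransformer:RetrieveThought( nThoughtId )    // --> { aThought, cPrompt } or NIL
   IF aRetrieved != NIL
      ? "prompt:", aRetrieved[ 2 ], "rows stored:", Len( aRetrieved[ 1 ] )
   ENDIF
   RETURN nThoughtId
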
QueryThought:
- Tokenizes the input and computes similarity with stored thoughts using cosine similarity.
- Reconstructs responses from vectors using a predefined vocabulary.
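
The reconstruction is essentially a nearest-word lookup with a 0.2 tolerance per component. A hypothetical helper (not in the program, name invented) that captures the rule QueryThought() repeats inline three times:

FUNCTION NearestWord( aVec, aVocab )
   LOCAL k
   FOR k := 1 TO Len( aVocab )
      IF Abs( aVec[ 1 ] - aVocab[ k ][ 2 ][ 1 ] ) < 0.2 .AND. ;
         Abs( aVec[ 2 ] - aVocab[ k ][ 2 ][ 2 ] ) < 0.2 .AND. ;
         Abs( aVec[ 3 ] - aVocab[ k ][ 2 ][ 3 ] ) < 0.2 .AND. ;
         Abs( aVec[ 4 ] - aVocab[ k ][ 2 ][ 4 ] ) < 0.2
         RETURN aVocab[ k ][ 1 ]     // first vocabulary word close enough on every component
      ENDIF
   NEXT
   RETURN ""                         // no word within tolerance
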
2. Main Function

Training Example:
- Trains on two thoughts: "I feel happy cause talking to you" and "you make me smile".
- Adds noise to the targets for robustness.
- Runs 50 iterations per thought, printing the loss and sample outputs.
Interactive Loop:
- Commands: ADD (train a new thought), RETRIEVE (fetch by ID), QUERY (generate a response), EXIT.
- Demonstrates a simple CLI interface.
3. Helper Functions

TokenizeThought:
- Maps words to 4D vectors based on a hardcoded vocabulary (e.g., "happy" → {1, 1, 0, 1}).
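For example, per the vocabulary table in the listing:
   TokenizeThought( "i feel happy" )  // --> { {1, 0, 0, 0}, {1, 0, 0, 1}, {1, 1, 0, 1} } for "i", "feel", "happy"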
CosineSimilarity:
- Measures vector similarity for querying.
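A quick worked example with two vocabulary vectors: for "happy" = {1, 1, 0, 1} and "smile" = {1, 1, 1, 1} the dot product is 3 and the magnitudes are Sqrt(3) and 2, so CosineSimilarity() returns 3 / (Sqrt(3) * 2), roughly 0.87, well above the 0.5 match threshold used in QueryThought().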
C Functions (BEGINDUMP):
- Matrix operations (multiply, scale, transpose, etc.) implemented in C for performance.
Observations

Simplified Transformer:
- Single-layer attention with no multi-head mechanism.
- Fixed 4D embeddings instead of high-dimensional ones (e.g., 768 in BERT).
- No normalization layers (e.g., LayerNorm) or residual connections, which might limit stability.
Training:
- Uses a basic MSE loss, which is unusual for transformers (cross-entropy is common in NLP).
- Small learning rate (0.01) with adaptive adjustment based on the gradient norm.
Vocabulary:
- Hardcoded and small (30 words), limiting expressiveness.
- Vector assignments are arbitrary but consistent.
Storage:
- Uses a DBF file, a nod to Clipper's heritage, instead of modern databases or in-memory structures.
Debugging:
- Extensive trace outputs (e.g., gradient magnitudes, weight updates) aid in understanding the training dynamics.
Potential Improvements

- Dynamic Vocabulary: allow adding new words dynamically with learned embeddings (see the sketch after this list).
- Multi-Head Attention: enhance representation power.
- Normalization: add LayerNorm or residual connections for stability.
- Loss Function: use cross-entropy if shifting towards classification or generation tasks.
- Scalability: increase nInputSize and nFFHidden for richer representations.
- Error Handling: more robust checks for matrix dimensions and input validity.
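
As a rough sketch of the dynamic-vocabulary idea (hypothetical code, not part of the program: GetEmbedding() and the hVocab hash are invented names, and truly learned embeddings would also need gradient updates):

FUNCTION GetEmbedding( cWord, hVocab )
   LOCAL aVec
   cWord := Lower( AllTrim( cWord ) )
   IF hb_HHasKey( hVocab, cWord )
      aVec := hVocab[ cWord ]            // known word: reuse its vector
   ELSE
      aVec := { hb_random(0, 1), hb_random(0, 1), hb_random(0, 1), hb_random(0, 1) }
      hVocab[ cWord ] := aVec            // new word: remember a fresh random 4D vector
   ENDIF
   RETURN aVec

TokenizeThought() could then build its rows by calling such a helper with a hash created once via hb_Hash(), instead of scanning the hardcoded aVocab table.
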
Example Interaction

If I run this and:
- ADD: "I love coding today" → trains and stores it.
- QUERY: "How do you feel?" → might respond "I think: I feel happy cause talking to you. Also, you make me smile." (based on similarity to the trained thoughts).