Skip to content

Commit 0720afb

Browse files
committed
Fix: Data type mismatch issue.
1 parent 65cd2c9 commit 0720afb

File tree

5 files changed

+114
-117
lines changed

5 files changed

+114
-117
lines changed

src/main/java/custom/objects/DocumentEmbedding.java

Lines changed: 29 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,13 @@
33
import org.tinystruct.ApplicationException;
44
import org.tinystruct.data.component.AbstractData;
55
import org.tinystruct.data.component.Row;
6-
import org.tinystruct.data.component.Table;
7-
import org.tinystruct.system.ApplicationManager;
86

97
import java.io.ByteArrayInputStream;
108
import java.io.ByteArrayOutputStream;
119
import java.io.ObjectInputStream;
1210
import java.io.ObjectOutputStream;
1311
import java.io.Serializable;
1412
import java.util.Date;
15-
import java.util.Vector;
1613

1714
/**
1815
* Represents an embedding for a document fragment.
@@ -64,24 +61,25 @@ public Date getCreatedAt() {
6461
}
6562

6663
/**
67-
* Store a Vector<Double> as a byte array in the embedding field
64+
* Store a double[] as a byte array in the embedding field
65+
*
6866
* @param embeddingVector The embedding vector
6967
* @throws ApplicationException if serialization fails
7068
*/
71-
public void setEmbeddingVector(Vector<Double> embeddingVector) throws ApplicationException {
69+
public void setEmbeddingVector(double[] embeddingVector) throws ApplicationException {
7270
try {
7371
if (embeddingVector == null) {
7472
throw new ApplicationException("Cannot serialize null embedding vector");
7573
}
7674

77-
System.out.println("Serializing embedding vector with dimension: " + embeddingVector.size());
75+
System.out.println("Serializing embedding vector with dimension: " + embeddingVector.length);
7876

7977
// Print first few values for debugging
80-
if (!embeddingVector.isEmpty()) {
78+
if (embeddingVector.length > 0) {
8179
StringBuilder sb = new StringBuilder("First 5 values: ");
82-
for (int i = 0; i < Math.min(5, embeddingVector.size()); i++) {
83-
sb.append(embeddingVector.get(i));
84-
if (i < Math.min(4, embeddingVector.size() - 1)) {
80+
for (int i = 0; i < Math.min(5, embeddingVector.length); i++) {
81+
sb.append(embeddingVector[i]);
82+
if (i < Math.min(4, embeddingVector.length - 1)) {
8583
sb.append(", ");
8684
}
8785
}
@@ -97,7 +95,7 @@ public void setEmbeddingVector(Vector<Double> embeddingVector) throws Applicatio
9795
System.out.println("Serialized embedding to " + serializedData.length + " bytes");
9896

9997
this.setEmbedding(serializedData);
100-
this.setEmbeddingDimension(embeddingVector.size());
98+
this.setEmbeddingDimension(embeddingVector.length);
10199
} catch (Exception e) {
102100
System.err.println("Error serializing embedding vector: " + e.getMessage());
103101
e.printStackTrace();
@@ -107,18 +105,20 @@ public void setEmbeddingVector(Vector<Double> embeddingVector) throws Applicatio
107105

108106
/**
109107
* Get the embedding vector from the byte array
108+
*
110109
* @return The deserialized embedding vector
111110
* @throws ApplicationException if deserialization fails
112111
*/
113-
public Vector<Double> getEmbeddingVector() throws ApplicationException {
112+
public double[] getEmbeddingVector() throws ApplicationException {
114113
try {
115114
if (this.getEmbedding() == null) {
116115
System.err.println("Warning: Embedding is null for fragment ID: " + this.getFragmentId());
117116
return null;
118117
}
119118

120119
byte[] embeddingData = this.getEmbedding();
121-
System.out.println("Deserializing embedding data of size: " + embeddingData.length + " bytes for fragment ID: " + this.getFragmentId());
120+
System.out.println("Deserializing embedding data of size: " + embeddingData.length
121+
+ " bytes for fragment ID: " + this.getFragmentId());
122122

123123
ByteArrayInputStream bais = new ByteArrayInputStream(embeddingData);
124124
ObjectInputStream ois = new ObjectInputStream(bais);
@@ -127,19 +127,18 @@ public Vector<Double> getEmbeddingVector() throws ApplicationException {
127127

128128
System.out.println("Deserialized object class: " + (obj != null ? obj.getClass().getName() : "null"));
129129

130-
if (!(obj instanceof Vector)) {
131-
System.err.println("Error: Deserialized object is not a Vector: " +
130+
if (!(obj instanceof double[])) {
131+
System.err.println("Error: Deserialized object is not a double[]: " +
132132
(obj != null ? obj.getClass().getName() : "null") +
133133
" for fragment ID: " + this.getFragmentId());
134134
return null;
135135
}
136136

137-
@SuppressWarnings("unchecked")
138-
Vector<Double> embeddingVector = (Vector<Double>) obj;
137+
double[] embeddingVector = (double[]) obj;
139138

140139
// Validate the vector
141-
if (embeddingVector.size() != this.getEmbeddingDimension()) {
142-
System.err.println("Warning: Vector dimension (" + embeddingVector.size() +
140+
if (embeddingVector.length != this.getEmbeddingDimension()) {
141+
System.err.println("Warning: Vector dimension (" + embeddingVector.length +
143142
") does not match stored dimension (" + this.getEmbeddingDimension() + ") " +
144143
"for fragment ID: " + this.getFragmentId());
145144
}
@@ -155,8 +154,10 @@ public Vector<Double> getEmbeddingVector() throws ApplicationException {
155154

156155
@Override
157156
public void setData(Row row) {
158-
if (row.getFieldInfo("id") != null) this.setId(row.getFieldInfo("id").stringValue());
159-
if (row.getFieldInfo("fragment_id") != null) this.setFragmentId(row.getFieldInfo("fragment_id").stringValue());
157+
if (row.getFieldInfo("id") != null)
158+
this.setId(row.getFieldInfo("id").stringValue());
159+
if (row.getFieldInfo("fragment_id") != null)
160+
this.setFragmentId(row.getFieldInfo("fragment_id").stringValue());
160161
if (row.getFieldInfo("embedding") != null) {
161162
// Get the raw object from the field info
162163
try {
@@ -171,8 +172,9 @@ public void setData(Row row) {
171172
System.err.println("Failed to get embedding using stringValue(): " + e.getMessage());
172173
}
173174
}
174-
String fragmentId = row.getFieldInfo("fragment_id") != null ?
175-
row.getFieldInfo("fragment_id").stringValue() : "unknown";
175+
String fragmentId = row.getFieldInfo("fragment_id") != null
176+
? row.getFieldInfo("fragment_id").stringValue()
177+
: "unknown";
176178

177179
System.out.println("Retrieved embedding object for fragment ID: " + fragmentId +
178180
", class: " + (embeddingObj != null ? embeddingObj.getClass().getName() : "null"));
@@ -199,8 +201,10 @@ public void setData(Row row) {
199201
} else {
200202
System.err.println("No embedding field found in row");
201203
}
202-
if (row.getFieldInfo("embedding_dimension") != null) this.setEmbeddingDimension(row.getFieldInfo("embedding_dimension").intValue());
203-
if (row.getFieldInfo("created_at") != null) this.setCreatedAt(row.getFieldInfo("created_at").dateValue());
204+
if (row.getFieldInfo("embedding_dimension") != null)
205+
this.setEmbeddingDimension(row.getFieldInfo("embedding_dimension").intValue());
206+
if (row.getFieldInfo("created_at") != null)
207+
this.setCreatedAt(row.getFieldInfo("created_at").dateValue());
204208
}
205209

206210
@Override

src/main/java/custom/util/DocumentQA.java

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -44,25 +44,27 @@ public static List<EmbeddingManager.SimilarityResult> findRelevantDocuments(Stri
4444

4545
// Generate embedding for the query
4646
System.out.println("Generating embedding for query");
47-
Vector<Double> queryEmbedding = manager.generateQueryEmbedding(query);
47+
double[] queryEmbedding = manager.generateQueryEmbedding(query);
4848
if (queryEmbedding == null) {
4949
System.err.println("Error: Failed to generate query embedding");
5050
throw new ApplicationException("Failed to generate query embedding");
5151
}
52-
System.out.println("Generated query embedding with dimension: " + queryEmbedding.size());
52+
System.out.println("Generated query embedding with dimension: " + queryEmbedding.length);
5353

5454
// Find similar documents - get more results than needed to ensure diversity
5555
System.out.println("Finding similar documents");
5656
int initialResultsCount = maxResults * 3; // Get 3x more results initially
57-
List<EmbeddingManager.SimilarityResult> allResults = EmbeddingManager.findSimilar(queryEmbedding, initialResultsCount);
57+
List<EmbeddingManager.SimilarityResult> allResults = EmbeddingManager.findSimilar(queryEmbedding,
58+
initialResultsCount);
5859
System.out.println("Found " + allResults.size() + " similar documents");
5960

6061
// Filter by similarity threshold
6162
List<EmbeddingManager.SimilarityResult> filteredResults = allResults.stream()
6263
.filter(result -> result.similarity >= SIMILARITY_THRESHOLD)
6364
.collect(Collectors.toList());
6465

65-
System.out.println("Filtered to " + filteredResults.size() + " documents with similarity >= " + SIMILARITY_THRESHOLD);
66+
System.out.println(
67+
"Filtered to " + filteredResults.size() + " documents with similarity >= " + SIMILARITY_THRESHOLD);
6668
return filteredResults;
6769
} catch (Exception e) {
6870
System.err.println("Error finding relevant documents: " + e.getMessage());
@@ -84,6 +86,7 @@ public static List<EmbeddingManager.SimilarityResult> findRelevantDocuments(Stri
8486

8587
/**
8688
* Format document fragments as context for a language model
89+
*
8790
* @param results List of document fragments with similarity scores
8891
* @return Formatted context string
8992
*/
@@ -115,10 +118,10 @@ public static String formatDocumentsAsContext(List<EmbeddingManager.SimilarityRe
115118
// Create document entry
116119
StringBuilder documentEntry = new StringBuilder();
117120
documentEntry.append("Document ").append(i + 1)
118-
.append(" (Title: ").append(title)
119-
.append(", File: ").append(fileName)
120-
.append(", Relevance: ").append(String.format("%.2f", result.similarity))
121-
.append("):\n");
121+
.append(" (Title: ").append(title)
122+
.append(", File: ").append(fileName)
123+
.append(", Relevance: ").append(String.format("%.2f", result.similarity))
124+
.append("):\n");
122125
documentEntry.append(content).append("\n\n");
123126

124127
// Check if adding this document would exceed the maximum context length
@@ -137,7 +140,7 @@ public static String formatDocumentsAsContext(List<EmbeddingManager.SimilarityRe
137140
// Add a note about how many documents were found vs. included
138141
if (documentsAdded < results.size()) {
139142
context.append("Note: Found " + results.size() + " relevant documents, but only included " +
140-
documentsAdded + " to stay within context limits.\n\n");
143+
documentsAdded + " to stay within context limits.\n\n");
141144
}
142145

143146
return context.toString();
@@ -147,7 +150,8 @@ public static String formatDocumentsAsContext(List<EmbeddingManager.SimilarityRe
147150
* Enhance a user query with relevant document fragments
148151
*
149152
* @param query The original user query
150-
* @return Enhanced query with document context, or the original query if no relevant documents found
153+
* @return Enhanced query with document context, or the original query if no
154+
* relevant documents found
151155
*/
152156
public static String enhanceQueryWithDocuments(String query) throws ApplicationException {
153157
try {
@@ -176,7 +180,7 @@ public static String enhanceQueryWithDocuments(String query) throws ApplicationE
176180
* @return True if document context was added, false otherwise
177181
*/
178182
public static boolean addDocumentContextToMessages(String query, String meetingCode,
179-
Builders messages) throws ApplicationException {
183+
Builders messages) throws ApplicationException {
180184
try {
181185
System.out.println("Finding relevant documents for query: " + query);
182186
List<EmbeddingManager.SimilarityResult> relevantDocs = findRelevantDocuments(query);
@@ -209,21 +213,23 @@ public static boolean addDocumentContextToMessages(String query, String meetingC
209213

210214
if (context.length() > maxSafeCharLimit) {
211215
System.out.println("Warning: Context exceeds safe token limit. Truncating from " +
212-
context.length() + " to " + maxSafeCharLimit + " characters");
216+
context.length() + " to " + maxSafeCharLimit + " characters");
213217
formattedContext = context.substring(0, maxSafeCharLimit);
214218
}
215219

216220
// Add system message with document context
217221
Builder contextMessage = new Builder();
218222
contextMessage.put("role", "system");
219-
contextMessage.put("content", "I am providing you with some relevant document fragments to help answer the user's question. " +
220-
"These documents are highly relevant to the current conversation context. " +
221-
"Instructions for using this information:\n" +
222-
"1. Prioritize this information over your general knowledge when answering\n" +
223-
"2. Cite the specific document title/number when using information from it\n" +
224-
"3. If multiple documents contain relevant information, synthesize it\n" +
225-
"4. If the documents don't contain the answer, clearly state that and use your general knowledge\n" +
226-
"5. Maintain continuity with the previous conversation\n\n" + formattedContext);
223+
contextMessage.put("content",
224+
"I am providing you with some relevant document fragments to help answer the user's question. " +
225+
"These documents are highly relevant to the current conversation context. " +
226+
"Instructions for using this information:\n" +
227+
"1. Prioritize this information over your general knowledge when answering\n" +
228+
"2. Cite the specific document title/number when using information from it\n" +
229+
"3. If multiple documents contain relevant information, synthesize it\n" +
230+
"4. If the documents don't contain the answer, clearly state that and use your general knowledge\n"
231+
+
232+
"5. Maintain continuity with the previous conversation\n\n" + formattedContext);
227233
messages.add(contextMessage);
228234

229235
System.out.println("Added document context to messages");

0 commit comments

Comments
 (0)