Spaces: Running on T4
Update auditqa/process_chunks.py
Browse files
- auditqa/process_chunks.py +15 -15
auditqa/process_chunks.py
CHANGED
|
@@ -69,22 +69,22 @@ def load_chunks():
|
|
| 69 |
qdrant_collections = {}
|
| 70 |
print("embeddings started")
|
| 71 |
batch_size = 1000 # Adjust this value based on your system's memory capacity
|
| 72 |
-
for i in range(0, len(chunks_list), batch_size):
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
print(qdrant_collections)
|
| 89 |
print("vector embeddings done")
|
| 90 |
return qdrant_collections
|
|
|
|
| 69 |
qdrant_collections = {}
|
| 70 |
print("embeddings started")
|
| 71 |
batch_size = 1000 # Adjust this value based on your system's memory capacity
|
| 72 |
+
#for i in range(0, len(chunks_list), batch_size):
|
| 73 |
+
# print("embedding",(i+batch_size)/1000)
|
| 74 |
+
# batch_docs = chunks_list[i:i+batch_size]
|
| 75 |
+
# qdrant = Qdrant.from_documents(
|
| 76 |
+
# batch_docs, embeddings,
|
| 77 |
+
# path="/data/local_qdrant",
|
| 78 |
+
# recreate_collection=False,
|
| 79 |
+
# collection_name='reportsFeb2025',
|
| 80 |
+
# )
|
| 81 |
|
| 82 |
+
qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
|
| 83 |
+
chunks_list,
|
| 84 |
+
embeddings,
|
| 85 |
+
path="/data/local_qdrant",
|
| 86 |
+
collection_name='reportsFeb2025',
|
| 87 |
+
)
|
| 88 |
print(qdrant_collections)
|
| 89 |
print("vector embeddings done")
|
| 90 |
return qdrant_collections
|