snickerdust committed on
Commit
7bf441c
·
1 Parent(s): ea23cc3

Restore weighted ranking selection in BERT backend while preserving raw cosine similarity scores for frontend UI display

Browse files
.firebase/hosting..cache CHANGED
@@ -81,6 +81,7 @@ indobert_finetuned/config.json,1779199343213,db488af85fe9e2d3f7782467724c7481523
81
  .git/objects/91/603de6577d133ded9bff3b98d8a4eb880749c4,1779654785322,4e343b742ba9acdff6b3d7f4059a63ead19add40d311591e1ab3c7e468cc6d2c
82
  .git/objects/8b/a9d549b1ad7793b5b3cf0b0690c702ef72f10d,1778127769144,52709b563735b7664b98e02de9c9c8cfda7d0eb91cfcf434a835f07e69d02e51
83
  .git/objects/8a/9cbb12c3870a9bc540859116333d7cc30f622d,1778127728931,167451f8c77fdbbde7ed32e6198d89ad61cd85b89053f4d3f26b1f69aea0d75b
 
84
  .git/objects/85/74ecaa09ede87a4d467b462d0472bef387124d,1778127728914,fa7f76611bd3df3d22e9bccba4b27b0d83000637230736f31965514aadfc1ca4
85
  .git/objects/85/326bbd10e1ebbb97a78724e334149cd20d588c,1779632652046,d131a9ac074833c5d6fa1cb25fdba0d0be966ddd056d6414e84b916efb5eb9ad
86
  .git/objects/85/048b47852b8e7b3f2928ded7b9ebc095b982e5,1779632644419,fd7a46003800654f6ef047312639692b5c7e165d7c554a02c5d7a0396012078a
@@ -88,6 +89,7 @@ indobert_finetuned/config.json,1779199343213,db488af85fe9e2d3f7782467724c7481523
88
  .git/objects/81/dad4a0b0f377b0916f654bf23dd9ab6e08a9bd,1779632830544,aec48133332ea51a88f6fe8dda8cb27fafbb051a5c792821bbd13a5af2a4c459
89
  .git/objects/80/5ea838c356c0d221299e85d853bcb7a6fcece9,1778129157385,570592c9c207095d073a56174bae568e9323c86054041612579dc5a6efeabe52
90
  .git/objects/75/e9a51f45dccd112f57ead9a99b36ea1412f380,1778127769162,0f6f3bab13767e91f0bcd97559e638ec27a7d274fbb41f2f7cc231b1a925d997
 
91
  .git/objects/73/7958dc6e3e758651ebc3072afa01883b49d50a,1779706631920,46471c8c84e5659cb13a30cbeb6e207651a706906b112974e79030df515cea8f
92
  .git/objects/73/037004c6d55bfd2513893a06adcec769cd40cf,1778127728890,2e8dd6c583dd8d594922282aa78c64556163b946e72f689dd5478c810bafd171
93
  .git/objects/71/963e03206fe9faad227b7d992db5fe8629903c,1779656019509,3cf906540a64dfe4b3c85d7c346ce56f1c07baa52d29dd801db938fcc07712a9
@@ -116,6 +118,7 @@ indobert_finetuned/config.json,1779199343213,db488af85fe9e2d3f7782467724c7481523
116
  .git/objects/50/396cd4ded1f2c207c7c64888b3adf921773ca5,1779656868014,2ed403878fb8e21732d9ce09b78739b2ba05facd122f60935ba50bc216d922bf
117
  .git/objects/4f/910eb4386f398a30172a9d9f88439edf355d8f,1778127728828,67f305b95316aec3c9fdf876580b725ffea8ff97a6f9b7424c9753843ae770d1
118
  .git/objects/4d/68ac5ad0e0a2d601f7efcfa9eaa05769a1c9f3,1778128239699,a4c0c10e52ed7e5b92d5c93904820ba5a047af2ad083ea9a4d267da20d8b4633
 
119
  .git/objects/47/ba7df2c91a19a47c13970f7cfee172356d57fa,1779655120416,cc2b84b161c0f4660204698e11672a8ee08839b645ceb0f0726deaf9c1d374c4
120
  .git/objects/45/c6043c6b7a55a4a3f5fbd6dfc406aada81a492,1779655499396,b0fd19dc1afe0a28a5f74f3be90471774674d2f36a99c36d8b64a6acc95464bb
121
  .git/objects/43/b9cc154263725a1984489c248e82093573474d,1779706641655,9c997edaefb8825367682b9604a5b280167024d95a88c2bd92670637854e6993
@@ -136,6 +139,8 @@ indobert_finetuned/config.json,1779199343213,db488af85fe9e2d3f7782467724c7481523
136
  .git/objects/20/f664528b9017cee154ce526b345887e41e8b13,1779706631917,25af951a89421127f9e180fb70c50e21d9ca79172a40a587bca570ecf9cac235
137
  .git/objects/18/a8a0afd1c56788799ba6bcfceb19b7bcfa2643,1779656318180,4d983a3ab7229eb21ca4ae44b3904ce569cd901c88d89f061f4e9a07dcaa8660
138
  .git/objects/15/fcaa6e03630909166530b0f016005f3cdfdefa,1778129157406,d810a7419be3127a3c373d8e7f677f87b1c5526f02b2d5ff7192c6b293c256e3
 
 
139
  .git/objects/13/e11ebaccc1218988b3384917053541bebc2a1d,1778127728920,d66b4205fa37fe8b7992e9bd23f4b40af7e9bafc8a932b9172020868eabd12d2
140
  .git/objects/12/2c800cb874e7722308915d0cbed993b558c2a4,1779642502104,3a949cef4df9d19d839278daed5b36737e831e60802b0636776cf5e985b0f4d2
141
  .git/objects/10/b2809106604d0b2b600d88782bfacee7779b73,1779655499390,3d3817590eea4aed1e952d77b7e90e6f9c7aa6cf424b79bd4eb050c8406d6f51
@@ -168,16 +173,25 @@ indobert_finetuned/config.json,1779199343213,db488af85fe9e2d3f7782467724c7481523
168
  .git/hooks/fsmonitor-watchman.sample,1778127720688,c7efd67fa8de750df765a1cf18a7bdf6c2298ab4a4ab1eed2772a32e4fd9725d
169
  .git/hooks/commit-msg.sample,1778127720685,00ffdd1510ab6aa72447787ad1516c9bea8345a9cf6ac5345afb1f44aba07f6f
170
  .git/hooks/applypatch-msg.sample,1778127720683,e989aeea500fde675ea8ee009c4cf25aae7abbf4dd02c15b04cb8c85404dbca3
171
- .git/index,1779707576351,e6262d6d7080e0be245357e3cc8342be4987137683f9ed6c54de50eb30af0610
172
- .git/COMMIT_EDITMSG,1779707576357,c19dc1af7bfcab450ee97e1deaab38bbe3c5eff6d31e7350d288adff40e9560d
173
- .git/refs/remotes/origin/main,1779707598509,91fdfa3ac217a5596eb4a2aae15e4297e6ee9cefa43228e24cad70eb536c7729
174
- .git/refs/heads/main,1779707576366,91fdfa3ac217a5596eb4a2aae15e4297e6ee9cefa43228e24cad70eb536c7729
175
- .git/objects/86/8596df5d496332bdb11b36c9c58aa6befcc108,1779707576348,d9df9f9eebdee6a7a5cf6931a2fb94b4f8e0c6b3a0528c306e111732254e6b08
176
- .git/objects/4a/366c58eadd5b9d0ff59fb16ccb9dbcc8dca02c,1779707576359,5b86ac045d8fbdddfc7295613d0469a999c81d69a982f568f3e596ec05da44c7
177
- .git/objects/15/9cc227d3b9e432b2238dfab778b7965d952f7b,1779707576344,e720ac66ebb1010621bdce7e9aece7ce048b33594c605103bc13642dd93b19b5
178
- .git/objects/74/38ffee553742644c2a3d5aef02150658639d47,1779707564369,926e5cb038f158b22fe326a147cd378e48662f43ee8c8c2aac22c01f8969f91e
179
- .git/logs/refs/remotes/origin/main,1779707598511,21257be5632185f0929bd0151b9a44b83b6eb16234ba186668718d94fced7b4d
180
- .git/logs/refs/heads/main,1779707576367,d70cb7efc0b94c61441d51b036d3ada9288eb9819ad652b214f985719e85432b
181
- .git/logs/HEAD,1779707576367,0bc09d39d6243248bffc4a4316582e1ed5358e0797ce88816f44eb4d410cea23
182
- index.html,1779707522969,ed369b94881708772b3467f113b1fea77541f96aaf31bcae41da9f739f7f37f9
183
- .git/objects/15/5b71319e2be0b5af28af07e651a87b924398f5,1779707564377,1cdf79d225a839d103c99148c8060426d679e36d671c37c7bf11ecde201cc6cb
 
 
 
 
 
 
 
 
 
 
81
  .git/objects/91/603de6577d133ded9bff3b98d8a4eb880749c4,1779654785322,4e343b742ba9acdff6b3d7f4059a63ead19add40d311591e1ab3c7e468cc6d2c
82
  .git/objects/8b/a9d549b1ad7793b5b3cf0b0690c702ef72f10d,1778127769144,52709b563735b7664b98e02de9c9c8cfda7d0eb91cfcf434a835f07e69d02e51
83
  .git/objects/8a/9cbb12c3870a9bc540859116333d7cc30f622d,1778127728931,167451f8c77fdbbde7ed32e6198d89ad61cd85b89053f4d3f26b1f69aea0d75b
84
+ .git/objects/86/8596df5d496332bdb11b36c9c58aa6befcc108,1779707576348,d9df9f9eebdee6a7a5cf6931a2fb94b4f8e0c6b3a0528c306e111732254e6b08
85
  .git/objects/85/74ecaa09ede87a4d467b462d0472bef387124d,1778127728914,fa7f76611bd3df3d22e9bccba4b27b0d83000637230736f31965514aadfc1ca4
86
  .git/objects/85/326bbd10e1ebbb97a78724e334149cd20d588c,1779632652046,d131a9ac074833c5d6fa1cb25fdba0d0be966ddd056d6414e84b916efb5eb9ad
87
  .git/objects/85/048b47852b8e7b3f2928ded7b9ebc095b982e5,1779632644419,fd7a46003800654f6ef047312639692b5c7e165d7c554a02c5d7a0396012078a
 
89
  .git/objects/81/dad4a0b0f377b0916f654bf23dd9ab6e08a9bd,1779632830544,aec48133332ea51a88f6fe8dda8cb27fafbb051a5c792821bbd13a5af2a4c459
90
  .git/objects/80/5ea838c356c0d221299e85d853bcb7a6fcece9,1778129157385,570592c9c207095d073a56174bae568e9323c86054041612579dc5a6efeabe52
91
  .git/objects/75/e9a51f45dccd112f57ead9a99b36ea1412f380,1778127769162,0f6f3bab13767e91f0bcd97559e638ec27a7d274fbb41f2f7cc231b1a925d997
92
+ .git/objects/74/38ffee553742644c2a3d5aef02150658639d47,1779707564369,926e5cb038f158b22fe326a147cd378e48662f43ee8c8c2aac22c01f8969f91e
93
  .git/objects/73/7958dc6e3e758651ebc3072afa01883b49d50a,1779706631920,46471c8c84e5659cb13a30cbeb6e207651a706906b112974e79030df515cea8f
94
  .git/objects/73/037004c6d55bfd2513893a06adcec769cd40cf,1778127728890,2e8dd6c583dd8d594922282aa78c64556163b946e72f689dd5478c810bafd171
95
  .git/objects/71/963e03206fe9faad227b7d992db5fe8629903c,1779656019509,3cf906540a64dfe4b3c85d7c346ce56f1c07baa52d29dd801db938fcc07712a9
 
118
  .git/objects/50/396cd4ded1f2c207c7c64888b3adf921773ca5,1779656868014,2ed403878fb8e21732d9ce09b78739b2ba05facd122f60935ba50bc216d922bf
119
  .git/objects/4f/910eb4386f398a30172a9d9f88439edf355d8f,1778127728828,67f305b95316aec3c9fdf876580b725ffea8ff97a6f9b7424c9753843ae770d1
120
  .git/objects/4d/68ac5ad0e0a2d601f7efcfa9eaa05769a1c9f3,1778128239699,a4c0c10e52ed7e5b92d5c93904820ba5a047af2ad083ea9a4d267da20d8b4633
121
+ .git/objects/4a/366c58eadd5b9d0ff59fb16ccb9dbcc8dca02c,1779707576359,5b86ac045d8fbdddfc7295613d0469a999c81d69a982f568f3e596ec05da44c7
122
  .git/objects/47/ba7df2c91a19a47c13970f7cfee172356d57fa,1779655120416,cc2b84b161c0f4660204698e11672a8ee08839b645ceb0f0726deaf9c1d374c4
123
  .git/objects/45/c6043c6b7a55a4a3f5fbd6dfc406aada81a492,1779655499396,b0fd19dc1afe0a28a5f74f3be90471774674d2f36a99c36d8b64a6acc95464bb
124
  .git/objects/43/b9cc154263725a1984489c248e82093573474d,1779706641655,9c997edaefb8825367682b9604a5b280167024d95a88c2bd92670637854e6993
 
139
  .git/objects/20/f664528b9017cee154ce526b345887e41e8b13,1779706631917,25af951a89421127f9e180fb70c50e21d9ca79172a40a587bca570ecf9cac235
140
  .git/objects/18/a8a0afd1c56788799ba6bcfceb19b7bcfa2643,1779656318180,4d983a3ab7229eb21ca4ae44b3904ce569cd901c88d89f061f4e9a07dcaa8660
141
  .git/objects/15/fcaa6e03630909166530b0f016005f3cdfdefa,1778129157406,d810a7419be3127a3c373d8e7f677f87b1c5526f02b2d5ff7192c6b293c256e3
142
+ .git/objects/15/9cc227d3b9e432b2238dfab778b7965d952f7b,1779707576344,e720ac66ebb1010621bdce7e9aece7ce048b33594c605103bc13642dd93b19b5
143
+ .git/objects/15/5b71319e2be0b5af28af07e651a87b924398f5,1779707564377,1cdf79d225a839d103c99148c8060426d679e36d671c37c7bf11ecde201cc6cb
144
  .git/objects/13/e11ebaccc1218988b3384917053541bebc2a1d,1778127728920,d66b4205fa37fe8b7992e9bd23f4b40af7e9bafc8a932b9172020868eabd12d2
145
  .git/objects/12/2c800cb874e7722308915d0cbed993b558c2a4,1779642502104,3a949cef4df9d19d839278daed5b36737e831e60802b0636776cf5e985b0f4d2
146
  .git/objects/10/b2809106604d0b2b600d88782bfacee7779b73,1779655499390,3d3817590eea4aed1e952d77b7e90e6f9c7aa6cf424b79bd4eb050c8406d6f51
 
173
  .git/hooks/fsmonitor-watchman.sample,1778127720688,c7efd67fa8de750df765a1cf18a7bdf6c2298ab4a4ab1eed2772a32e4fd9725d
174
  .git/hooks/commit-msg.sample,1778127720685,00ffdd1510ab6aa72447787ad1516c9bea8345a9cf6ac5345afb1f44aba07f6f
175
  .git/hooks/applypatch-msg.sample,1778127720683,e989aeea500fde675ea8ee009c4cf25aae7abbf4dd02c15b04cb8c85404dbca3
176
+ .git/index,1779709766881,4fa79b21cd8b023d175a8fb179b260f0a2161dbc8e2b94ae84ba075f873a7e50
177
+ .git/COMMIT_EDITMSG,1779709766892,426f7d1ca16d55afea7e133aec62b7752e22697307521223d7c74c751ea49b3b
178
+ .git/refs/remotes/origin/main,1779709793651,e8120b3b7b42380da3bc119a35d2f87a6cb264ecac7dc51da64f80693b06bf0c
179
+ .git/refs/heads/main,1779709766902,e8120b3b7b42380da3bc119a35d2f87a6cb264ecac7dc51da64f80693b06bf0c
180
+ .git/objects/f7/5ba4cf2d99729242d5b7a327f5b56f6c303e50,1779709629767,016bb85c7464ae8d712d87cc021d0534b09e74f73c7d84a1178cf30f73db1f25
181
+ .git/objects/f6/23b3c6cc4c3e773a7628c8aafc01f6c32a9910,1779709629759,c38f48633023a85c9432962c7caf2108440ee4c61e434ff562b80d91709ac778
182
+ .git/objects/ea/23cc33e2d9648b60e56530275caaee0df564d5,1779709766895,0c6c919c8a0fc1762a5fc41adab169e2b71491eca38c75b25f26b61535d1b167
183
+ .git/objects/d7/3ed247a7fcf32501414f82f283695ba66cb31a,1779709766858,82b38336f2a04f3666a1af584bbc511c2e30a5a30c2e900b946c8f34b2b8eb44
184
+ .git/objects/bd/8618e1fb08010f66d4ebe377dea855e9d3dedc,1779709766854,3b0b8c3169137a730ad232a35411d9b5b313c8d575950f9e4c16a7d4a1a6fb6f
185
+ .git/objects/d6/7e9d916182ac88330eebf91b86d4187320a834,1779709766867,6cfb39d768717411695329ab7c4f5f7bfeeb0658acc0b2b4e08cd78b294f2029
186
+ .git/objects/7c/0c89621485e96fdec0d02912deb20f1bf805ac,1779709766863,a2e3534df3b084d388daa7eb3169e70b163f6ff21346f777811e767fc06c6877
187
+ .git/objects/c3/9375e23bef750adfdf09e324a2ef7f816ec8e2,1779709629742,48ad20904d9471cb5e11752098909f0dbbdef88d248f6869217d0a725fa7f6e1
188
+ .git/objects/37/0d79b23719ff952ec2d2f3ea26d6be27c75421,1779709766872,8c3aee51d89047b7b7da277b6cb4f2b6aeae0cc190a28d83af9c21a8d480b6ed
189
+ .git/objects/18/e1d1ab899b11a5dd60d3d01c093dd362ce9d4d,1779709766876,1b827d9a48956149cc91a9fa3f6a0209440e05ece002f41820a846eb15618f48
190
+ .git/objects/07/b9d6a7c9be5759b6bf71f35d3afb6bbaf69d2e,1779709766849,8e166e78d537cdb19a5eeb1b4bd61e7757da940d209aca05228320003a12c4e1
191
+ .git/objects/07/61f5232232de20c642a63f91aba9f97f5b7c5b,1779709629754,94a4611847a9c6a3ea32988d1bf7d6e2489dff1700943639031c76940743f484
192
+ .git/objects/62/80fd99ef9869429ff5bfac16c291c59e179441,1779709629748,f9c39559f1ad6da44f0a66abb4dfc8286e717ac5b5de3ab3c5d0d0f1d58d58d7
193
+ .git/objects/67/034404655e5be6ff74aa60717d5eecd120bcbc,1779709629776,c0a2d9873b3b27b3fdfaaa9f5dd6cb84a03a6b1333b5507b4052db22016a0599
194
+ .git/logs/refs/remotes/origin/main,1779709793656,6b828237135ce5d733ac9bd1908f5131b0ec7172e3511f85c4b84c215e456c65
195
+ .git/logs/HEAD,1779709766903,81f8d8785d6654175ebd151bb4240175e56018264ac5a9fea9cebf6a697b182e
196
+ .git/logs/refs/heads/main,1779709766903,1f6540c434c9b19b042008362711fd7e10481b4d9c0e9074bdf07c9f9fcf63f6
197
+ index.html,1779708908280,fe2a645c565c40662e61035ca314185cdd4f94cdb3f4363cab5de6951110a232
backend/services/model_bert.py CHANGED
@@ -128,17 +128,19 @@ def extract_summary(valid, sentences, embeddings, weights, topic_labels, num_sen
128
  items = clusters[tl]
129
  idxs, sents_cl, embs_cl, ws_cl = zip(*items)
130
  centroid = np.mean(embs_cl, axis=0, keepdims=True)
131
- # Score = cosine
132
- scores = cosine_similarity(np.array(embs_cl), centroid).flatten()
 
133
  picked = 0
134
- for r in np.argsort(scores)[::-1]:
135
  if picked >= per_cl: break
136
  sent = sents_cl[r]
137
  words = sent.lower().split()
138
  trigrams = set(zip(words, words[1:], words[2:])) if len(words) >= 3 else set()
139
  if trigrams & used_trigrams: continue
140
  used_trigrams |= trigrams
141
- selected.append((idxs[r], sent, scores[r], ws_cl[r]))
 
142
  picked += 1
143
 
144
  selected.sort(key=lambda x: x[0])
 
128
  items = clusters[tl]
129
  idxs, sents_cl, embs_cl, ws_cl = zip(*items)
130
  centroid = np.mean(embs_cl, axis=0, keepdims=True)
131
+ cosine_scores = cosine_similarity(np.array(embs_cl), centroid).flatten()
132
+ # Ranking uses cosine * source_weight to select correct sentences
133
+ ranking_scores = cosine_scores * np.array(ws_cl)
134
  picked = 0
135
+ for r in np.argsort(ranking_scores)[::-1]:
136
  if picked >= per_cl: break
137
  sent = sents_cl[r]
138
  words = sent.lower().split()
139
  trigrams = set(zip(words, words[1:], words[2:])) if len(words) >= 3 else set()
140
  if trigrams & used_trigrams: continue
141
  used_trigrams |= trigrams
142
+ # Store the pure cosine similarity score as the score for display
143
+ selected.append((idxs[r], sent, cosine_scores[r], ws_cl[r]))
144
  picked += 1
145
 
146
  selected.sort(key=lambda x: x[0])