import os
from difflib import Differ

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


# Sample collision records consumed by the plotting code below. Each record
# holds a token-id sequence, the raw text chunks listed for it, and summary
# Levenshtein statistics. Presumably these were copied from the output of a
# separate tokenizer-collision analysis -- verify against that source.
#
# NOTE(review): the summary fields of the first record do not agree with the
# data listed next to them: "num_raw_variants" is 21 but only 19 variants are
# listed, and "distances" has 20 values whose mean is 6.95 (recorded: 8.74)
# and whose maximum is 13 (recorded: 23). This looks like a truncated excerpt
# of a larger result; the values are left untouched until that is confirmed.
collisions = [
    {
        "colliding_token_sequence": [265, 393, 320],
        "num_raw_variants": 21,
        "raw_chunk_variants": [
            "\nif __name__ == '_", "\nif __n", "\nif __name__ == '__main",
            "\nif __name__ == '__main__'", "\nif __", "\nif _", "\nif __name__ ",
            "\nif __name__ =", "\nif __name__ == '__", "\nif __na", "\nif __name__",
            "\nif __name", "\nif __name__ == '__main__':", "\nif __name__ == '__main__':\n",
            "\nif __nam", "\nif __name_", "\nif __name__ == ", "\nif __name__ == '__ma",
            "\nif __name__ == '__main_",
        ],
        "levenshtein_analysis": {
            "distances": [11, 5, 8, 12, 13, 5, 4, 1, 10, 6, 8, 9, 10, 9, 7, 2, 3, 6, 7, 3],
            "average_distance": 8.74,
            "max_distance": 23,
            "min_distance": 1,
        },
    },
    {
        "colliding_token_sequence": [506, 354, 256],
        "num_raw_variants": 2,
        # The second variant ends in U+FFFD (replacement character); presumably
        # a multi-byte character cut mid-sequence -- TODO confirm it is intended.
        "raw_chunk_variants": [
            "数据", "数�",
        ],
        "levenshtein_analysis": {
            "distances": [0, 1],
            "average_distance": 0.5,
            "max_distance": 1,
            "min_distance": 0,
        },
    },
    {
        "colliding_token_sequence": [123, 456, 789],
        "num_raw_variants": 4,
        # NOTE(review): the second and fourth variants are identical as written;
        # runs of whitespace may have been collapsed when this data was copied.
        "raw_chunk_variants": [
            " } ", " }\r\n ", "!", " }\r\n ",
        ],
        "levenshtein_analysis": {
            "distances": [2, 1, 4, 7],
            "average_distance": 3.5,
            "max_distance": 7,
            "min_distance": 1,
        },
    },
]


def plot_text_diff(variants, title, save_path):
    """Render a line-by-line diff of the first two variants to an image file.

    Args:
        variants: Sequence of strings. Only ``variants[0]`` and
            ``variants[1]`` are compared; further entries are ignored.
        title: Title drawn above the diff.
        save_path: Destination passed to ``Figure.savefig``.

    Raises:
        IndexError: If ``variants`` has fewer than two entries.
    """
    # Differ's "?" guide lines end with a newline; strip it so every diff
    # line occupies exactly one row.
    diff = [
        line.rstrip("\n")
        for line in Differ().compare(
            variants[0].splitlines(), variants[1].splitlines()
        )
    ]

    # Grow the figure with the diff so 12 pt rows do not overlap.
    fig, ax = plt.subplots(figsize=(8, max(6, 0.3 * len(diff))))
    try:
        ax.set_title(title)
        # One data unit per row with row 0 at the top. Text does not autoscale
        # the axes, so without explicit limits the rows land outside the
        # default 0..1 range and stack upwards, i.e. in reverse order.
        ax.set_xlim(0, 1)
        ax.set_ylim(max(len(diff), 1), 0)
        ax.axis("off")

        # NOTE(review): "+" red / "-" green is the reverse of the usual diff
        # colouring; kept as-is, confirm whether that is intended.
        colors = {"+": "red", "-": "green", " ": "blue"}
        for row, line in enumerate(diff):
            ax.text(
                0,
                row,
                line,
                color=colors.get(line[0], "black"),
                fontsize=12,
                family="monospace",  # "?" guide lines only align in monospace
                va="top",
                ha="left",
            )

        fig.savefig(save_path, bbox_inches="tight")
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)


def plot_lcp_ratios(lcp_ratios, title, save_path):
    """Save a bar chart with one bar per entry of ``lcp_ratios``.

    Args:
        lcp_ratios: Sequence of numeric values; the bar at x = i shows
            ``lcp_ratios[i]``.
        title: Title of the chart.
        save_path: Destination passed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.barplot(
            x=list(range(len(lcp_ratios))),
            y=lcp_ratios,
            color="skyblue",
            ax=ax,
        )
        ax.set_title(title)
        ax.set_xlabel("Variant Index")
        ax.set_ylabel("LCP Ratio")
        fig.savefig(save_path, bbox_inches="tight")
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)


def plot_levenshtein_distances(distances, title, save_path):
    """Save a 10-bin histogram of ``distances`` with a KDE curve overlaid.

    Args:
        distances: Sequence of numeric distance values.
        title: Title of the chart.
        save_path: Destination passed to ``Figure.savefig``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.histplot(distances, bins=10, kde=True, color="salmon", ax=ax)
        ax.set_title(title)
        ax.set_xlabel("Levenshtein Distance")
        ax.set_ylabel("Frequency")
        fig.savefig(save_path, bbox_inches="tight")
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)


def _lcp_ratios(variants):
    """Return, per variant, the share of its length covered by the shared prefix.

    The shared prefix is the longest prefix common to *all* variants. An empty
    variant gets 0.0 to avoid dividing by zero.

    NOTE(review): this definition of "LCP ratio" is an assumption; confirm it
    matches the metric the chart is meant to show.
    """
    shared = len(os.path.commonprefix(variants))
    return [shared / len(variant) if variant else 0.0 for variant in variants]


# Generate the three charts for every collision record. The LCP chart is fed
# ratios computed from the variants themselves; previously it received the
# average Levenshtein distance repeated once per variant, so every bar was the
# same height and unrelated to the "LCP Ratio" axis label.
for case, collision in enumerate(collisions, start=1):
    variants = collision["raw_chunk_variants"]
    plot_text_diff(
        variants,
        f"Text Difference Visualization Case {case}",
        f"text_diff_case{case}.png",
    )
    plot_lcp_ratios(
        _lcp_ratios(variants),
        f"LCP Ratio of Variants Case {case}",
        f"lcp_case{case}.png",
    )
    plot_levenshtein_distances(
        collision["levenshtein_analysis"]["distances"],
        f"Levenshtein Distance Distribution Case {case}",
        f"levenshtein_case{case}.png",
    )

print("Plots generated successfully!")