File size: 2,777 Bytes
21ad80b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Load required libraries
library(ggplot2)
library(dplyr)
library(safetensors)
library(jsonlite)

#' Load one tensor from a sharded safetensors checkpoint
#'
#' Reads the checkpoint's index JSON, looks up which shard file is recorded
#' for `matrix_name` in the index's `weight_map`, loads that shard with
#' `safe_load_file()` and returns the requested entry.
#'
#' @param matrix_name Tensor name, as keyed in the index's `weight_map`.
#' @param base_dir Directory holding the index JSON and the shard files.
#' @param index_json File name of the index JSON inside `base_dir`.
#' @return The entry named `matrix_name` from the loaded shard, or `NULL`
#'   (invisibly) when the name is not listed in the index.
get_tensor <- function(
    matrix_name,
    base_dir,
    index_json = "model.safetensors.index.json") {
  index_file <- file.path(base_dir, index_json)

  # fromJSON() also accepts raw JSON text, so handing it a path that does not
  # exist can surface as a confusing parse error; fail with a clear message.
  if (!file.exists(index_file)) {
    stop("Index file not found: ", index_file, call. = FALSE)
  }
  weight_map <- fromJSON(index_file)$weight_map

  # Plain membership test on the names; exists() is meant for variable lookup
  # and would coerce the whole map into an environment on every call.
  if (!matrix_name %in% names(weight_map)) {
    return(invisible(NULL))
  }

  shard_path <- file.path(base_dir, weight_map[[matrix_name]])
  safe_load_file(shard_path)[[matrix_name]]
}

#' Compute a clamped square index window around a centre point
#'
#' For each axis the window runs from `centre - bs / 2 + 1` to
#' `centre + bs / 2`. The start is raised to 1 when it falls below 1 and the
#' end is lowered to the axis' upper bound when it exceeds it.
#'
#' @param cx,cy Centre position along the x and y axes.
#' @param bs Window size; half of it is taken on either side of the centre.
#' @param upper_x,upper_y Largest allowed index along each axis.
#' @return A list with elements `sxs`, `sxe` (x start/end) and `sys`, `sye`
#'   (y start/end).
get_region <- function(cx, cy, bs, upper_x = 4096, upper_y = 4096) {
  half <- bs / 2

  # Start/end of the window along one axis, clamped to [1, upper].
  axis_span <- function(centre, upper) {
    first <- centre - half + 1
    last <- centre + half
    if (first < 1) {
      first <- 1
    }
    if (last > upper) {
      last <- upper
    }
    list(first, last)
  }

  x_span <- axis_span(cx, upper_x)
  y_span <- axis_span(cy, upper_y)

  list(
    sxs = x_span[[1]],
    sxe = x_span[[2]],
    sys = y_span[[1]],
    sye = y_span[[2]]
  )
}


# Name of the tensor to inspect and the local checkpoint snapshot it lives in.
# NOTE(review): `matrix` shadows base::matrix as a variable name; it is kept
# because the plot subtitle further down reads it.
matrix <- "31.self_attn.o_proj"
orig_matrix <- paste0("model.layers.", matrix, ".weight")
base_dir <- path.expand(
  "~/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/01c7f73d771dfac7d292323805ebc428287df4f9"
)

wo <- get_tensor(orig_matrix, base_dir)
# get_tensor() yields NULL when the name is not listed in the index; stop here
# with a clear message instead of failing further down with an
# unrelated-looking error.
if (is.null(wo)) {
  stop("Tensor not found in checkpoint index: ", orig_matrix, call. = FALSE)
}
wo <- as.matrix(wo)


# Pick a bs x bs window of the weight matrix centred on (cx, cy).
# With the centre at (0, 0) the window start is clamped to 1, so the slice
# actually taken is rows/columns 1..bs/2.
bs <- 64
cx <- 0
cy <- 0
# Clamp against the real matrix dimensions rather than get_region()'s
# defaults, so the slice can never run past the end of `wo`.
ret <- get_region(cx, cy, bs, upper_x = nrow(wo), upper_y = ncol(wo))
wo1 <- wo[ret$sxs:ret$sxe, ret$sys:ret$sye]

# Flatten the selected block of weights into a one-column data frame; every
# weight value is one observation for the 1D clustering below.
raw_data <- as.vector(wo1)
data <- data.frame(
  x = raw_data
)

# kmeans() draws random starting centres and jitter() draws random offsets;
# fix the seed so cluster labels and the figure are reproducible across runs.
set.seed(42)

# Perform k-means clustering (16 centres, best of 25 random starts)
kmeans_result <- kmeans(data, centers = 16, nstart = 25)

# Add cluster assignments to the data
data$cluster <- as.factor(kmeans_result$cluster)

# Create a data frame for centroids
centroids <- data.frame(
  x = kmeans_result$centers[, 1],
  y = 0 # Set y to 0 for 1D visualization
)

# Create a jittered y-coordinate for better visualization: the data is 1D, so
# the vertical spread only keeps overlapping points apart.
data$y <- jitter(rep(0, nrow(data)), amount = 0.3)

# Create the plot: jittered points coloured by cluster, with every centroid
# marked by a black triangle and a dashed vertical guide line.
p_kmeans <- ggplot() +
  # Plot the points with jittering
  geom_point(
    data = data,
    aes(x = x, y = y, color = cluster),
    alpha = 0.6,
    size = 2
  ) +
  # Add centroids
  geom_point(
    data = centroids,
    aes(x = x, y = y),
    color = "black",
    size = 3,
    shape = 2
  ) +
  # Add lines to show the actual 1D nature of data
  geom_segment(
    data = centroids,
    aes(x = x, xend = x, y = -0.5, yend = 0.5),
    color = "black",
    linetype = "dashed",
    alpha = 0.5
  ) +
  # Customize the theme and labels
  theme_minimal() +
  labs(
    # Take k from the centroid table so the title cannot drift away from the
    # number of clusters that were actually fitted.
    title = paste0("1D K-means Clustering (k=", nrow(centroids), ")"),
    subtitle = paste0("Llama2-7b ", matrix),
    x = "Value",
    y = ""
  ) +
  # The y axis carries only jitter, so hide its text and ticks; the legend is
  # hidden as well.
  theme(
    legend.position = "none",
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  scale_color_discrete(name = "Cluster")

# Print explicitly so the figure is also rendered when the script is run via
# source(), where top-level values are not auto-printed.
print(p_kmeans)