Add 12 evaluation results to model-index

#19
by burtenshaw HF Staff - opened
Files changed (1) hide show
  1. README.md +111 -0
README.md CHANGED
@@ -3,6 +3,117 @@ library_name: transformers
3
  license: apache-2.0
4
  license_link: https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507/blob/main/LICENSE
5
  pipeline_tag: text-generation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  ---
7
 
8
  # Qwen3-4B-Instruct-2507
 
3
  license: apache-2.0
4
  license_link: https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507/blob/main/LICENSE
5
  pipeline_tag: text-generation
6
+ model-index:
7
+ - name: Qwen3-4B-Instruct-2507
8
+   results:
9
+   - task:
10
+       type: question-answering
11
+     dataset:
12
+       name: GPQA
13
+       type: gpqa
14
+     metrics:
15
+     - name: Accuracy
16
+       type: accuracy
17
+       value: 50.3
18
+   - task:
19
+       type: question-answering
20
+     dataset:
21
+       name: SuperGPQA
22
+       type: supergpqa
23
+     metrics:
24
+     - name: Accuracy
25
+       type: accuracy
26
+       value: 32.2
27
+   - task:
28
+       type: reasoning
29
+     dataset:
30
+       name: AIME25
31
+       type: aime25
32
+     metrics:
33
+     - name: Score
34
+       type: score
35
+       value: 22.7
36
+   - task:
37
+       type: reasoning
38
+     dataset:
39
+       name: HMMT25
40
+       type: hmmt25
41
+     metrics:
42
+     - name: Score
43
+       type: score
44
+       value: 9.7
45
+   - task:
46
+       type: reasoning
47
+     dataset:
48
+       name: ZebraLogic
49
+       type: zebralogic
50
+     metrics:
51
+     - name: Score
52
+       type: score
53
+       value: 14.8
54
+   - task:
55
+       type: reasoning
56
+     dataset:
57
+       name: LiveBench 20241125
58
+       type: livebench_20241125
59
+     metrics:
60
+     - name: Score
61
+       type: score
62
+       value: 41.5
63
+   - task:
64
+       type: text-generation
65
+     dataset:
66
+       name: IFEval
67
+       type: ifeval
68
+     metrics:
69
+     - name: Score
70
+       type: score
71
+       value: 74.5
72
+   - task:
73
+       type: text-generation
74
+     dataset:
75
+       name: Creative Writing v3
76
+       type: creative_writing_v3
77
+     metrics:
78
+     - name: Score
79
+       type: score
80
+       value: 72.7
81
+   - task:
82
+       type: text-generation
83
+     dataset:
84
+       name: WritingBench
85
+       type: writingbench
86
+     metrics:
87
+     - name: Score
88
+       type: score
89
+       value: 66.9
90
+   - task:
91
+       type: text-generation
92
+     dataset:
93
+       name: MultiIF
94
+       type: multiif
95
+     metrics:
96
+     - name: Score
97
+       type: score
98
+       value: 60.7
99
+   - task:
100
+       type: text-generation
101
+     dataset:
102
+       name: INCLUDE
103
+       type: include
104
+     metrics:
105
+     - name: Score
106
+       type: score
107
+       value: 58.6
108
+   - task:
109
+       type: reasoning
110
+     dataset:
111
+       name: PolyMATH
112
+       type: polymath
113
+     metrics:
114
+     - name: Score
115
+       type: score
116
+       value: 15.6
117
  ---
118
 
119
  # Qwen3-4B-Instruct-2507