Uday committed on
Commit
32e3089
·
1 Parent(s): 6914bc9

Fix: runtime error, workload was not healthy after 30 min

Browse files
Files changed (2) hide show
  1. entrypoint.sh +4 -0
  2. index.html +27 -0
entrypoint.sh CHANGED
@@ -19,5 +19,9 @@ fi
19
  # This writes to ~/.cache/huggingface/accelerate/default_config.yaml
20
  python -c "from accelerate.utils import write_basic_config; write_basic_config(mixed_precision='fp16')"
21
 
 
 
 
 
22
  # Run accelerate launch with the training script and arguments
23
  exec accelerate launch tasks/image_classification/train_energy.py "${args[@]}"
 
19
  # This writes to ~/.cache/huggingface/accelerate/default_config.yaml
20
  python -c "from accelerate.utils import write_basic_config; write_basic_config(mixed_precision='fp16')"
21
 
22
+ # Start a dummy web server in the background to satisfy HF Spaces health check (port 7860)
23
+ # This serves the current directory, which should contain index.html
24
+ python -m http.server 7860 &
25
+
26
  # Run accelerate launch with the training script and arguments
27
  exec accelerate launch tasks/image_classification/train_energy.py "${args[@]}"
index.html ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>CTM Training Status</title>
5
+ <style>
6
+ body {
7
+ font-family: sans-serif;
8
+ text-align: center;
9
+ padding: 50px;
10
+ }
11
+ h1 {
12
+ color: #333;
13
+ }
14
+ p {
15
+ color: #666;
16
+ }
17
+ </style>
18
+ </head>
19
+ <body>
20
+ <h1>Training in Progress</h1>
21
+ <p>
22
+ The Continuous Thought Machine energy-based halting experiment is
23
+ currently training.
24
+ </p>
25
+ <p>Please check the <strong>Logs</strong> tab for real-time updates.</p>
26
+ </body>
27
+ </html>