commit a4566f179ca2e585e9af3044efe7b599232a58cd
Author: Yi-Ting Shih <ytshih@it.cs.nycu.edu.tw>
Date:   Sat Mar 28 02:59:29 2026 +0800

    Feat: lab1 done

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bb85d70
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+# HPC Lab 1
+
+- https://github.com/ncabatoff/process-exporter
+- https://github.com/utkuozdemir/nvidia_gpu_exporter/blob/master/INSTALL.md
+- https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/
+
+The kernel log below shows that the 595.58.03 driver ignores the Tesla V100; install the 580.xx legacy driver instead.
+
+```
+Mar 28 01:42:44 vm1774609080735-5833800-iaas kernel: NVRM: The NVIDIA Tesla V100-SXM2-32GB GPU installed in this system is
+                                                     NVRM:  supported through the NVIDIA 580.xx Legacy drivers. Please
+                                                     NVRM:  visit http://www.nvidia.com/object/unix.html for more
+                                                     NVRM:  information.  The 595.58.03 NVIDIA driver will ignore
+                                                     NVRM:  this GPU.  Continuing probe...
+```
diff --git a/controller/monitoring/docker-compose.yaml b/controller/monitoring/docker-compose.yaml
new file mode 100644
index 0000000..8029ed9
--- /dev/null
+++ b/controller/monitoring/docker-compose.yaml
@@ -0,0 +1,36 @@
+---
+# Monitoring stack for the controller host: Prometheus plus the host-level
+# node and process exporters.
+services:
+  prometheus:
+    image: docker.io/prom/prometheus:v3.9.1
+    # Same restart policy as node_exporter so scraping resumes after a reboot.
+    restart: unless-stopped
+    ports:
+      # Quoted: Compose recommends HOST:CONTAINER mappings be strings.
+      - '9090:9090'
+    volumes:
+      - './docker/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro'
+      - './docker/prometheus/clients.yml:/etc/prometheus/clients.yml:ro'
+
+  node_exporter:
+    image: quay.io/prometheus/node-exporter:v1.10.2
+    command: ['--path.rootfs=/host']
+    # Shares the host's network and PID namespaces.
+    network_mode: host
+    pid: host
+    restart: unless-stopped
+    volumes:
+      - '/:/host:ro,rslave'
+
+  process_exporter:
+    image: docker.io/ncabatoff/process-exporter:v0.8.7
+    privileged: true
+    restart: unless-stopped
+    # Reads the host's /proc (mounted below) and the config mounted at /config.
+    command: ['-procfs', '/host/proc', '-config.path', '/config/config.yml']
+    ports:
+      - '9256:9256'
+    volumes:
+      - '/proc:/host/proc:ro,rslave'
+      - './docker/process_exporter:/config:ro'
diff --git a/controller/monitoring/docker/process_exporter/config.yml b/controller/monitoring/docker/process_exporter/config.yml
new file mode 100644
index 0000000..b7837fe
--- /dev/null
+++ b/controller/monitoring/docker/process_exporter/config.yml
@@ -0,0 +1,5 @@
+process_names:  # processes to export, matched by comm name
+  - comm:
+      - bash
+      - prometheus
+      - vim
diff --git a/controller/monitoring/docker/prometheus/clients.yml b/controller/monitoring/docker/prometheus/clients.yml
new file mode 100644
index 0000000..3638a27
--- /dev/null
+++ b/controller/monitoring/docker/prometheus/clients.yml
@@ -0,0 +1,9 @@
+- labels: {}
+  targets:
+    - 'controller:9100'  # 9100: presumably node_exporter's default port
+    - 'cpu01:9100'
+    - 'gpu01:9100'
+    - 'controller:9256'  # 9256: process_exporter
+    - 'cpu01:9256'
+    - 'gpu01:9256'
+    - 'gpu01:9835'  # 9835: nvidia_gpu_exporter
diff --git a/controller/monitoring/docker/prometheus/prometheus.yml b/controller/monitoring/docker/prometheus/prometheus.yml
new file mode 100644
index 0000000..fa645e3
--- /dev/null
+++ b/controller/monitoring/docker/prometheus/prometheus.yml
@@ -0,0 +1,40 @@
+global:
+  scrape_interval: 1s
+  scrape_timeout: 1s  # explicit: must not exceed scrape_interval, so the 10s default cannot apply
+  evaluation_interval: 30s
+  body_size_limit: 15MB
+  sample_limit: 5000
+  target_limit: 30
+  label_limit: 30
+  label_name_length_limit: 200
+  label_value_length_limit: 200
+  query_log_file: query.log
+  scrape_failure_log_file: fail.log
+
+runtime:
+  gogc: 42  # Go garbage collector GOGC setting
+
+scrape_configs:
+  - job_name: node  # scrapes every target listed in clients.yml
+
+    file_sd_configs:
+      - files:
+          - clients.yml
+        refresh_interval: 10m
+
+  - job_name: prometheus  # Prometheus scraping its own metrics endpoint
+
+    static_configs:
+      - targets: ["localhost:9090"]
+        labels: {}
+
+    honor_labels: true
+    fallback_scrape_protocol: PrometheusText0.0.4
+    scrape_failure_log_file: fail_prom.log
+
+storage:
+  tsdb:
+    out_of_order_time_window: 30m
+    retention:
+      time: 1d
+      size: 1GB
diff --git a/cpu01/monitoring/docker-compose.yaml b/cpu01/monitoring/docker-compose.yaml
new file mode 100644
index 0000000..b95573e
--- /dev/null
+++ b/cpu01/monitoring/docker-compose.yaml
@@ -0,0 +1,25 @@
+---
+# Host-level exporters for cpu01.
+services:
+  node_exporter:
+    image: quay.io/prometheus/node-exporter:v1.10.2
+    command: ['--path.rootfs=/host']
+    # Shares the host's network and PID namespaces.
+    network_mode: host
+    pid: host
+    restart: unless-stopped
+    volumes:
+      - '/:/host:ro,rslave'
+  process_exporter:
+    image: docker.io/ncabatoff/process-exporter:v0.8.7
+    privileged: true
+    # Same restart policy as node_exporter so both return after a reboot.
+    restart: unless-stopped
+    # Reads the host's /proc (mounted below) and the config mounted at /config.
+    command: ['-procfs', '/host/proc', '-config.path', '/config/config.yml']
+    ports:
+      # Quoted: Compose recommends HOST:CONTAINER mappings be strings.
+      - '9256:9256'
+    volumes:
+      - '/proc:/host/proc:ro,rslave'
+      - './docker/process_exporter:/config:ro'
diff --git a/cpu01/monitoring/docker/process_exporter/config.yml b/cpu01/monitoring/docker/process_exporter/config.yml
new file mode 100644
index 0000000..b7837fe
--- /dev/null
+++ b/cpu01/monitoring/docker/process_exporter/config.yml
@@ -0,0 +1,5 @@
+process_names:  # processes to export, matched by comm name
+  - comm:
+      - bash
+      - prometheus
+      - vim
diff --git a/gpu01/monitoring/docker-compose.yaml b/gpu01/monitoring/docker-compose.yaml
new file mode 100644
index 0000000..ef0227d
--- /dev/null
+++ b/gpu01/monitoring/docker-compose.yaml
@@ -0,0 +1,39 @@
+---
+# Host-level and GPU exporters for gpu01.
+services:
+  node_exporter:
+    image: quay.io/prometheus/node-exporter:v1.10.2
+    command: ['--path.rootfs=/host']
+    # Shares the host's network and PID namespaces.
+    network_mode: host
+    pid: host
+    restart: unless-stopped
+    volumes:
+      - '/:/host:ro,rslave'
+  process_exporter:
+    image: docker.io/ncabatoff/process-exporter:v0.8.7
+    privileged: true
+    # Same restart policy as node_exporter so both return after a reboot.
+    restart: unless-stopped
+    # Reads the host's /proc (mounted below) and the config mounted at /config.
+    command: ['-procfs', '/host/proc', '-config.path', '/config/config.yml']
+    ports:
+      # Quoted: Compose recommends HOST:CONTAINER mappings be strings.
+      - '9256:9256'
+    volumes:
+      - '/proc:/host/proc:ro,rslave'
+      - './docker/process_exporter:/config:ro'
+  nvidia_gpu_exporter:
+    image: ghcr.io/utkuozdemir/nvidia_gpu_exporter:1.4.1-amd64
+    restart: unless-stopped
+    ports:
+      - '9835:9835'
+    # Pass the host's NVIDIA device nodes, NVML library, and nvidia-smi binary
+    # into the container.
+    devices:
+      - '/dev/nvidiactl:/dev/nvidiactl'
+      - '/dev/nvidia0:/dev/nvidia0'
+    volumes:
+      - '/usr/lib/x86_64-linux-gnu/libnvidia-ml.so:/usr/lib/x86_64-linux-gnu/libnvidia-ml.so'
+      - '/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1:/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1'
+      - '/usr/bin/nvidia-smi:/usr/bin/nvidia-smi'
diff --git a/gpu01/monitoring/docker/process_exporter/config.yml b/gpu01/monitoring/docker/process_exporter/config.yml
new file mode 100644
index 0000000..b7837fe
--- /dev/null
+++ b/gpu01/monitoring/docker/process_exporter/config.yml
@@ -0,0 +1,5 @@
+process_names:  # processes to export, matched by comm name
+  - comm:
+      - bash
+      - prometheus
+      - vim