---
# ConfigMap holding the Python diagnostic script that the GPU test pod mounts
# and executes. The script only prints information; it does not train a model.
apiVersion: v1
kind: ConfigMap
metadata:
  name: tf-gpu-test-script
data:
  main_tf_gpu_test.py: |
    """Print a short report on TensorFlow's view of GPUs and XLA settings."""
    import os
    import tensorflow as tf


    def main():
        """Run the GPU/library checks and print each result to stdout."""
        print("--- Starting GPU and Library Check ---")

        physical_gpus = tf.config.list_physical_devices("GPU")
        if not physical_gpus:
            print("TensorFlow did not detect any GPU devices. Running on CPU.")
        else:
            selected = physical_gpus[0]
            try:
                # Restrict TensorFlow to the first physical GPU, then report
                # how many logical devices result from that restriction.
                tf.config.set_visible_devices(selected, "GPU")
                logical_gpus = tf.config.list_logical_devices("GPU")
                print(
                    f"TensorFlow detected {len(physical_gpus)} Physical GPUs, "
                    f"{len(logical_gpus)} Logical GPUs. Using: {selected.name}"
                )
            except RuntimeError as err:
                # Report the failure and keep going so the remaining
                # diagnostics below are still printed.
                print(f"RuntimeError during GPU configuration: {err}")

        print(f"XLA_FLAGS environment variable: {os.environ.get('XLA_FLAGS')}")
        print(f"TF_XLA_FLAGS environment variable: {os.environ.get('TF_XLA_FLAGS')}")

        print(f"TensorFlow version: {tf.__version__}")
        print(f"Built with CUDA: {tf.test.is_built_with_cuda()}")
        gpu_present = len(tf.config.list_physical_devices("GPU")) > 0
        print(f"Is GPU available: {gpu_present}")

        print("--- GPU and Library Check Complete ---")


    if __name__ == "__main__":
        main()
---
# One-shot pod that mounts the tf-gpu-test-script ConfigMap at /app and runs
# main_tf_gpu_test.py with the NVIDIA TensorFlow image, requesting one GPU.
apiVersion: v1
kind: Pod
metadata:
  name: tf-gpu-test-pod
spec:
  # The check runs once; do not restart the container after it exits.
  restartPolicy: Never
  # NOTE(review): assumes a RuntimeClass named "nvidia" exists in the cluster.
  runtimeClassName: nvidia
  containers:
    - name: tensorflow-gpu-tester
      image: nvcr.io/nvidia/tensorflow:24.04-tf2-py3
      imagePullPolicy: IfNotPresent
      command:
        - python
      args:
        - /app/main_tf_gpu_test.py
      resources:
        limits:
          nvidia.com/gpu: '1'
      # NVIDIA_VISIBLE_DEVICES is deliberately NOT set here. The NVIDIA device
      # plugin injects it for the GPU allocated through the nvidia.com/gpu
      # limit; overriding it with "all" in the pod spec would expose every GPU
      # on the node to this container and bypass that limit.
      volumeMounts:
        - name: tf-script-volume
          mountPath: /app
  volumes:
    - name: tf-script-volume
      configMap:
        name: tf-gpu-test-script
        # Project only the script key so /app contains just this one file.
        items:
          - key: main_tf_gpu_test.py
            path: main_tf_gpu_test.py
