diff --git a/check.py b/check.py index 3afa9ec..a22e6e7 100644 --- a/check.py +++ b/check.py @@ -1,6 +1,7 @@ import threading import paramiko import argparse +import socket import json import time import os @@ -79,15 +80,15 @@ table_icon_3 = { table_icon = table_icon_3 -def check_gpu_utilization(server:dict): +def check_gpu_utilization(server:dict, timeout=2): # 建立SSH连接 client = paramiko.SSHClient() client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - client.connect(server['ip'], port=server['port'], username=server['username'], password=server.get('password', None), key_filename=server.get('key_filename', None)) + client.connect(server['ip'], port=server['port'], username=server['username'], password=server.get('password', None), key_filename=server.get('key_filename', None), timeout=timeout) # 执行命令查看显卡占用情况(这里以nvidia-smi为例) cmd = 'nvidia-smi --query-gpu=index,name,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu --format=csv' - stdin, stdout, stderr = client.exec_command(cmd) + stdin, stdout, stderr = client.exec_command(cmd, timeout=timeout) output = stdout.read().decode() output = output.split('\n') start_idx = 0 @@ -493,8 +494,16 @@ def check_all(show_type='list'): try: res = check_gpu_utilization(server_data) print_res(res) - except: - print('连接出现问题.') + except paramiko.AuthenticationException: + print("Authentication failed.") + except paramiko.SSHException as ssh_err: + print(f"SSH error: {ssh_err}") + except socket.timeout: + print("Connection timed out.") + except Exception as e: + print(f"{e}") + # except: + # print('连接出现问题.') print() elif show_type == 'table': pass