Browse Source

update

master
lxbhahaha 1 year ago
parent
commit
45d4481cd3
  1. 106
      check.py

106
check.py

@ -1,4 +1,3 @@
import multiprocessing
import threading import threading
import paramiko import paramiko
import argparse import argparse
@ -187,7 +186,7 @@ def print_table_res(data_list):
# print('TODO') # print('TODO')
print(time.strftime("%Y%m-%d%H:%M:%S", time.localtime(time.time()))) print(time.strftime("%Y%m-%d%H:%M:%S", time.localtime(time.time())))
cell_width_list = [10, 20, 24, 15] cell_width_list = [7, 20, 24, 15]
len_last3 = cell_width_list[1] + cell_width_list[2] + cell_width_list[3] + 2 len_last3 = cell_width_list[1] + cell_width_list[2] + cell_width_list[3] + 2
# 输出head ------------------------------------------ # 输出head ------------------------------------------
print_line('up', cell_width_list) print_line('up', cell_width_list)
@ -275,7 +274,8 @@ def print_table_res(data_list):
str_list.append(table_icon['vline']) str_list.append(table_icon['vline'])
str_list.append(clamp_str(title, cell_width_list[0], True)) str_list.append(clamp_str(title, cell_width_list[0], True))
str_list.append(table_icon['vline']) str_list.append(table_icon['vline'])
str_list.append(clamp_str('erro', len_last3, True)) err_info = data.get('err_info', 'error')
str_list.append(clamp_str(err_info, len_last3, True))
str_list.append(table_icon['vline']) str_list.append(table_icon['vline'])
print(''.join(str_list)) print(''.join(str_list))
@ -291,50 +291,53 @@ def keep_check_one(server: dict, shared_data_list: list, server_idx: int, interv
# 循环检测 # 循环检测
while run_realtime: while run_realtime:
stdin, stdout, stderr = client.exec_command(cmd) try:
output = stdout.read().decode() stdin, stdout, stderr = client.exec_command(cmd)
output = output.split('\n') output = stdout.read().decode()
start_idx = 0 output = output.split('\n')
for i in range(len(output)): start_idx = 0
if output[i] == 'index, name, memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu': for i in range(len(output)):
start_idx = i + 1 if output[i] == 'index, name, memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu':
break start_idx = i + 1
output = output[start_idx:-1] break
# 解析数据 ----------------------------- output = output[start_idx:-1]
result = [] # 解析数据 -----------------------------
for data in output: result = []
data_list = data.split(', ') for data in output:
idx = int(data_list[0]) data_list = data.split(', ')
gpu_name = data_list[1] idx = int(data_list[0])
total_mem = int(data_list[2].split(' ')[0]) gpu_name = data_list[1]
used_mem = int(data_list[3].split(' ')[0]) total_mem = int(data_list[2].split(' ')[0])
free_mem = int(data_list[4].split(' ')[0]) used_mem = int(data_list[3].split(' ')[0])
util_gpu = int(data_list[5].split(' ')[0]) free_mem = int(data_list[4].split(' ')[0])
util_mem = int(data_list[6].split(' ')[0]) util_gpu = int(data_list[5].split(' ')[0])
temperature = int(data_list[7]) util_mem = int(data_list[6].split(' ')[0])
temperature = int(data_list[7])
# 简化GPU名称
if gpu_name.startswith('NVIDIA '): # 简化GPU名称
gpu_name = gpu_name[7:] if gpu_name.startswith('NVIDIA '):
if gpu_name.startswith('GeForce '): gpu_name = gpu_name[7:]
gpu_name = gpu_name[8:] if gpu_name.startswith('GeForce '):
gpu_name = gpu_name[8:]
result.append({
'idx': idx, result.append({
'gpu_name': gpu_name, 'idx': idx,
'total_mem': total_mem, 'gpu_name': gpu_name,
'used_mem': used_mem, 'total_mem': total_mem,
'free_mem': free_mem, 'used_mem': used_mem,
'util_gpu': util_gpu, 'free_mem': free_mem,
'util_mem': util_mem, 'util_gpu': util_gpu,
'temperature': temperature 'util_mem': util_mem,
}) 'temperature': temperature
})
# locked = False
with data_list_lock: # locked = False
# locked = True with data_list_lock:
shared_data_list[server_idx]['info_list'] = result # locked = True
# locked = False shared_data_list[server_idx]['info_list'] = result
# locked = False
except:
shared_data_list[server_idx].pop('info_list')
time.sleep(interval) time.sleep(interval)
@ -343,15 +346,15 @@ def keep_check_one(server: dict, shared_data_list: list, server_idx: int, interv
except Exception as e: except Exception as e:
# if data_list_lock.locked and locked: # if data_list_lock.locked and locked:
# data_list_lock.release() # data_list_lock.release()
print(e) # print(e)
shared_data_list[server_idx]['err_info'] = str(e)
def realtime(args): def realtime(args):
global run_realtime global run_realtime
try: try:
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('-n', type=float, default=2, help='服务器多久刷新一次') parser.add_argument('-n', type=float, default=2, help='多久刷新一次')
parser.add_argument('-f', type=float, default=2, help='显示多久刷新一次')
parser.add_argument('-e', '--exclude', type=str, default='', help='不需要显示的服务器(title)用,分割') parser.add_argument('-e', '--exclude', type=str, default='', help='不需要显示的服务器(title)用,分割')
parser.add_argument('-t', '--table', action='store_true', help='以表格形式绘制') parser.add_argument('-t', '--table', action='store_true', help='以表格形式绘制')
args = parser.parse_args(args) args = parser.parse_args(args)
@ -366,7 +369,6 @@ def realtime(args):
server_list = json.load(f) server_list = json.load(f)
# 共享list # 共享list
manager = multiprocessing.Manager()
data_list = [] data_list = []
run_realtime = True run_realtime = True
@ -398,7 +400,7 @@ def realtime(args):
# print(info_list) # print(info_list)
else: else:
print('出错') print('出错')
time.sleep(args.f) time.sleep(args.n)
run_realtime = False run_realtime = False

Loading…
Cancel
Save