Browse Source

update

master
lxbhahaha 1 year ago
parent
commit
45d4481cd3
  1. 106
      check.py

106
check.py

@ -1,4 +1,3 @@
import multiprocessing
import threading
import paramiko
import argparse
@ -187,7 +186,7 @@ def print_table_res(data_list):
# print('TODO')
print(time.strftime("%Y%m-%d%H:%M:%S", time.localtime(time.time())))
cell_width_list = [10, 20, 24, 15]
cell_width_list = [7, 20, 24, 15]
len_last3 = cell_width_list[1] + cell_width_list[2] + cell_width_list[3] + 2
# 输出head ------------------------------------------
print_line('up', cell_width_list)
@ -275,7 +274,8 @@ def print_table_res(data_list):
str_list.append(table_icon['vline'])
str_list.append(clamp_str(title, cell_width_list[0], True))
str_list.append(table_icon['vline'])
str_list.append(clamp_str('erro', len_last3, True))
err_info = data.get('err_info', 'error')
str_list.append(clamp_str(err_info, len_last3, True))
str_list.append(table_icon['vline'])
print(''.join(str_list))
@ -291,50 +291,53 @@ def keep_check_one(server: dict, shared_data_list: list, server_idx: int, interv
# 循环检测
while run_realtime:
stdin, stdout, stderr = client.exec_command(cmd)
output = stdout.read().decode()
output = output.split('\n')
start_idx = 0
for i in range(len(output)):
if output[i] == 'index, name, memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu':
start_idx = i + 1
break
output = output[start_idx:-1]
# 解析数据 -----------------------------
result = []
for data in output:
data_list = data.split(', ')
idx = int(data_list[0])
gpu_name = data_list[1]
total_mem = int(data_list[2].split(' ')[0])
used_mem = int(data_list[3].split(' ')[0])
free_mem = int(data_list[4].split(' ')[0])
util_gpu = int(data_list[5].split(' ')[0])
util_mem = int(data_list[6].split(' ')[0])
temperature = int(data_list[7])
# 简化GPU名称
if gpu_name.startswith('NVIDIA '):
gpu_name = gpu_name[7:]
if gpu_name.startswith('GeForce '):
gpu_name = gpu_name[8:]
result.append({
'idx': idx,
'gpu_name': gpu_name,
'total_mem': total_mem,
'used_mem': used_mem,
'free_mem': free_mem,
'util_gpu': util_gpu,
'util_mem': util_mem,
'temperature': temperature
})
# locked = False
with data_list_lock:
# locked = True
shared_data_list[server_idx]['info_list'] = result
# locked = False
try:
stdin, stdout, stderr = client.exec_command(cmd)
output = stdout.read().decode()
output = output.split('\n')
start_idx = 0
for i in range(len(output)):
if output[i] == 'index, name, memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu':
start_idx = i + 1
break
output = output[start_idx:-1]
# 解析数据 -----------------------------
result = []
for data in output:
data_list = data.split(', ')
idx = int(data_list[0])
gpu_name = data_list[1]
total_mem = int(data_list[2].split(' ')[0])
used_mem = int(data_list[3].split(' ')[0])
free_mem = int(data_list[4].split(' ')[0])
util_gpu = int(data_list[5].split(' ')[0])
util_mem = int(data_list[6].split(' ')[0])
temperature = int(data_list[7])
# 简化GPU名称
if gpu_name.startswith('NVIDIA '):
gpu_name = gpu_name[7:]
if gpu_name.startswith('GeForce '):
gpu_name = gpu_name[8:]
result.append({
'idx': idx,
'gpu_name': gpu_name,
'total_mem': total_mem,
'used_mem': used_mem,
'free_mem': free_mem,
'util_gpu': util_gpu,
'util_mem': util_mem,
'temperature': temperature
})
# locked = False
with data_list_lock:
# locked = True
shared_data_list[server_idx]['info_list'] = result
# locked = False
except:
shared_data_list[server_idx].pop('info_list')
time.sleep(interval)
@ -343,15 +346,15 @@ def keep_check_one(server: dict, shared_data_list: list, server_idx: int, interv
except Exception as e:
# if data_list_lock.locked and locked:
# data_list_lock.release()
print(e)
# print(e)
shared_data_list[server_idx]['err_info'] = str(e)
def realtime(args):
global run_realtime
try:
parser = argparse.ArgumentParser()
parser.add_argument('-n', type=float, default=2, help='服务器多久刷新一次')
parser.add_argument('-f', type=float, default=2, help='显示多久刷新一次')
parser.add_argument('-n', type=float, default=2, help='多久刷新一次')
parser.add_argument('-e', '--exclude', type=str, default='', help='不需要显示的服务器(title)用,分割')
parser.add_argument('-t', '--table', action='store_true', help='以表格形式绘制')
args = parser.parse_args(args)
@ -366,7 +369,6 @@ def realtime(args):
server_list = json.load(f)
# 共享list
manager = multiprocessing.Manager()
data_list = []
run_realtime = True
@ -398,7 +400,7 @@ def realtime(args):
# print(info_list)
else:
print('出错')
time.sleep(args.f)
time.sleep(args.n)
run_realtime = False

Loading…
Cancel
Save