Browse Source

初步增加了存储空间的显示

master
鱼骨剪 10 months ago
parent
commit
a0f28e1a84
  1. 85
      app.py
  2. 19
      index.html

85
app.py

@ -36,25 +36,10 @@ def connect_server():
#endregion #endregion
def keep_check_one(server: dict, shared_data_list: dict, server_title: str, interval: float, re_connect_time: float=5): def get_gpus_info(client, timeout):
re_try_count = 0
# 循环连接
while True:
try:
# 建立SSH连接
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(server['ip'], port=server['port'], username=server['username'], password=server.get('password', None), key_filename=server.get('key_filename', None), timeout=interval*3)
cmd = 'nvidia-smi --query-gpu=index,name,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu --format=csv' cmd = 'nvidia-smi --query-gpu=index,name,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu --format=csv'
shared_data_list[server_title]['err_info'] = '' stdin, stdout, stderr = client.exec_command(cmd, timeout=timeout)
re_try_count = 0
# 循环检测
keep_run = True
while keep_run:
try:
stdin, stdout, stderr = client.exec_command(cmd, timeout=interval*3)
output = stdout.read().decode() output = stdout.read().decode()
output = output.split('\n') output = output.split('\n')
start_idx = 0 start_idx = 0
@ -93,18 +78,69 @@ def keep_check_one(server: dict, shared_data_list: dict, server_title: str, inte
'temperature': temperature 'temperature': temperature
}) })
return result
def get_storage_info(client, timeout, path_list):
    """Query disk usage on the remote host for each path in *path_list*.

    Runs ``df -Pk`` over the given SSH client once per path and parses the
    data row that ``df`` prints for the filesystem containing that path.

    Args:
        client: Object exposing ``exec_command(cmd, timeout=...)`` that
            returns a ``(stdin, stdout, stderr)`` triple whose ``stdout``
            has a ``read()`` returning bytes (e.g. a connected SSH client).
        timeout: Timeout forwarded to ``exec_command``.
        path_list: Iterable of remote paths to inspect.

    Returns:
        A list of dicts ``{"path", "total", "available"}``; sizes are in
        1024-byte blocks. Paths for which ``df`` prints no usable row
        (missing path, unexpected output) are skipped.
    """
    import shlex

    result = []
    for target_path in path_list:
        # -P keeps every filesystem on a single line (no wrapping of long
        # device names) and -k pins the unit to 1024-byte blocks.
        # shlex.quote guards against spaces / shell metacharacters in paths.
        cmd = f'df -Pk -- {shlex.quote(target_path)}'
        stdin, stdout, stderr = client.exec_command(cmd, timeout=timeout)
        lines = stdout.read().decode().strip().splitlines()
        # Expect a header row plus one data row; anything less means df
        # failed for this path (its error text goes to stderr).
        if len(lines) < 2:
            continue
        # df reports the filesystem that *contains* the path, so take the
        # data row directly rather than grepping for the path text, which
        # only matches when the path is itself a mount point.
        fields = lines[-1].split()
        try:
            total = int(fields[1])
            available = int(fields[3])
        except (IndexError, ValueError):
            # Unparseable row: skip this path instead of aborting the poll.
            continue
        result.append({
            "path": target_path,
            "total": total,
            "available": available,
        })
    return result
# 持续获取一个服务器的信息
def keep_check_one(server: dict, shared_data_list: dict, server_title: str, interval: float, re_connect_time: float=5):
# 处理一下需要检查的存储空间路径
if not 'storage_list' in server:
server['storage_list'] = []
if not '/' in server['storage_list']:
server['storage_list'].append('/')
re_try_count = 0
# 循环连接
while True:
try:
# 建立SSH连接
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect(server['ip'], port=server['port'], username=server['username'], password=server.get('password', None), key_filename=server.get('key_filename', None), timeout=interval*3)
cmd = 'nvidia-smi --query-gpu=index,name,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu --format=csv'
shared_data_list[server_title]['err_info'] = ''
re_try_count = 0
# 循环检测
keep_run = True
while keep_run:
try:
# gpu 信息
gpu_info = get_gpus_info(client, interval*3)
# 存储空间信息
storage_info = get_storage_info(client, interval*3, server['storage_list'])
# locked = False # locked = False
with data_list_lock: with data_list_lock:
# locked = True # locked = True
shared_data_list[server_title]['info_list'] = result shared_data_list[server_title]['gpu_info_list'] = gpu_info
shared_data_list[server_title]['storage_info_list'] = storage_info
shared_data_list[server_title]['updated'] = True shared_data_list[server_title]['updated'] = True
shared_data_list[server_title]['maxGPU'] = len(output) shared_data_list[server_title]['maxGPU'] = len(gpu_info)
# locked = False # locked = False
except Exception as e: except Exception as e:
keep_run = False keep_run = False
shared_data_list[server_title]['err_info'] = f'{e}' shared_data_list[server_title]['err_info'] = f'{e}'
if 'info_list' in shared_data_list[server_title]: if 'gpu_info_list' in shared_data_list[server_title]:
shared_data_list[server_title].pop('info_list') shared_data_list[server_title].pop('gpu_info_list')
time.sleep(interval) time.sleep(interval)
@ -130,8 +166,8 @@ def filter_data(title_list: list):
server_data[title]['err_info'] = f'title \'{title}\' not exist!' server_data[title]['err_info'] = f'title \'{title}\' not exist!'
continue continue
# 还没获取到数据 # 还没获取到数据
info_list = data_dict[title].get('info_list', None) gpu_info_list = data_dict[title].get('gpu_info_list', None)
if info_list is None: if gpu_info_list is None:
err_info = data_dict[title].get('err_info', None) err_info = data_dict[title].get('err_info', None)
if err_info is not None: if err_info is not None:
server_data[title]['err_info'] = data_dict[title]['err_info'] server_data[title]['err_info'] = data_dict[title]['err_info']
@ -142,7 +178,8 @@ def filter_data(title_list: list):
# 记录数据 # 记录数据
data_updated = data_dict[title].get('updated', False) data_updated = data_dict[title].get('updated', False)
err_info = data_dict[title].get('err_info', '') err_info = data_dict[title].get('err_info', '')
server_data[title]['info_list'] = info_list server_data[title]['gpu_info_list'] = gpu_info_list
server_data[title]['storage_info_list'] = data_dict[title].get('storage_info_list', [])
server_data[title]['updated'] = data_updated server_data[title]['updated'] = data_updated
server_data[title]['err_info'] = err_info server_data[title]['err_info'] = err_info
result['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') result['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

19
index.html

@ -79,8 +79,23 @@
serverName.textContent = key + updateFlag; serverName.textContent = key + updateFlag;
serverCard.appendChild(serverName); serverCard.appendChild(serverName);
if ('info_list' in serverData[key]){ // 存储空间
serverData[key].info_list.forEach(function(gpu){ if ('storage_info_list' in serverData[key]){
let storageInfo = document.createElement('div');
storageInfo.classList.add('storage-info');
for (let i = 0; i < serverData[key].storage_info_list.length; i++) {
let targetPath = serverData[key].storage_info_list[i].path;
let totalStorage = serverData[key].storage_info_list[i].total;
let availableStorage = serverData[key].storage_info_list[i].available;
storageInfo.innerHTML += targetPath + " : " + availableStorage + " / " + totalStorage + "<br>";
}
serverCard.appendChild(storageInfo);
}
// gpu
if ('gpu_info_list' in serverData[key]){
serverData[key].gpu_info_list.forEach(function(gpu){
let gpuInfo = document.createElement('div'); let gpuInfo = document.createElement('div');
gpuInfo.classList.add('gpu-info'); gpuInfo.classList.add('gpu-info');
let colorDot = greenDot; let colorDot = greenDot;

Loading…
Cancel
Save