|
@ -98,13 +98,27 @@ def get_storage_info(client, timeout, path_list): |
|
|
|
|
|
|
|
|
return result |
|
|
return result |
|
|
|
|
|
|
|
|
|
|
|
def get_memory_info(client, timeout):
    """Query memory usage on a remote host by running ``free``.

    The second line of the command output (the physical-memory row that
    follows the column header) is parsed positionally, so the result does
    not depend on the row label text.

    Args:
        client: Object exposing ``exec_command(cmd, timeout=...)`` that
            returns a ``(stdin, stdout, stderr)`` triple, where
            ``stdout.read()`` yields bytes (presumably a paramiko
            ``SSHClient`` -- confirm against the caller).
        timeout: Timeout forwarded unchanged to ``exec_command``.

    Returns:
        ``{"total": int, "used": int}`` taken from the 2nd and 3rd
        whitespace-separated fields of the memory row, in whatever unit
        ``free`` printed, or ``None`` when the output contains no usable
        memory row (empty output, missing row, too few or non-numeric
        fields).
    """
    _stdin, stdout, _stderr = client.exec_command('free', timeout=timeout)
    lines = stdout.read().decode().split('\n')

    # Check the bounds before indexing: empty or single-line output (for
    # example when the command failed) must yield None, not an IndexError.
    if len(lines) < 2 or lines[1] == "":
        return None

    data = lines[1].split()
    # Expect at least "<label> <total> <used>" on the memory row.
    if len(data) < 3:
        return None

    try:
        return {
            "total": int(data[1]),
            "used": int(data[2]),
        }
    except ValueError:
        # Unexpected, non-numeric row: treat it as "no data available".
        return None
|
|
|
|
|
|
|
|
# 持续获取一个服务器的信息 |
|
|
# 持续获取一个服务器的信息 |
|
|
def keep_check_one(server: dict, shared_data_list: dict, server_title: str, interval: float, re_connect_time: float=5): |
|
|
def keep_check_one(server: dict, shared_data_list: dict, server_title: str, interval: float, re_connect_time: float=5): |
|
|
# 处理一下需要检查的存储空间路径 |
|
|
# 处理一下需要检查的存储空间路径 |
|
|
if not 'storage_list' in server: |
|
|
if not 'storage_list' in server: |
|
|
server['storage_list'] = [] |
|
|
server['storage_list'] = [] |
|
|
if not '/' in server['storage_list']: |
|
|
if not '/' in server['storage_list']: |
|
|
server['storage_list'].append('/') |
|
|
server['storage_list'].insert(0, '/') |
|
|
|
|
|
|
|
|
re_try_count = 0 |
|
|
re_try_count = 0 |
|
|
# 循环连接 |
|
|
# 循环连接 |
|
@ -127,12 +141,14 @@ def keep_check_one(server: dict, shared_data_list: dict, server_title: str, inte |
|
|
gpu_info = get_gpus_info(client, interval*3) |
|
|
gpu_info = get_gpus_info(client, interval*3) |
|
|
# 存储空间信息 |
|
|
# 存储空间信息 |
|
|
storage_info = get_storage_info(client, interval*3, server['storage_list']) |
|
|
storage_info = get_storage_info(client, interval*3, server['storage_list']) |
|
|
|
|
|
memory_info = get_memory_info(client, interval*3) |
|
|
|
|
|
|
|
|
# locked = False |
|
|
# locked = False |
|
|
with data_list_lock: |
|
|
with data_list_lock: |
|
|
# locked = True |
|
|
# locked = True |
|
|
shared_data_list[server_title]['gpu_info_list'] = gpu_info |
|
|
shared_data_list[server_title]['gpu_info_list'] = gpu_info |
|
|
shared_data_list[server_title]['storage_info_list'] = storage_info |
|
|
shared_data_list[server_title]['storage_info_list'] = storage_info |
|
|
|
|
|
shared_data_list[server_title]['memory_info'] = memory_info |
|
|
shared_data_list[server_title]['updated'] = True |
|
|
shared_data_list[server_title]['updated'] = True |
|
|
shared_data_list[server_title]['maxGPU'] = len(gpu_info) |
|
|
shared_data_list[server_title]['maxGPU'] = len(gpu_info) |
|
|
# locked = False |
|
|
# locked = False |
|
@ -180,6 +196,7 @@ def filter_data(title_list: list): |
|
|
err_info = data_dict[title].get('err_info', '') |
|
|
err_info = data_dict[title].get('err_info', '') |
|
|
server_data[title]['gpu_info_list'] = gpu_info_list |
|
|
server_data[title]['gpu_info_list'] = gpu_info_list |
|
|
server_data[title]['storage_info_list'] = data_dict[title].get('storage_info_list', []) |
|
|
server_data[title]['storage_info_list'] = data_dict[title].get('storage_info_list', []) |
|
|
|
|
|
server_data[title]['memory_info'] = data_dict[title].get('memory_info', {}) |
|
|
server_data[title]['updated'] = data_updated |
|
|
server_data[title]['updated'] = data_updated |
|
|
server_data[title]['err_info'] = err_info |
|
|
server_data[title]['err_info'] = err_info |
|
|
result['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') |
|
|
result['time'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') |
|
|