Current state: Under Discussion
...
// GetMetrics is the single RPC used to fetch metrics; which kind of metric is
// returned is selected by the JSON payload carried in GetMetricsRequest.request.
service MilvusService {
rpc GetMetrics(GetMetricsRequest) returns (GetMetricsResponse) {}
}
message GetMetricsRequest {
// request is a JSON-formatted string, e.g. {"metric_type": "system_info"}.
// Carrying the payload as JSON lets the request be extended without changing this message.
string request = 1;
}
// GetMetricsResponse is the reply type of the GetMetrics RPC. The message name
// must match the type named in the rpc declaration (GetMetricsResponse).
message GetMetricsResponse {
  // status reports the outcome of the call (common.Status is declared outside this snippet).
  common.Status status = 1;
  // response is a JSON-formatted string; carrying the payload as JSON lets the
  // response be extended without changing this message.
  string response = 2;
}
I will describe how this interface should be used later in Design Details.
...
request:
{
"metric_type": "system_info"
}
response:
{
  "nodes_info": [
    {
      "identifier": 1,
      "name": "root coordinator",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "coordinator",
      "connected": []
    },
    {
      "identifier": 2,
      "name": "data coordinator",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "coordinator",
      "connected": [
        {
          "parent": 1,
          "method": "manage"
        }
      ]
    },
    {
      "identifier": 3,
      "name": "proxy",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "proxy",
      "connected": [
        {
          "parent": 1,
          "method": "notification"
        },
        {
          "parent": 2,
          "method": "notification"
        }
      ]
    },
    {
      "identifier": 4,
      "name": "data node 1",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "data node",
      "connected": [
        {
          "parent": 2,
          "method": "manage"
        }
      ]
    },
    {
      "identifier": 5,
      "name": "data node 2",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "data node",
      "connected": [
        {
          "parent": 2,
          "method": "manage"
        }
      ]
    }
  ]
}
In order to show the connection topology of Milvus, the response contains nodes_info. nodes_info is a list, and every item in the list describes one node in the Milvus cluster. Every item has an identifier which is unique within nodes_info. The identifier is referenced by the parent field of the entries in another node's connected list. For example, proxy is connected to root coordinator and data coordinator, so its connected list contains one entry with parent 1 and one entry with parent 2, where 1 is the identifier of root coordinator and 2 is the identifier of data coordinator.
System Statistics
request:
{
"metric_type": "system_statistics"
}
response:
{
  "hardware_statistics": [
    {
      "identifier": 1, // unique in the list of hardware_statistics
      "name": "root coordinator",
      "hardware_usage": {
        "cpu": {
          "type": "Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz",
          "usage": 6
        },
        "memory": {
          "total": 320000, // in megabytes
          "usage": 120000 // in megabytes
        }
      }
    }
    // ...
  ],
  "loaded_collections": [
    {
      "name": "coll1",
      "loaded_time": "2021/07/05 11:13:44.372 +08:00",
      "loaded_partitions": [
        {
          "name": "partition1",
          "loaded_time": "2021/07/05 11:13:44.372 +08:00"
        }
        // ...
      ]
    }
    // ...
  ],
  "collection_count": 3,
  "partitions_count": [
    {
      "coll1": 2
    }
    // ...
  ],
  "indexes_count": [
    {
      "coll1": 2
    }
    // ...
  ],
  "qps": 10096,
  "latency": 0.1
}
System Event Log
request:
{
"metric_type": "system_log"
}
response:
{
"dd": [
"create collection 1 at ts1",
"create collection 2 at ts2"
],
"dm": [
"insert 20 records into collection 1",
"insert 30 records into collection 2"
],
"dq": [
"search on collection 1, nq: 10, topk = 5",
"search on collection 2, nq: 10, topk = 5"
]
}
Test Plan
Test script written with pymilvus:
#!/usr/bin/env python
"""Smoke test: call the GetMetrics RPC once for every metric type in this document."""
import ujson
from pymilvus import Milvus
from pymilvus.grpc_gen import milvus_pb2 as milvus_types

ip = "127.0.0.1"
port = "19530"

# metric_type values as spelled in the request samples of this document.
METRIC_TYPES = ("system_info", "system_statistics", "system_log")

if __name__ == "__main__":
    client = Milvus(host=ip, port=port)
    try:
        # NOTE: _connection() and _stub are underscore-prefixed (private) pymilvus
        # attributes; they are used because GetMetrics is called on the raw stub here.
        with client._connection() as handler:
            for metric_type in METRIC_TYPES:
                # The request payload is a JSON string selecting the metric kind.
                payload = ujson.dumps({"metric_type": metric_type})
                req = milvus_types.GetMetricsRequest(request=payload)
                resp = handler._stub.GetMetrics(req, wait_for_ready=True, timeout=None)
                print(resp)
    finally:
        # Close the client even when one of the RPC calls raises.
        client.close()