Current state: Under Discussion
ISSUE:
PRs:
...
// MilvusService exposes the GetMetrics RPC.
service MilvusService {
  // GetMetrics takes a GetMetricsRequest and returns a GetMetricsResponse.
  rpc GetMetrics(GetMetricsRequest) returns (GetMetricsResponse);
}
// GetMetricsRequest is the request message of the GetMetrics RPC.
message GetMetricsRequest {
  // request is a JSON-formatted string. Carrying the payload as JSON lets the
  // request be extended without changing this message.
  string request = 1;
}
// GetMetricsResponse is the response message of the GetMetrics RPC.
// The name matches the type referenced by `rpc GetMetrics(...) returns
// (GetMetricsResponse)`.
message GetMetricsResponse {
  // status is the status of the call.
  common.Status status = 1;
  // response is a JSON-formatted string. Carrying the payload as JSON lets the
  // response be extended without changing this message.
  string response = 2;
}
I will describe how this interface should be used later in Design Details.
...
request:
{
"metric_type": "system_info"
}
response:
{
  "nodes_info": [
    {
      "identifier": 1, // unique in the list of nodes_info
      "name": "root coordinator",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "coordinator",
      "connected": []
    },
    {
      "identifier": 2,
      "name": "data coordinator",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "coordinator",
      "connected": [
        {
          "parent": 1,
          "method": "manage"
        }
      ]
    },
    {
      "identifier": 3,
      "name": "proxy",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "proxy",
      "connected": [
        {
          "parent": 1,
          "method": "notification"
        },
        {
          "parent": 2,
          "method": "notification"
        }
      ]
    },
    {
      "identifier": 4,
      "name": "data node 1",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "data node",
      "connected": [
        {
          "parent": 2,
          "method": "manage"
        }
      ]
    },
    {
      "identifier": 5,
      "name": "data node 2",
      "hardware_info": {
        "ip": "192.168.1.1",
        "cpu_core_count": 2,
        "cpu_core_usage": "10%",
        "memory": "13124124",
        "memory_usage": "234123",
        "disk": "234123",
        "disk_usage": "123123"
      },
      "system_info": {
        "system_version": "rc2 a3c662c7b",
        "deploy_mode": "cluster"
      },
      "system_configurations": {
        "maxPartitionNum": 4096,
        "timeTickInterval": 200
      },
      "created_time": "2021-04-13 08:41:34.51+00",
      "updated_time": "2021-04-13 08:41:34.51+00",
      "type": "data node",
      "connected": [
        {
          "parent": 2,
          "method": "manage"
        }
      ]
    }
  ]
}
...
System Statistics
request:
{
"metric_type": "system_statistics"
}
response:
{
"hardware_statistics": [
{
"identifier": 1, // unique in the list of hardware_statistics
"name": "root coordinator",
"hardware_usage": {
"cpu": {
"type": "Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz",
"usage": 6
},
"memory": {
"total": 320000, // in mega bytes
"usage": 120000, // in mega bytes
}
}
}
// ...
],
"loaded_collections": [
{
"name": "coll1",
"loaded_time": "2021/07/05 11:13:44.372 +08:00",
"loaded_partitions": [
{
"name": "partition1",
"loaded_time": "2021/07/05 11:13:44.372 +08:00",
},
// ...
]
},
// ...
],
"collection_count": 3,
"partitions_count": [
{
"coll1": 2,
},
// ...
],
"indexes_count": [
{
"coll1": 2,
},
// ...
],
"qps": 10096,
"latency": 0.1
}
System Event Log
request:
{
"metric_type": "system_log"
}
response:
{
"dd": [
"create collection 1 at ts1",
"create collection 2 at ts2"
],
"dm": [
"insert 20 records into collection 1",
"insert 30 records into collection 2"
],
"dq": [
"search on collection 1, nq: 10, topk = 5",
"search on collection 2, nq: 10, topk = 5"
]
}
Test Plan
Test script written with pymilvus:
#!/usr/bin/env python
"""Smoke test for the GetMetrics RPC.

Sends one GetMetrics request per metric type described in this document
(system_info, system_statistics, system_log) and prints each raw response.
"""
import ujson
from pymilvus import Milvus
from pymilvus.grpc_gen import milvus_pb2 as milvus_types

ip = "127.0.0.1"
port = "19530"

# Metric types to query; each value is sent as the "metric_type" key of the
# JSON request string.
METRIC_TYPES = ("system_info", "system_statistics", "system_log")

if __name__ == "__main__":
    client = Milvus(host=ip, port=port)
    try:
        with client._connection() as handler:
            for metric_type in METRIC_TYPES:
                # The request payload is a JSON string carried in the
                # `request` field of GetMetricsRequest.
                payload = ujson.dumps({"metric_type": metric_type})
                req = milvus_types.GetMetricsRequest(request=payload)
                resp = handler._stub.GetMetrics(
                    req, wait_for_ready=True, timeout=None
                )
                print(resp)
    finally:
        # Close the client even if one of the RPCs raises.
        client.close()