From 4f8d3ad77a249c630fb88667f3ff630607e3d420 Mon Sep 17 00:00:00 2001 From: Ghazanfar Ali Date: Wed, 8 Jan 2020 17:58:03 -0600 Subject: [PATCH] Hostfrequencyscaling (#140) * Added Host Frequency Scaling Module and Related Extensions. * buildGraph now looks for in mutations as well as out * buildGraph: also search for in-mutations when building * adding cpuburn module in yaml file * error due to const name changes * Adds websocket * revamping frequency scaling * Error correction * error fixing * fixing none to performance * adding exclude of temp none * mapping * getting back NONE mutation * set logging to 10 * making freq scaler discoverable * adding freqscaler disc * bug fix * debug statement * adding more freq * making discoverable by one module * no msg * reverse the discs * changing freq limits * Removing check * removing long debug messages * remove dup discovery of freq scal * Adding debugging statement * buildGraphStage1 rewrite of buildGraph; also no longer require unique start * adding cpuburn * thermalLog info * making thermal log configable * bug fix * fixing variable names * fixing variable * redeclaration issue * createmode * fixing format issue * add file open init * opening file single time * bug * bug * bug * naming convention * error dec * scalerconfigable plus changing thresholds * bug fix * better edge elimination in stage2 graph build; graph sanity check * buildGraph stage 2 now generates sane, discovery dependency aware graphs * freqdiscovery * time in ns * powersave only * adding schedutil scaler * perf * adjusting the thresholds * reversing back scalers to normal flow * changing normal/high temp thresholds * remove debug print statements * making thresholds config flat * error fixing * PS OVER DURATION * PSENFORCE Bug * bug fix * PSENFORCE * Bug fixing in PS Enforcement * Added time bound and thermal bound scaling * bug fixing * Debug statment * bug fix * debug statement * converting current thermal into decimal * DEBUG statement * Bug fix * 
bug fix * discovering perf after ps * adding node * changing mutation timeout * Adding ticker for thermal bound scenario * setting the muttaion timeout to "1s" * adding condition for powersave * renaming parameters * bug fix * fixing errors from rebase * fixing weird whitespace issue * general cleanup * removing pipxe-child and some other changes * fixing whitespacing * setting child log level back to 7 Co-authored-by: J. Lowell Wofford Co-authored-by: Kevin Pelzel --- config/pipxe.yaml | 2 + .../HostFrequencyScaler.go | 32 + .../proto/HostFrequencyScaler.pb.go | 110 +++ .../proto/HostFrequencyScaler.proto | 23 + .../HostThermal/proto/HostThermal.pb.go | 58 +- .../HostThermal/proto/HostThermal.proto | 5 +- modules/cpuburn/cpuburn.go | 2 +- .../hostfrequencyscaling.go | 629 ++++++++++++++++++ .../proto/hostfrequencyscaling.pb.go | 253 +++++++ .../proto/hostfrequencyscaling.proto | 34 + .../hostthermaldiscovery.go | 136 +++- .../proto/hostthermaldiscovery.pb.go | 134 ++-- .../proto/hostthermaldiscovery.proto | 21 +- 13 files changed, 1293 insertions(+), 146 deletions(-) create mode 100644 extensions/HostFrequencyScaler/HostFrequencyScaler.go create mode 100644 extensions/HostFrequencyScaler/proto/HostFrequencyScaler.pb.go create mode 100644 extensions/HostFrequencyScaler/proto/HostFrequencyScaler.proto create mode 100644 modules/hostfrequencyscaling/hostfrequencyscaling.go create mode 100644 modules/hostfrequencyscaling/proto/hostfrequencyscaling.pb.go create mode 100644 modules/hostfrequencyscaling/proto/hostfrequencyscaling.proto diff --git a/config/pipxe.yaml b/config/pipxe.yaml index d6d5febd4..28d2474d4 100644 --- a/config/pipxe.yaml +++ b/config/pipxe.yaml @@ -15,6 +15,7 @@ extensions: - github.com/hpc/kraken/extensions/IPv4 - github.com/hpc/kraken/extensions/RPi3 - github.com/hpc/kraken/extensions/HostThermal + - github.com/hpc/kraken/extensions/HostFrequencyScaler # included modules modules: - github.com/hpc/kraken/modules/restapi @@ -22,4 +23,5 @@ modules: - 
github.com/hpc/kraken/modules/rfpipower - github.com/hpc/kraken/modules/pipxe - github.com/hpc/kraken/modules/hostthermaldiscovery + - github.com/hpc/kraken/modules/hostfrequencyscaling - github.com/hpc/kraken/modules/cpuburn diff --git a/extensions/HostFrequencyScaler/HostFrequencyScaler.go b/extensions/HostFrequencyScaler/HostFrequencyScaler.go new file mode 100644 index 000000000..8b4e7a910 --- /dev/null +++ b/extensions/HostFrequencyScaler/HostFrequencyScaler.go @@ -0,0 +1,32 @@ +package hostfrequencyscaler + +import ( + "github.com/golang/protobuf/proto" + "github.com/golang/protobuf/ptypes" + "github.com/hpc/kraken/core" + pb "github.com/hpc/kraken/extensions/HostFrequencyScaler/proto" + "github.com/hpc/kraken/lib" +) + +//go:generate protoc -I ../../core/proto/include -I proto --go_out=plugins=grpc:proto proto/HostFrequencyScaler.proto + +///////////////// +// HostFrequencyScaler Object / +/////////////// + +var _ lib.Extension = HostFrequencyScaler{} + +type HostFrequencyScaler struct{} + +func (HostFrequencyScaler) New() proto.Message { + return &pb.HostFrequencyScaler{} +} + +func (r HostFrequencyScaler) Name() string { + a, _ := ptypes.MarshalAny(r.New()) + return a.GetTypeUrl() +} + +func init() { + core.Registry.RegisterExtension(HostFrequencyScaler{}) +} diff --git a/extensions/HostFrequencyScaler/proto/HostFrequencyScaler.pb.go b/extensions/HostFrequencyScaler/proto/HostFrequencyScaler.pb.go new file mode 100644 index 000000000..2f83d3b7f --- /dev/null +++ b/extensions/HostFrequencyScaler/proto/HostFrequencyScaler.pb.go @@ -0,0 +1,110 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: HostFrequencyScaler.proto + +package proto + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. 
+var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. +const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type HostFrequencyScaler_ScalerState int32 + +const ( + HostFrequencyScaler_NONE HostFrequencyScaler_ScalerState = 0 + HostFrequencyScaler_POWER_SAVE HostFrequencyScaler_ScalerState = 1 + HostFrequencyScaler_PERFORMANCE HostFrequencyScaler_ScalerState = 2 + HostFrequencyScaler_SCHEDUTIL HostFrequencyScaler_ScalerState = 3 +) + +var HostFrequencyScaler_ScalerState_name = map[int32]string{ + 0: "NONE", + 1: "POWER_SAVE", + 2: "PERFORMANCE", + 3: "SCHEDUTIL", +} +var HostFrequencyScaler_ScalerState_value = map[string]int32{ + "NONE": 0, + "POWER_SAVE": 1, + "PERFORMANCE": 2, + "SCHEDUTIL": 3, +} + +func (x HostFrequencyScaler_ScalerState) String() string { + return proto.EnumName(HostFrequencyScaler_ScalerState_name, int32(x)) +} +func (HostFrequencyScaler_ScalerState) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_HostFrequencyScaler_4de6e1b7f651efb6, []int{0, 0} +} + +type HostFrequencyScaler struct { + State HostFrequencyScaler_ScalerState `protobuf:"varint,1,opt,name=state,proto3,enum=proto.HostFrequencyScaler_ScalerState" json:"state,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *HostFrequencyScaler) Reset() { *m = HostFrequencyScaler{} } +func (m *HostFrequencyScaler) String() string { return proto.CompactTextString(m) } +func (*HostFrequencyScaler) ProtoMessage() {} +func (*HostFrequencyScaler) Descriptor() ([]byte, []int) { + return fileDescriptor_HostFrequencyScaler_4de6e1b7f651efb6, []int{0} +} +func (m *HostFrequencyScaler) XXX_Unmarshal(b []byte) error { + return 
xxx_messageInfo_HostFrequencyScaler.Unmarshal(m, b) +} +func (m *HostFrequencyScaler) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_HostFrequencyScaler.Marshal(b, m, deterministic) +} +func (dst *HostFrequencyScaler) XXX_Merge(src proto.Message) { + xxx_messageInfo_HostFrequencyScaler.Merge(dst, src) +} +func (m *HostFrequencyScaler) XXX_Size() int { + return xxx_messageInfo_HostFrequencyScaler.Size(m) +} +func (m *HostFrequencyScaler) XXX_DiscardUnknown() { + xxx_messageInfo_HostFrequencyScaler.DiscardUnknown(m) +} + +var xxx_messageInfo_HostFrequencyScaler proto.InternalMessageInfo + +func (m *HostFrequencyScaler) GetState() HostFrequencyScaler_ScalerState { + if m != nil { + return m.State + } + return HostFrequencyScaler_NONE +} + +func init() { + proto.RegisterType((*HostFrequencyScaler)(nil), "proto.HostFrequencyScaler") + proto.RegisterEnum("proto.HostFrequencyScaler_ScalerState", HostFrequencyScaler_ScalerState_name, HostFrequencyScaler_ScalerState_value) +} + +func init() { + proto.RegisterFile("HostFrequencyScaler.proto", fileDescriptor_HostFrequencyScaler_4de6e1b7f651efb6) +} + +var fileDescriptor_HostFrequencyScaler_4de6e1b7f651efb6 = []byte{ + // 161 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x92, 0xf4, 0xc8, 0x2f, 0x2e, + 0x71, 0x2b, 0x4a, 0x2d, 0x2c, 0x4d, 0xcd, 0x4b, 0xae, 0x0c, 0x4e, 0x4e, 0xcc, 0x49, 0x2d, 0xd2, + 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x05, 0x53, 0x4a, 0x73, 0x18, 0xb9, 0x84, 0xb1, 0x28, + 0x12, 0xb2, 0xe1, 0x62, 0x2d, 0x2e, 0x49, 0x2c, 0x49, 0x95, 0x60, 0x54, 0x60, 0xd4, 0xe0, 0x33, + 0x52, 0x83, 0xe8, 0xd2, 0xc3, 0x66, 0x1e, 0x84, 0x0a, 0x06, 0xa9, 0x0e, 0x82, 0x68, 0x52, 0x72, + 0xe7, 0xe2, 0x46, 0x12, 0x15, 0xe2, 0xe0, 0x62, 0xf1, 0xf3, 0xf7, 0x73, 0x15, 0x60, 0x10, 0xe2, + 0xe3, 0xe2, 0x0a, 0xf0, 0x0f, 0x77, 0x0d, 0x8a, 0x0f, 0x76, 0x0c, 0x73, 0x15, 0x60, 0x14, 0xe2, + 0xe7, 0xe2, 0x0e, 0x70, 0x0d, 0x72, 0xf3, 0x0f, 
0xf2, 0x75, 0xf4, 0x73, 0x76, 0x15, 0x60, 0x12, + 0xe2, 0xe5, 0xe2, 0x0c, 0x76, 0xf6, 0x70, 0x75, 0x09, 0x0d, 0xf1, 0xf4, 0x11, 0x60, 0x4e, 0x62, + 0x03, 0x5b, 0x6b, 0x0c, 0x08, 0x00, 0x00, 0xff, 0xff, 0x61, 0x02, 0x2d, 0xc6, 0xc9, 0x00, 0x00, + 0x00, +} diff --git a/extensions/HostFrequencyScaler/proto/HostFrequencyScaler.proto b/extensions/HostFrequencyScaler/proto/HostFrequencyScaler.proto new file mode 100644 index 000000000..8bca56cc3 --- /dev/null +++ b/extensions/HostFrequencyScaler/proto/HostFrequencyScaler.proto @@ -0,0 +1,23 @@ +/* HostFrequencyScaler.proto: describes host specific CPU frquency scaling policy objects + * + * Author: Ghazanfar Ali , Kevin Pelzel ;J. Lowell Wofford + * + * This software is open source software available under the BSD-3 license. + * Copyright (c) 2019, Triad National Security, LLC + * See LICENSE file for details. + */ + +syntax = "proto3"; +package proto; + +message HostFrequencyScaler{ + enum ScalerState { + NONE = 0; + POWER_SAVE = 1; + PERFORMANCE = 2; + SCHEDUTIL = 3; + + } + ScalerState state = 1; + +} \ No newline at end of file diff --git a/extensions/HostThermal/proto/HostThermal.pb.go b/extensions/HostThermal/proto/HostThermal.pb.go index 22319cfb9..4e6c53519 100644 --- a/extensions/HostThermal/proto/HostThermal.pb.go +++ b/extensions/HostThermal/proto/HostThermal.pb.go @@ -18,47 +18,47 @@ var _ = math.Inf // proto package needs to be updated. 
const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package -type HostThermal_CPU_TEMP_STATE int32 +type HostThermalCpuState int32 const ( - HostThermal_CPU_TEMP_NONE HostThermal_CPU_TEMP_STATE = 0 - HostThermal_CPU_TEMP_NORMAL HostThermal_CPU_TEMP_STATE = 1 - HostThermal_CPU_TEMP_HIGH HostThermal_CPU_TEMP_STATE = 2 - HostThermal_CPU_TEMP_CRITICAL HostThermal_CPU_TEMP_STATE = 3 + HostThermal_CPU_TEMP_NONE HostThermalCpuState = 0 + HostThermal_CPU_TEMP_NORMAL HostThermalCpuState = 1 + HostThermal_CPU_TEMP_HIGH HostThermalCpuState = 2 + HostThermal_CPU_TEMP_CRITICAL HostThermalCpuState = 3 ) -var HostThermal_CPU_TEMP_STATE_name = map[int32]string{ +var HostThermalCpuState_name = map[int32]string{ 0: "CPU_TEMP_NONE", 1: "CPU_TEMP_NORMAL", 2: "CPU_TEMP_HIGH", 3: "CPU_TEMP_CRITICAL", } -var HostThermal_CPU_TEMP_STATE_value = map[string]int32{ +var HostThermalCpuState_value = map[string]int32{ "CPU_TEMP_NONE": 0, "CPU_TEMP_NORMAL": 1, "CPU_TEMP_HIGH": 2, "CPU_TEMP_CRITICAL": 3, } -func (x HostThermal_CPU_TEMP_STATE) String() string { - return proto.EnumName(HostThermal_CPU_TEMP_STATE_name, int32(x)) +func (x HostThermalCpuState) String() string { + return proto.EnumName(HostThermalCpuState_name, int32(x)) } -func (HostThermal_CPU_TEMP_STATE) EnumDescriptor() ([]byte, []int) { - return fileDescriptor_HostThermal_a0a9c7392ef05d23, []int{0, 0} +func (HostThermalCpuState) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_HostThermal_8fa7e7c7b793310f, []int{0, 0} } type HostThermal struct { - State HostThermal_CPU_TEMP_STATE `protobuf:"varint,1,opt,name=state,proto3,enum=proto.HostThermal_CPU_TEMP_STATE" json:"state,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + State HostThermalCpuState `protobuf:"varint,1,opt,name=state,proto3,enum=proto.HostThermalCpuState" json:"state,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + 
XXX_sizecache int32 `json:"-"` } func (m *HostThermal) Reset() { *m = HostThermal{} } func (m *HostThermal) String() string { return proto.CompactTextString(m) } func (*HostThermal) ProtoMessage() {} func (*HostThermal) Descriptor() ([]byte, []int) { - return fileDescriptor_HostThermal_a0a9c7392ef05d23, []int{0} + return fileDescriptor_HostThermal_8fa7e7c7b793310f, []int{0} } func (m *HostThermal) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_HostThermal.Unmarshal(m, b) @@ -78,7 +78,7 @@ func (m *HostThermal) XXX_DiscardUnknown() { var xxx_messageInfo_HostThermal proto.InternalMessageInfo -func (m *HostThermal) GetState() HostThermal_CPU_TEMP_STATE { +func (m *HostThermal) GetState() HostThermalCpuState { if m != nil { return m.State } @@ -87,21 +87,21 @@ func (m *HostThermal) GetState() HostThermal_CPU_TEMP_STATE { func init() { proto.RegisterType((*HostThermal)(nil), "proto.HostThermal") - proto.RegisterEnum("proto.HostThermal_CPU_TEMP_STATE", HostThermal_CPU_TEMP_STATE_name, HostThermal_CPU_TEMP_STATE_value) + proto.RegisterEnum("proto.HostThermalCpuState", HostThermalCpuState_name, HostThermalCpuState_value) } -func init() { proto.RegisterFile("HostThermal.proto", fileDescriptor_HostThermal_a0a9c7392ef05d23) } +func init() { proto.RegisterFile("HostThermal.proto", fileDescriptor_HostThermal_8fa7e7c7b793310f) } -var fileDescriptor_HostThermal_a0a9c7392ef05d23 = []byte{ - // 152 bytes of a gzipped FileDescriptorProto +var fileDescriptor_HostThermal_8fa7e7c7b793310f = []byte{ + // 151 bytes of a gzipped FileDescriptorProto 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x12, 0xf4, 0xc8, 0x2f, 0x2e, 0x09, 0xc9, 0x48, 0x2d, 0xca, 0x4d, 0xcc, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x05, - 0x53, 0x4a, 0xab, 0x18, 0xb9, 0xb8, 0x91, 0x24, 0x85, 0xcc, 0xb9, 0x58, 0x8b, 0x4b, 0x12, 0x4b, - 0x52, 0x25, 0x18, 0x15, 0x18, 0x35, 0xf8, 0x8c, 0x14, 0x21, 0xaa, 0xf5, 0x90, 0xf5, 0x3b, 0x07, - 0x84, 0xc6, 0x87, 0xb8, 0xfa, 0x06, 0xc4, 0x07, 
0x87, 0x38, 0x86, 0xb8, 0x06, 0x41, 0xd4, 0x2b, - 0x25, 0x71, 0xf1, 0xa1, 0x4a, 0x08, 0x09, 0x72, 0xf1, 0xc2, 0x45, 0xfc, 0xfc, 0xfd, 0x5c, 0x05, - 0x18, 0x84, 0x84, 0xb9, 0xf8, 0x91, 0x84, 0x82, 0x7c, 0x1d, 0x7d, 0x04, 0x18, 0x51, 0xd4, 0x79, - 0x78, 0xba, 0x7b, 0x08, 0x30, 0x09, 0x89, 0x72, 0x09, 0xc2, 0x85, 0x9c, 0x83, 0x3c, 0x43, 0x3c, - 0x9d, 0x1d, 0x7d, 0x04, 0x98, 0x93, 0xd8, 0xc0, 0x8e, 0x31, 0x06, 0x04, 0x00, 0x00, 0xff, 0xff, - 0x26, 0x83, 0x23, 0x81, 0xcf, 0x00, 0x00, 0x00, + 0x53, 0x4a, 0xf3, 0x18, 0xb9, 0xb8, 0x91, 0x24, 0x85, 0x0c, 0xb9, 0x58, 0x8b, 0x4b, 0x12, 0x4b, + 0x52, 0x25, 0x18, 0x15, 0x18, 0x35, 0xf8, 0x8c, 0xa4, 0x21, 0xaa, 0xf5, 0x90, 0xf5, 0x27, 0x17, + 0x94, 0x06, 0x83, 0x94, 0x04, 0x41, 0x54, 0x2a, 0xc5, 0x70, 0x71, 0xc0, 0x84, 0x84, 0x04, 0xb9, + 0x78, 0x9d, 0x03, 0x42, 0xe3, 0x43, 0x5c, 0x7d, 0x03, 0xe2, 0xfd, 0xfc, 0xfd, 0x5c, 0x05, 0x18, + 0x84, 0x84, 0xb9, 0xf8, 0x91, 0x84, 0x82, 0x7c, 0x1d, 0x7d, 0x04, 0x18, 0x51, 0xd4, 0x79, 0x78, + 0xba, 0x7b, 0x08, 0x30, 0x09, 0x89, 0x72, 0x09, 0xc2, 0x85, 0x9c, 0x83, 0x3c, 0x43, 0x3c, 0x9d, + 0x1d, 0x7d, 0x04, 0x98, 0x93, 0xd8, 0xc0, 0x0e, 0x30, 0x06, 0x04, 0x00, 0x00, 0xff, 0xff, 0xe7, + 0xf7, 0x25, 0x52, 0xc3, 0x00, 0x00, 0x00, } diff --git a/extensions/HostThermal/proto/HostThermal.proto b/extensions/HostThermal/proto/HostThermal.proto index 607114ae6..897f0221f 100644 --- a/extensions/HostThermal/proto/HostThermal.proto +++ b/extensions/HostThermal/proto/HostThermal.proto @@ -11,13 +11,12 @@ package proto; message HostThermal { - enum hostThermalState { + enum cpuState { CPU_TEMP_NONE = 0; CPU_TEMP_NORMAL = 1; CPU_TEMP_HIGH = 2; CPU_TEMP_CRITICAL = 3; - //TEMP_WAIT = 1; // Thermal request sent to node } - hostThermalState state = 1; + cpuState state = 1; } \ No newline at end of file diff --git a/modules/cpuburn/cpuburn.go b/modules/cpuburn/cpuburn.go index 2d4cc7bb5..4d05713e5 100644 --- a/modules/cpuburn/cpuburn.go +++ b/modules/cpuburn/cpuburn.go @@ -124,7 +124,7 @@ func (*CPUBurn) 
NewConfig() (r proto.Message) { ThermalThrottle: true, ThermalPoll: 1, ThermalResume: 60000, - ThermalCrit: 85000, + ThermalCrit: 98000, Workers: 4, WorkersThrottled: 0, } diff --git a/modules/hostfrequencyscaling/hostfrequencyscaling.go b/modules/hostfrequencyscaling/hostfrequencyscaling.go new file mode 100644 index 000000000..ff0a754ee --- /dev/null +++ b/modules/hostfrequencyscaling/hostfrequencyscaling.go @@ -0,0 +1,629 @@ +/* hostfrequencyscaling.go: performs mutations related to scaling of CPU frequency to control CPU thermal conditions of HPC node using in-band mechanism. + * + * This module boots HPC nodes with "schedutil" scaling governor and whenever CPU temperature reaches a high (warning) condition, the module mutates the scaling governor to "powersave". + * Current implementation handles critical CPU temperature same as high CPU temperature. + * + * Additionally, many other mutations intended for different use cases (e.g. switching back to "schedutil" after "powersave") are under consideration and investigation. + * + * Authors: Ghazanfar Ali, ghazanfar.ali@ttu.edu; Kevin Pelzel ; J. Lowell Wofford + * + * This software is open source software available under the BSD-3 license. + * Copyright (c) 2019, Triad National Security, LLC + * See LICENSE file for details. 
+ */ + +package hostfrequencyscaling + +import ( + "fmt" + "io/ioutil" + "log" + "net" + "os" + "reflect" + "strconv" + "strings" + "sync" + "time" + + "github.com/golang/protobuf/proto" + "github.com/golang/protobuf/ptypes" + "github.com/hpc/kraken/core" + cpb "github.com/hpc/kraken/core/proto" + scalpb "github.com/hpc/kraken/extensions/HostFrequencyScaler/proto" + hostthpb "github.com/hpc/kraken/extensions/HostThermal/proto" + "github.com/hpc/kraken/lib" + pb "github.com/hpc/kraken/modules/hostfrequencyscaling/proto" +) + +// CPUPerfScalingReq is payload for RFAggregator API call +type CPUPerfScalingReq struct { + ScalingGovernor string `json:"scalinggovernor"` + ScalingMinFreq string `json:"scalingminfreq"` + ScalingMaxFreq string `json:"scalingmaxfreq"` + NodesAddressList []string `json:"nodesaddresslist,omitempty"` + Timeout int `json:"timeout,omitempty"` +} + +// CPUPerfScalingResp structure for API response from RFAggregator +type CPUPerfScalingResp struct { + TimeStamp time.Time `json:"timestamp"` + HostAddress string `json:"hostaddress"` + CurScalingGovernor string `json:"curscalinggovernor"` + ScalingMinFreq string `json:"scalingminfreq"` + ScalingMaxFreq string `json:"scalingmaxfreq"` + ScalingCurFreq string `json:"scalingcurfreq"` + + CPUCurFreq string `json:"cpucurfreq"` + CPUMinFreq string `json:"cpuminfreq"` + CPUMaxFreq string `json:"cpumaxfreq"` +} + +// CPUPerfScalingRespColl structure for collection of response +type CPUPerfScalingRespColl struct { + CPUPerfScalingRespCollection []CPUPerfScalingResp `json:"cpuperfscalingrespcollection"` +} + +const ( + // PxeURL refers to PXE object + pxeURL string = "type.googleapis.com/proto.RPi3/Pxe" + + // ModuleStateURL refers to module state + moduleStateURL string = "/Services/hostfrequencyscaling/State" + + // HostThermalStateURL points to Thermal extension + hostThermalStateURL string = "type.googleapis.com/proto.HostThermal/State" + + // NodeIPURL provides node IP address + nodeIPURL string = 
"type.googleapis.com/proto.IPv4OverEthernet/Ifaces/0/Ip/Ip" + + // hostFreqScalerURL provides URL for host frequency scaler at host run time + hostFreqScalerURL string = "type.googleapis.com/proto.HostFrequencyScaler/State" + + // freqSensorPath holds frequency sensor path on pi node + freqSensorPath string = "/sys/devices/system/cpu/cpufreq/policy0/" + + // thermalSensorUrl holds thermal sensor path on pi node + thermalSensorURL string = "/sys/devices/virtual/thermal/thermal_zone0/temp" +) + +var profileMap = map[string]string{ + "performance": scalpb.HostFrequencyScaler_PERFORMANCE.String(), + "powersave": scalpb.HostFrequencyScaler_POWER_SAVE.String(), + "schedutil": scalpb.HostFrequencyScaler_SCHEDUTIL.String(), +} + +type hfscalmut struct { + f scalpb.HostFrequencyScaler_ScalerState + t scalpb.HostFrequencyScaler_ScalerState + reqs map[string]reflect.Value + timeout string + failTo string +} + +// modify these if you want different requires for mutations +var scalerReqs = map[string]reflect.Value{ + "/PhysState": reflect.ValueOf(cpb.Node_POWER_ON), + "/RunState": reflect.ValueOf(cpb.Node_SYNC), + moduleStateURL: reflect.ValueOf(cpb.ServiceInstance_RUN), +} +var scalMuts = map[string]hfscalmut{ + "NONEtoPOWERSAVE": { + f: scalpb.HostFrequencyScaler_NONE, + t: scalpb.HostFrequencyScaler_POWER_SAVE, + reqs: scalerReqs, + timeout: "1s", + failTo: scalpb.HostFrequencyScaler_NONE.String(), + }, + "PERFORMANCEtoPOWERSAVE": { + f: scalpb.HostFrequencyScaler_PERFORMANCE, + t: scalpb.HostFrequencyScaler_POWER_SAVE, + reqs: scalerReqs, + timeout: "1s", + failTo: scalpb.HostFrequencyScaler_PERFORMANCE.String(), + }, + "NONEtoPERFORMANCE": { + f: scalpb.HostFrequencyScaler_NONE, + t: scalpb.HostFrequencyScaler_PERFORMANCE, + reqs: scalerReqs, + timeout: "1s", + failTo: scalpb.HostFrequencyScaler_NONE.String(), + }, + "POWERSAVEtoPERFORMANCE": { + f: scalpb.HostFrequencyScaler_POWER_SAVE, + t: scalpb.HostFrequencyScaler_PERFORMANCE, + reqs: map[string]reflect.Value{ + 
"/PhysState": reflect.ValueOf(cpb.Node_POWER_ON), + "/RunState": reflect.ValueOf(cpb.Node_SYNC), + moduleStateURL: reflect.ValueOf(cpb.ServiceInstance_RUN), + hostThermalStateURL: reflect.ValueOf(hostthpb.HostThermal_CPU_TEMP_NORMAL), + }, + timeout: "1s", + failTo: scalpb.HostFrequencyScaler_POWER_SAVE.String(), + }, +} + +// Structure for mutation defintion +type hfsmut struct { + f hostthpb.HostThermalCpuState + t hostthpb.HostThermalCpuState + reqs map[string]reflect.Value + timeout string + failTo string +} + +// Mutations supported by this module +var muts = map[string]hfsmut{ + + "CPU_TEMP_NONEtoCPU_TEMP_NORMAL": { + f: hostthpb.HostThermal_CPU_TEMP_NONE, + t: hostthpb.HostThermal_CPU_TEMP_NORMAL, + reqs: reqs, + timeout: "1s", + failTo: hostthpb.HostThermal_CPU_TEMP_NONE.String(), + }, + "CPU_TEMP_NONEtoCPU_TEMP_HIGH": { + f: hostthpb.HostThermal_CPU_TEMP_NONE, + t: hostthpb.HostThermal_CPU_TEMP_HIGH, + reqs: reqs, + timeout: "1s", + failTo: hostthpb.HostThermal_CPU_TEMP_NONE.String(), + }, + "CPU_TEMP_NONEtoCPU_TEMP_CRITICAL": { + f: hostthpb.HostThermal_CPU_TEMP_NONE, + t: hostthpb.HostThermal_CPU_TEMP_CRITICAL, + reqs: reqs, + timeout: "1s", + failTo: hostthpb.HostThermal_CPU_TEMP_NONE.String(), + }, + + "CPU_TEMP_HIGHtoCPU_TEMP_NORMAL": { + f: hostthpb.HostThermal_CPU_TEMP_HIGH, + t: hostthpb.HostThermal_CPU_TEMP_NORMAL, + reqs: greqs, + timeout: "1s", + failTo: hostthpb.HostThermal_CPU_TEMP_HIGH.String(), + }, + + "CPU_TEMP_CRITICALtoCPU_TEMP_HIGH": { + f: hostthpb.HostThermal_CPU_TEMP_CRITICAL, + t: hostthpb.HostThermal_CPU_TEMP_HIGH, + reqs: greqs, + timeout: "1s", + failTo: hostthpb.HostThermal_CPU_TEMP_CRITICAL.String(), + }, + + "CPU_TEMP_CRITICALtoCPU_TEMP_NORMAL": { + f: hostthpb.HostThermal_CPU_TEMP_CRITICAL, + t: hostthpb.HostThermal_CPU_TEMP_NORMAL, + reqs: greqs, + timeout: "1s", + failTo: hostthpb.HostThermal_CPU_TEMP_CRITICAL.String(), + }, +} + +// HFS provides rfcpufreqscaling module capabilities +type HFS struct { + api lib.APIClient + 
cfg *pb.HostFreqScalingConfig + mutex *sync.Mutex + psEnforced bool + mchan <-chan lib.Event + dchan chan<- lib.Event +} + +var _ lib.Module = (*HFS)(nil) +var _ lib.ModuleWithConfig = (*HFS)(nil) +var _ lib.ModuleWithMutations = (*HFS)(nil) +var _ lib.ModuleWithDiscovery = (*HFS)(nil) +var _ lib.ModuleSelfService = (*HFS)(nil) + +// Name returns the FQDN of the module +func (*HFS) Name() string { return "github.com/hpc/kraken/modules/hostfrequencyscaling" } + +// NewConfig returns a fully initialized default config +func (*HFS) NewConfig() proto.Message { + r := &pb.HostFreqScalingConfig{ + FreqSensorUrl: freqSensorPath, + ThermalSensorUrl: thermalSensorURL, + ScalingFreqPolicy: hostFreqScalerURL, + HighToLowScaler: "powersave", + LowToHighScaler: "performance", + TimeBoundThrottleRetentionDuration: 5, + ThrottleRetention: true, + TimeBoundThrottleRetention: false, + ThermalBoundThrottleRetention: true, + ThermalBoundThrottleRetentionThreshold: 66, + FreqScalPolicies: map[string]*pb.HostFreqScalingPolicy{ + "powersave": { + ScalingGovernor: "powersave", + ScalingMinFreq: "600000", + ScalingMaxFreq: "1400000", + NodeArch: "", + NodePlatform: "", + }, + "performance": { + ScalingGovernor: "performance", + ScalingMinFreq: "600000", + ScalingMaxFreq: "1400000", + NodeArch: "", + NodePlatform: "", + }, + "schedutil": { + ScalingGovernor: "schedutil", + ScalingMinFreq: "600000", + ScalingMaxFreq: "1400000", + NodeArch: "", + NodePlatform: "", + }, + }, + } + return r +} + +// UpdateConfig updates the running config +func (hfs *HFS) UpdateConfig(cfg proto.Message) (e error) { + if rcfg, ok := cfg.(*pb.HostFreqScalingConfig); ok { + hfs.cfg = rcfg + return + } + return fmt.Errorf("invalid config type") +} + +// ConfigURL gives the any resolver URL for the config +func (*HFS) ConfigURL() string { + cfg := &pb.HostFreqScalingConfig{} + any, _ := ptypes.MarshalAny(cfg) + return any.GetTypeUrl() +} + +// SetMutationChan sets the current mutation channel +// this is generally 
done by the API +func (hfs *HFS) SetMutationChan(c <-chan lib.Event) { hfs.mchan = c } + +// SetDiscoveryChan sets the current discovery channel +func (hfs *HFS) SetDiscoveryChan(d chan<- lib.Event) { hfs.dchan = d } + +// modify these if you want different requires for mutations +var reqs = map[string]reflect.Value{ + "/PhysState": reflect.ValueOf(cpb.Node_POWER_ON), + "/RunState": reflect.ValueOf(cpb.Node_SYNC), + moduleStateURL: reflect.ValueOf(cpb.ServiceInstance_RUN), +} + +var greqs = map[string]reflect.Value{ + "/PhysState": reflect.ValueOf(cpb.Node_POWER_ON), + "/RunState": reflect.ValueOf(cpb.Node_SYNC), + moduleStateURL: reflect.ValueOf(cpb.ServiceInstance_RUN), + hostFreqScalerURL: reflect.ValueOf(scalpb.HostFrequencyScaler_POWER_SAVE), +} + +// modify this if you want excludes +var excs = map[string]reflect.Value{} + +// Init is used to initialize an executable module prior to entrypoint +func (hfs *HFS) Init(api lib.APIClient) { + hfs.api = api + hfs.mutex = &sync.Mutex{} + hfs.psEnforced = false + // hfs.queue = make(map[string]map[string]NMut) + hfs.cfg = hfs.NewConfig().(*pb.HostFreqScalingConfig) +} + +// Stop should perform a graceful exit +func (hfs *HFS) Stop() { + os.Exit(0) +} + +func init() { + module := &HFS{} + mutations := make(map[string]lib.StateMutation) + discovers := make(map[string]map[string]reflect.Value) + hostFreqScalerDiscs := make(map[string]reflect.Value) + hostThermDiscs := make(map[string]reflect.Value) + si := core.NewServiceInstance("hostfrequencyscaling", module.Name(), module.Entry, nil) + + hostThermDiscs[hostthpb.HostThermal_CPU_TEMP_NONE.String()] = reflect.ValueOf(hostthpb.HostThermal_CPU_TEMP_NONE) + hostThermDiscs[hostthpb.HostThermal_CPU_TEMP_NORMAL.String()] = reflect.ValueOf(hostthpb.HostThermal_CPU_TEMP_NORMAL) + hostThermDiscs[hostthpb.HostThermal_CPU_TEMP_HIGH.String()] = reflect.ValueOf(hostthpb.HostThermal_CPU_TEMP_HIGH) + hostThermDiscs[hostthpb.HostThermal_CPU_TEMP_CRITICAL.String()] = 
reflect.ValueOf(hostthpb.HostThermal_CPU_TEMP_CRITICAL) + + discovers[hostThermalStateURL] = hostThermDiscs + discovers[moduleStateURL] = map[string]reflect.Value{ + "RUN": reflect.ValueOf(cpb.ServiceInstance_RUN)} + + hostFreqScalerDiscs[scalpb.HostFrequencyScaler_NONE.String()] = reflect.ValueOf(scalpb.HostFrequencyScaler_NONE) + hostFreqScalerDiscs[scalpb.HostFrequencyScaler_PERFORMANCE.String()] = reflect.ValueOf(scalpb.HostFrequencyScaler_PERFORMANCE) + hostFreqScalerDiscs[scalpb.HostFrequencyScaler_POWER_SAVE.String()] = reflect.ValueOf(scalpb.HostFrequencyScaler_POWER_SAVE) + discovers[hostFreqScalerURL] = hostFreqScalerDiscs + + for k, m := range muts { + dur, _ := time.ParseDuration(m.timeout) + mutations[k] = core.NewStateMutation( + map[string][2]reflect.Value{ + hostThermalStateURL: { + reflect.ValueOf(m.f), + reflect.ValueOf(m.t), + }, + }, + m.reqs, + excs, + lib.StateMutationContext_SELF, + dur, + [3]string{si.ID(), hostThermalStateURL, m.failTo}, + ) + } + + for k, m := range scalMuts { + dur, _ := time.ParseDuration(m.timeout) + mutations[k] = core.NewStateMutation( + map[string][2]reflect.Value{ + hostFreqScalerURL: { + reflect.ValueOf(m.f), + reflect.ValueOf(m.t), + }, + }, + m.reqs, + map[string]reflect.Value{ + hostThermalStateURL: reflect.ValueOf(hostthpb.HostThermal_CPU_TEMP_NONE), + }, + lib.StateMutationContext_SELF, + dur, + [3]string{si.ID(), hostFreqScalerURL, m.failTo}, + ) + } + + // Register it all + core.Registry.RegisterModule(module) + core.Registry.RegisterServiceInstance(module, map[string]lib.ServiceInstance{si.ID(): si}) + core.Registry.RegisterMutations(si, mutations) + core.Registry.RegisterDiscoverable(si, discovers) +} + +// Entry is the module's executable entrypoint +func (hfs *HFS) Entry() { + + url := lib.NodeURLJoin(hfs.api.Self().String(), moduleStateURL) + ev := core.NewEvent( + lib.Event_DISCOVERY, + url, + &core.DiscoveryEvent{ + + URL: url, + ValueID: "RUN", + }, + ) + hfs.dchan <- ev + + for { + + select { + case 
m := <-hfs.mchan: + + go hfs.mutateCPUFreq(m) + + if hfs.cfg.GetThrottleRetention() == true { + if hfs.cfg.GetThermalBoundThrottleRetention() == true && hfs.psEnforced == true { + go hfs.CheckThermalThreshold() + } + } + + break + + } + } + +} + +// mutateCPUFreq handles a state mutation event by applying the frequency scaling policy configured for that mutation +func (hfs *HFS) mutateCPUFreq(m lib.Event) { + + if m.Type() != lib.Event_STATE_MUTATION { + hfs.api.Log(lib.LLERROR, "got unexpected non-mutation event") + return + } + me := m.Data().(*core.MutationEvent) + + enforceLowFreqScaler := hfs.cfg.GetThrottleRetention() + + if enforceLowFreqScaler == true { + switch me.Mutation[1] { + case "NONEtoPOWERSAVE": + highToLowScaler := hfs.cfg.GetHighToLowScaler() + hfs.HostFrequencyScaling(me.NodeCfg, highToLowScaler) + break + case "PERFORMANCEtoPOWERSAVE": + highToLowScaler := hfs.cfg.GetHighToLowScaler() + hfs.HostFrequencyScaling(me.NodeCfg, highToLowScaler) + + if hfs.cfg.GetTimeBoundThrottleRetention() == true { + go hfs.EnforceTimeBoundScaler() + } else if hfs.cfg.GetThermalBoundThrottleRetention() == true { + hfs.mutex.Lock() + hfs.psEnforced = true + hfs.mutex.Unlock() + } + + break + case "NONEtoPERFORMANCE": + lowToHighScaler := hfs.cfg.GetLowToHighScaler() + hfs.HostFrequencyScaling(me.NodeCfg, lowToHighScaler) + break + case "POWERSAVEtoPERFORMANCE": + + hfs.mutex.Lock() + psEnforced := hfs.psEnforced + hfs.mutex.Unlock() + if psEnforced == true { + highToLowScaler := hfs.cfg.GetHighToLowScaler() + hfs.HostFrequencyScaling(me.NodeCfg, highToLowScaler) + } else { + lowToHighScaler := hfs.cfg.GetLowToHighScaler() + hfs.HostFrequencyScaling(me.NodeCfg, lowToHighScaler) + } + + break + } + + } else { + switch me.Mutation[1] { + case "NONEtoPOWERSAVE": + fallthrough + case "PERFORMANCEtoPOWERSAVE": + highToLowScaler := hfs.cfg.GetHighToLowScaler() + hfs.HostFrequencyScaling(me.NodeCfg, highToLowScaler) + break + case "NONEtoPERFORMANCE": + fallthrough + case 
"POWERSAVEtoPERFORMANCE": + lowToHighScaler := hfs.cfg.GetLowToHighScaler() + hfs.HostFrequencyScaling(me.NodeCfg, lowToHighScaler) + break + } + } + +} + +// CheckThermalThreshold validates whether current thermal is less than preset threshold and if so set the PS enforcement to false +func (hfs *HFS) CheckThermalThreshold() { + currentThermal := hfs.ReadCPUTemp() + thresholdThermal := hfs.cfg.GetThermalBoundThrottleRetentionThreshold() + + if (currentThermal / 1000) < thresholdThermal { + hfs.mutex.Lock() + hfs.psEnforced = false + hfs.mutex.Unlock() + + } + +} + +// EnforceTimeBoundScaler keep low frequency scaler like "powersave" for a certain duration +func (hfs *HFS) EnforceTimeBoundScaler() { + hfs.mutex.Lock() + hfs.psEnforced = true + hfs.mutex.Unlock() + + timer := time.NewTimer(time.Minute * time.Duration(hfs.cfg.GetTimeBoundThrottleRetentionDuration())) + defer timer.Stop() + + for { + + select { + case <-timer.C: + hfs.mutex.Lock() + hfs.psEnforced = false + hfs.mutex.Unlock() + break + } + + } + +} + +// ReadCPUTemp returns the current CPU thermal +func (hfs *HFS) ReadCPUTemp() int32 { + + tempSensorPath := hfs.cfg.GetThermalSensorUrl() + + cpuTemp, err := ioutil.ReadFile(tempSensorPath) + + if err != nil { + hfs.api.Logf(lib.LLERROR, "Reading CPU thermal sensor failed: %v", err) + return 0 + } + cpuTempInt, err := strconv.Atoi(strings.TrimSuffix(string(cpuTemp), "\n")) + + if err != nil { + hfs.api.Logf(lib.LLERROR, "String to Int conversion failed: %v", err) + return 0 + } + + return int32(cpuTempInt) +} + +// HostFrequencyScaling scales CPU frequency according to given parameters +func (hfs *HFS) HostFrequencyScaling(node lib.Node, freqScalPolicy string) { + + hfs.api.Logf(lib.LLERROR, "POLICY: %s", freqScalPolicy) + + freqScalPolicies := hfs.cfg.GetFreqScalPolicies() + + scalingGovernor := freqScalPolicies[freqScalPolicy].GetScalingGovernor() + scalingMinFreq := freqScalPolicies[freqScalPolicy].GetScalingMinFreq() + scalingMaxFreq := 
freqScalPolicies[freqScalPolicy].GetScalingMaxFreq() + + basePath := hfs.cfg.GetFreqSensorUrl() //"/sys/devices/system/cpu/cpufreq/policy0/" + + // Set the CPU frequency scaling parameters + _ = ioutil.WriteFile(basePath+"scaling_governor", []byte(scalingGovernor), 0644) + _ = ioutil.WriteFile(basePath+"scaling_max_freq", []byte(scalingMaxFreq), 0644) + _ = ioutil.WriteFile(basePath+"scaling_min_freq", []byte(scalingMinFreq), 0644) + + // Get the CPU frequency scaling parameters + bscalingGovernor, _ := ioutil.ReadFile(basePath + "scaling_governor") + bscalingMaxFreq, _ := ioutil.ReadFile(basePath + "scaling_max_freq") + bscalingMinFreq, _ := ioutil.ReadFile(basePath + "scaling_min_freq") + + cpuCurFreq, _ := ioutil.ReadFile(basePath + "cpuinfo_cur_freq") + cpuMinFreq, _ := ioutil.ReadFile(basePath + "cpuinfo_min_freq") + cpuMaxFreq, _ := ioutil.ReadFile(basePath + "cpuinfo_max_freq") + scalingCurFreq, _ := ioutil.ReadFile(basePath + "scaling_cur_freq") + + fscalingGovernor := strings.TrimSuffix(string(bscalingGovernor), "\n") + + scalingMinFreqq := strings.TrimSuffix(string(bscalingMinFreq), "\n") + scalingMaxFreqq := strings.TrimSuffix(string(bscalingMaxFreq), "\n") + scalingCurFreqq := strings.TrimSuffix(string(scalingCurFreq), "\n") + cpuCurFreqq := strings.TrimSuffix(string(cpuCurFreq), "\n") + cpuMinFreqq := strings.TrimSuffix(string(cpuMinFreq), "\n") + cpuMaxFreqq := strings.TrimSuffix(string(cpuMaxFreq), "\n") + + hostIP := GetNodeIPAddress() + + currentScalingConfig := CPUPerfScalingResp{ + TimeStamp: time.Now(), + HostAddress: hostIP, + CurScalingGovernor: fscalingGovernor, + ScalingMinFreq: scalingMinFreqq, + ScalingMaxFreq: scalingMaxFreqq, + ScalingCurFreq: scalingCurFreqq, + + CPUCurFreq: cpuCurFreqq, + CPUMinFreq: cpuMinFreqq, + CPUMaxFreq: cpuMaxFreqq, + } + + url := lib.NodeURLJoin(node.ID().String(), hostFreqScalerURL) + ev := core.NewEvent( + lib.Event_DISCOVERY, + url, + &core.DiscoveryEvent{ + URL: url, + ValueID: 
profileMap[currentScalingConfig.CurScalingGovernor], + }, + ) + hfs.dchan <- ev + +} + +// GetNodeIPAddress acquires node IP address +func GetNodeIPAddress() string { + + addrs, err := net.InterfaceAddrs() + if err != nil { + log.Fatalf("could not obtain host IP address: %v", err) + } + ip := "" + for _, a := range addrs { + if ipnet, ok := a.(*net.IPNet); ok && !ipnet.IP.IsLoopback() { + if ipnet.IP.To4() != nil { + ip = ipnet.IP.String() + break + } + } + } + + return ip + +} diff --git a/modules/hostfrequencyscaling/proto/hostfrequencyscaling.pb.go b/modules/hostfrequencyscaling/proto/hostfrequencyscaling.pb.go new file mode 100644 index 000000000..6011bdbc3 --- /dev/null +++ b/modules/hostfrequencyscaling/proto/hostfrequencyscaling.pb.go @@ -0,0 +1,253 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// source: hostfrequencyscaling.proto + +package proto + +import proto "github.com/golang/protobuf/proto" +import fmt "fmt" +import math "math" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the proto package it is being compiled against. +// A compilation error at this line likely means your copy of the +// proto package needs to be updated. 
+const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package + +type HostFreqScalingConfig struct { + FreqScalPolicies map[string]*HostFreqScalingPolicy `protobuf:"bytes,1,rep,name=freq_scal_policies,json=freqScalPolicies,proto3" json:"freq_scal_policies,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + FreqSensorUrl string `protobuf:"bytes,2,opt,name=freq_sensor_url,json=freqSensorUrl,proto3" json:"freq_sensor_url,omitempty"` + ScalingFreqPolicy string `protobuf:"bytes,3,opt,name=scaling_freq_policy,json=scalingFreqPolicy,proto3" json:"scaling_freq_policy,omitempty"` + LowToHighScaler string `protobuf:"bytes,4,opt,name=low_to_high_scaler,json=lowToHighScaler,proto3" json:"low_to_high_scaler,omitempty"` + HighToLowScaler string `protobuf:"bytes,5,opt,name=high_to_low_scaler,json=highToLowScaler,proto3" json:"high_to_low_scaler,omitempty"` + TimeBoundThrottleRetentionDuration int32 `protobuf:"varint,6,opt,name=time_bound_throttle_retention_duration,json=timeBoundThrottleRetentionDuration,proto3" json:"time_bound_throttle_retention_duration,omitempty"` + ThrottleRetention bool `protobuf:"varint,7,opt,name=throttle_retention,json=throttleRetention,proto3" json:"throttle_retention,omitempty"` + ThermalSensorUrl string `protobuf:"bytes,8,opt,name=thermal_sensor_url,json=thermalSensorUrl,proto3" json:"thermal_sensor_url,omitempty"` + TimeBoundThrottleRetention bool `protobuf:"varint,9,opt,name=time_bound_throttle_retention,json=timeBoundThrottleRetention,proto3" json:"time_bound_throttle_retention,omitempty"` + ThermalBoundThrottleRetention bool `protobuf:"varint,10,opt,name=thermal_bound_throttle_retention,json=thermalBoundThrottleRetention,proto3" json:"thermal_bound_throttle_retention,omitempty"` + ThermalBoundThrottleRetentionThreshold int32 `protobuf:"varint,11,opt,name=thermal_bound_throttle_retention_threshold,json=thermalBoundThrottleRetentionThreshold,proto3" 
json:"thermal_bound_throttle_retention_threshold,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *HostFreqScalingConfig) Reset() { *m = HostFreqScalingConfig{} } +func (m *HostFreqScalingConfig) String() string { return proto.CompactTextString(m) } +func (*HostFreqScalingConfig) ProtoMessage() {} +func (*HostFreqScalingConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_hostfrequencyscaling_c41765decf96698a, []int{0} +} +func (m *HostFreqScalingConfig) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_HostFreqScalingConfig.Unmarshal(m, b) +} +func (m *HostFreqScalingConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_HostFreqScalingConfig.Marshal(b, m, deterministic) +} +func (dst *HostFreqScalingConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_HostFreqScalingConfig.Merge(dst, src) +} +func (m *HostFreqScalingConfig) XXX_Size() int { + return xxx_messageInfo_HostFreqScalingConfig.Size(m) +} +func (m *HostFreqScalingConfig) XXX_DiscardUnknown() { + xxx_messageInfo_HostFreqScalingConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_HostFreqScalingConfig proto.InternalMessageInfo + +func (m *HostFreqScalingConfig) GetFreqScalPolicies() map[string]*HostFreqScalingPolicy { + if m != nil { + return m.FreqScalPolicies + } + return nil +} + +func (m *HostFreqScalingConfig) GetFreqSensorUrl() string { + if m != nil { + return m.FreqSensorUrl + } + return "" +} + +func (m *HostFreqScalingConfig) GetScalingFreqPolicy() string { + if m != nil { + return m.ScalingFreqPolicy + } + return "" +} + +func (m *HostFreqScalingConfig) GetLowToHighScaler() string { + if m != nil { + return m.LowToHighScaler + } + return "" +} + +func (m *HostFreqScalingConfig) GetHighToLowScaler() string { + if m != nil { + return m.HighToLowScaler + } + return "" +} + +func (m *HostFreqScalingConfig) GetTimeBoundThrottleRetentionDuration() int32 { + 
if m != nil { + return m.TimeBoundThrottleRetentionDuration + } + return 0 +} + +func (m *HostFreqScalingConfig) GetThrottleRetention() bool { + if m != nil { + return m.ThrottleRetention + } + return false +} + +func (m *HostFreqScalingConfig) GetThermalSensorUrl() string { + if m != nil { + return m.ThermalSensorUrl + } + return "" +} + +func (m *HostFreqScalingConfig) GetTimeBoundThrottleRetention() bool { + if m != nil { + return m.TimeBoundThrottleRetention + } + return false +} + +func (m *HostFreqScalingConfig) GetThermalBoundThrottleRetention() bool { + if m != nil { + return m.ThermalBoundThrottleRetention + } + return false +} + +func (m *HostFreqScalingConfig) GetThermalBoundThrottleRetentionThreshold() int32 { + if m != nil { + return m.ThermalBoundThrottleRetentionThreshold + } + return 0 +} + +type HostFreqScalingPolicy struct { + ScalingGovernor string `protobuf:"bytes,1,opt,name=scaling_governor,json=scalingGovernor,proto3" json:"scaling_governor,omitempty"` + ScalingMinFreq string `protobuf:"bytes,2,opt,name=scaling_min_freq,json=scalingMinFreq,proto3" json:"scaling_min_freq,omitempty"` + ScalingMaxFreq string `protobuf:"bytes,3,opt,name=scaling_max_freq,json=scalingMaxFreq,proto3" json:"scaling_max_freq,omitempty"` + NodeArch string `protobuf:"bytes,4,opt,name=node_arch,json=nodeArch,proto3" json:"node_arch,omitempty"` + NodePlatform string `protobuf:"bytes,5,opt,name=node_platform,json=nodePlatform,proto3" json:"node_platform,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *HostFreqScalingPolicy) Reset() { *m = HostFreqScalingPolicy{} } +func (m *HostFreqScalingPolicy) String() string { return proto.CompactTextString(m) } +func (*HostFreqScalingPolicy) ProtoMessage() {} +func (*HostFreqScalingPolicy) Descriptor() ([]byte, []int) { + return fileDescriptor_hostfrequencyscaling_c41765decf96698a, []int{1} +} +func (m *HostFreqScalingPolicy) XXX_Unmarshal(b 
[]byte) error { + return xxx_messageInfo_HostFreqScalingPolicy.Unmarshal(m, b) +} +func (m *HostFreqScalingPolicy) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_HostFreqScalingPolicy.Marshal(b, m, deterministic) +} +func (dst *HostFreqScalingPolicy) XXX_Merge(src proto.Message) { + xxx_messageInfo_HostFreqScalingPolicy.Merge(dst, src) +} +func (m *HostFreqScalingPolicy) XXX_Size() int { + return xxx_messageInfo_HostFreqScalingPolicy.Size(m) +} +func (m *HostFreqScalingPolicy) XXX_DiscardUnknown() { + xxx_messageInfo_HostFreqScalingPolicy.DiscardUnknown(m) +} + +var xxx_messageInfo_HostFreqScalingPolicy proto.InternalMessageInfo + +func (m *HostFreqScalingPolicy) GetScalingGovernor() string { + if m != nil { + return m.ScalingGovernor + } + return "" +} + +func (m *HostFreqScalingPolicy) GetScalingMinFreq() string { + if m != nil { + return m.ScalingMinFreq + } + return "" +} + +func (m *HostFreqScalingPolicy) GetScalingMaxFreq() string { + if m != nil { + return m.ScalingMaxFreq + } + return "" +} + +func (m *HostFreqScalingPolicy) GetNodeArch() string { + if m != nil { + return m.NodeArch + } + return "" +} + +func (m *HostFreqScalingPolicy) GetNodePlatform() string { + if m != nil { + return m.NodePlatform + } + return "" +} + +func init() { + proto.RegisterType((*HostFreqScalingConfig)(nil), "proto.HostFreqScalingConfig") + proto.RegisterMapType((map[string]*HostFreqScalingPolicy)(nil), "proto.HostFreqScalingConfig.FreqScalPoliciesEntry") + proto.RegisterType((*HostFreqScalingPolicy)(nil), "proto.HostFreqScalingPolicy") +} + +func init() { + proto.RegisterFile("hostfrequencyscaling.proto", fileDescriptor_hostfrequencyscaling_c41765decf96698a) +} + +var fileDescriptor_hostfrequencyscaling_c41765decf96698a = []byte{ + // 501 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x84, 0x93, 0xcf, 0x6e, 0xd3, 0x40, + 0x10, 0xc6, 0xe5, 0x86, 0x94, 0x64, 0x43, 0x49, 0xba, 0xa8, 0x92, 
0x15, 0xa8, 0x64, 0x05, 0x29, + 0x32, 0xff, 0x72, 0x08, 0x17, 0xc4, 0xad, 0xfc, 0x6b, 0x0f, 0x20, 0x55, 0x6e, 0xb8, 0x70, 0x59, + 0xb6, 0xce, 0xc6, 0x5e, 0xb1, 0xd9, 0x49, 0xd7, 0xeb, 0xa6, 0x7e, 0x22, 0x5e, 0x89, 0xc7, 0x41, + 0x3b, 0x5e, 0xab, 0xb4, 0x4d, 0xca, 0x29, 0xab, 0x6f, 0x7e, 0xf3, 0xcd, 0x64, 0xf4, 0x99, 0x0c, + 0x73, 0x28, 0xec, 0xc2, 0x88, 0x8b, 0x52, 0xe8, 0xb4, 0x2a, 0x52, 0xae, 0xa4, 0xce, 0x26, 0x2b, + 0x03, 0x16, 0x68, 0x1b, 0x7f, 0x46, 0xbf, 0x77, 0xc9, 0xc1, 0x09, 0x14, 0xf6, 0x8b, 0x11, 0x17, + 0x67, 0x35, 0xf0, 0x11, 0xf4, 0x42, 0x66, 0xf4, 0x27, 0xa1, 0xae, 0x95, 0xb9, 0x36, 0xb6, 0x02, + 0x25, 0x53, 0x29, 0x8a, 0x30, 0x88, 0x5a, 0x71, 0x6f, 0x3a, 0xad, 0x4d, 0x26, 0x1b, 0x3b, 0x27, + 0x8d, 0x72, 0xea, 0x9b, 0x3e, 0x6b, 0x6b, 0xaa, 0x64, 0xb0, 0xb8, 0x25, 0xd3, 0x31, 0xe9, 0xd7, + 0x13, 0x84, 0x2e, 0xc0, 0xb0, 0xd2, 0xa8, 0x70, 0x27, 0x0a, 0xe2, 0x6e, 0xb2, 0x87, 0x28, 0xaa, + 0xdf, 0x8d, 0xa2, 0x13, 0xf2, 0xc4, 0xef, 0xce, 0x90, 0xc7, 0x65, 0xaa, 0xb0, 0x85, 0xec, 0xbe, + 0x2f, 0xb9, 0xa1, 0xe8, 0x5c, 0xd1, 0x57, 0x84, 0x2a, 0x58, 0x33, 0x0b, 0x2c, 0x97, 0x59, 0x8e, + 0x7f, 0x40, 0x98, 0xf0, 0x01, 0xe2, 0x7d, 0x05, 0xeb, 0x19, 0x9c, 0xc8, 0x2c, 0x3f, 0x43, 0xd9, + 0xc1, 0x48, 0x59, 0x60, 0xae, 0xc9, 0xc3, 0xed, 0x1a, 0x76, 0x95, 0x19, 0x7c, 0x85, 0xb5, 0x87, + 0x13, 0x32, 0xb6, 0x72, 0x29, 0xd8, 0x39, 0x94, 0x7a, 0xce, 0x6c, 0x6e, 0xc0, 0x5a, 0x25, 0x98, + 0x11, 0x56, 0x68, 0x2b, 0x41, 0xb3, 0x79, 0x69, 0xb8, 0x7b, 0x84, 0xbb, 0x51, 0x10, 0xb7, 0x93, + 0x91, 0xa3, 0x3f, 0x38, 0x78, 0xe6, 0xd9, 0xa4, 0x41, 0x3f, 0x79, 0x92, 0xbe, 0x21, 0xf4, 0xae, + 0x51, 0xf8, 0x30, 0x0a, 0xe2, 0x4e, 0xb2, 0x6f, 0x6f, 0xb7, 0xd1, 0xd7, 0x0e, 0x17, 0x66, 0xc9, + 0xd5, 0xbf, 0x77, 0xeb, 0xe0, 0xbe, 0x03, 0x5f, 0xb9, 0x3e, 0xdd, 0x11, 0x39, 0xbc, 0x77, 0xe1, + 0xb0, 0x8b, 0x73, 0x86, 0xdb, 0xf7, 0xa4, 0xc7, 0x24, 0x6a, 0x06, 0x6e, 0x75, 0x21, 0xe8, 0x72, + 0xe8, 0xb9, 0x2d, 0x46, 0x3f, 0xc8, 0xcb, 0xff, 0x19, 0x39, 0x49, 0x14, 0x39, 0xa8, 0x79, 0xd8, + 0xc3, 
0x03, 0x8e, 0xef, 0xb5, 0x9c, 0x35, 0xf4, 0x90, 0x93, 0x83, 0x8d, 0xa9, 0xa3, 0x03, 0xd2, + 0xfa, 0x25, 0xaa, 0x30, 0xc0, 0xfb, 0xb8, 0x27, 0x9d, 0x92, 0xf6, 0x25, 0x57, 0xa5, 0xc0, 0xac, + 0xf5, 0xa6, 0xcf, 0x36, 0x47, 0xb9, 0x8e, 0x52, 0x52, 0xa3, 0xef, 0x77, 0xde, 0x05, 0xa3, 0x3f, + 0xc1, 0x9d, 0x2f, 0xc5, 0xe7, 0xed, 0x05, 0x19, 0x34, 0xf9, 0xcc, 0xe0, 0x52, 0x18, 0x0d, 0xc6, + 0x0f, 0xec, 0x7b, 0xfd, 0xd8, 0xcb, 0x34, 0xbe, 0x46, 0x97, 0x52, 0x63, 0x9c, 0x7d, 0xe6, 0x1f, + 0x7b, 0xfd, 0x9b, 0xd4, 0x6e, 0xc2, 0x0d, 0x92, 0x5f, 0xd5, 0x64, 0xeb, 0x26, 0xc9, 0xaf, 0x90, + 0x7c, 0x4a, 0xba, 0x1a, 0xe6, 0x82, 0x71, 0x93, 0xe6, 0x3e, 0xe5, 0x1d, 0x27, 0x1c, 0x99, 0x34, + 0xa7, 0xcf, 0xc9, 0x1e, 0x16, 0x57, 0x8a, 0xdb, 0x05, 0x98, 0xa5, 0x4f, 0xf6, 0x23, 0x27, 0x9e, + 0x7a, 0xed, 0x7c, 0x17, 0x4f, 0xf0, 0xf6, 0x6f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xdb, 0xf3, 0x89, + 0x18, 0x30, 0x04, 0x00, 0x00, +} diff --git a/modules/hostfrequencyscaling/proto/hostfrequencyscaling.proto b/modules/hostfrequencyscaling/proto/hostfrequencyscaling.proto new file mode 100644 index 000000000..04830860e --- /dev/null +++ b/modules/hostfrequencyscaling/proto/hostfrequencyscaling.proto @@ -0,0 +1,34 @@ +/* hostfrequencyscaling.proto: describes the hostfrequencyscaling object + * + * Authors: Ghazanfar Ali ,Kevin Pelzel ; J. Lowell Wofford + * + * This software is open source software available under the BSD-3 license. + * Copyright (c) 2019, Triad National Security, LLC + * See LICENSE file for details. 
+ */ + + syntax = "proto3"; + package proto; + + message HostFreqScalingConfig { + map freq_scal_policies = 1; + string freq_sensor_url = 2; + string scaling_freq_policy = 3; + string low_to_high_scaler = 4; + string high_to_low_scaler = 5; + int32 time_bound_throttle_retention_duration = 6; + bool throttle_retention = 7; + string thermal_sensor_url = 8; + bool time_bound_throttle_retention = 9; + bool thermal_bound_throttle_retention = 10; + int32 thermal_bound_throttle_retention_threshold = 11; + +} + + message HostFreqScalingPolicy { + string scaling_governor = 1; + string scaling_min_freq = 2; + string scaling_max_freq = 3; + string node_arch = 4; + string node_platform = 5; +} \ No newline at end of file diff --git a/modules/hostthermaldiscovery/hostthermaldiscovery.go b/modules/hostthermaldiscovery/hostthermaldiscovery.go index 71c29e8c4..90e513f5a 100644 --- a/modules/hostthermaldiscovery/hostthermaldiscovery.go +++ b/modules/hostthermaldiscovery/hostthermaldiscovery.go @@ -7,8 +7,6 @@ * See LICENSE file for details. 
*/ -//go:generate protoc -I ../../core/proto/include -I proto --go_out=plugins=grpc:proto proto/pxe.proto - package hostthermaldiscovery import ( @@ -25,7 +23,9 @@ import ( "github.com/golang/protobuf/ptypes" "github.com/hpc/kraken/core" cpb "github.com/hpc/kraken/core/proto" + scalpb "github.com/hpc/kraken/extensions/HostFrequencyScaler/proto" thpb "github.com/hpc/kraken/extensions/HostThermal/proto" + "github.com/hpc/kraken/lib" pb "github.com/hpc/kraken/modules/hostthermaldiscovery/proto" ) @@ -42,6 +42,14 @@ const ( HostThermalStateURL = "type.googleapis.com/proto.HostThermal/State" // ModuleStateURL refers to module state ModuleStateURL = "/Services/hostthermaldiscovery/State" + // hostFreqScalerURL provides URL for host frequency scaler at host run time + hostFreqScalerURL string = "type.googleapis.com/proto.HostFrequencyScaler/State" + + // freqSensorPath holds frequency sensor path on pi node + freqSensorPath string = "/sys/devices/system/cpu/cpufreq/policy0/" + + // thermalSensorUrl holds thermal sensor path on pi node + thermalSensorUrl string = "/sys/devices/virtual/thermal/thermal_zone0/temp" ) var _ lib.Module = (*HostDisc)(nil) @@ -49,13 +57,21 @@ var _ lib.ModuleWithConfig = (*HostDisc)(nil) var _ lib.ModuleWithDiscovery = (*HostDisc)(nil) var _ lib.ModuleSelfService = (*HostDisc)(nil) +var profileMap = map[string]string{ + "performance": scalpb.HostFrequencyScaler_PERFORMANCE.String(), + "powersave": scalpb.HostFrequencyScaler_POWER_SAVE.String(), + "schedutil": scalpb.HostFrequencyScaler_SCHEDUTIL.String(), +} + // HostDisc provides hostdiscovery module capabilities type HostDisc struct { - prevTemp int32 - api lib.APIClient - cfg *pb.HostDiscoveryConfig - dchan chan<- lib.Event - pollTicker *time.Ticker + prevTemp int32 + file *os.File + preFreqScaler string + api lib.APIClient + cfg *pb.HostDiscoveryConfig + dchan chan<- lib.Event + pollTicker *time.Ticker } // Name returns the FQDN of the module @@ -64,18 +80,17 @@ func (*HostDisc) Name() string 
{ return "github.com/hpc/kraken/modules/hosttherm // NewConfig returns a fully initialized default config func (*HostDisc) NewConfig() proto.Message { r := &pb.HostDiscoveryConfig{ - PollingInterval: "10s", - TempSensorPath: "/sys/devices/virtual/thermal/thermal_zone0/temp", - ThermalThresholds: map[string]*pb.HostThermalThresholds{ - "CPUThermalThresholds": { - LowerNormal: 3000, - UpperNormal: 60000, - LowerHigh: 60000, - UpperHigh: 70000, - LowerCritical: 3000, - UpperCritical: 70000, - }, - }, + PollingInterval: "1s", + TempSensorPath: thermalSensorUrl, + FreqSensorUrl: freqSensorPath, + LogThermalData: true, + LogHere: "/tmp/ThermalLog.txt", + LowerNormal: 3000, + UpperNormal: 80000, + LowerHigh: 80000, + UpperHigh: 98000, + LowerCritical: 3000, + UpperCritical: 98000, } return r } @@ -104,6 +119,7 @@ func init() { module := &HostDisc{} discovers := make(map[string]map[string]reflect.Value) hostThermDisc := make(map[string]reflect.Value) + hostFreqScalerDiscs := make(map[string]reflect.Value) hostThermDisc[thpb.HostThermal_CPU_TEMP_NONE.String()] = reflect.ValueOf(thpb.HostThermal_CPU_TEMP_NONE) hostThermDisc[thpb.HostThermal_CPU_TEMP_NORMAL.String()] = reflect.ValueOf(thpb.HostThermal_CPU_TEMP_NORMAL) @@ -112,6 +128,11 @@ func init() { discovers[HostThermalStateURL] = hostThermDisc + hostFreqScalerDiscs[scalpb.HostFrequencyScaler_NONE.String()] = reflect.ValueOf(scalpb.HostFrequencyScaler_NONE) + hostFreqScalerDiscs[scalpb.HostFrequencyScaler_PERFORMANCE.String()] = reflect.ValueOf(scalpb.HostFrequencyScaler_PERFORMANCE) + hostFreqScalerDiscs[scalpb.HostFrequencyScaler_POWER_SAVE.String()] = reflect.ValueOf(scalpb.HostFrequencyScaler_POWER_SAVE) + discovers[hostFreqScalerURL] = hostFreqScalerDiscs + discovers[ModuleStateURL] = map[string]reflect.Value{ "RUN": reflect.ValueOf(cpb.ServiceInstance_RUN)} @@ -127,6 +148,13 @@ func init() { func (hostDisc *HostDisc) Init(api lib.APIClient) { hostDisc.api = api hostDisc.cfg = 
hostDisc.NewConfig().(*pb.HostDiscoveryConfig) + + var err error + hostDisc.file, err = os.OpenFile(hostDisc.cfg.GetLogHere(), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0644) + if err != nil { + hostDisc.api.Logf(lib.LLERROR, "failed opening file: %v", err) + } + } // Stop should perform a graceful exit @@ -157,11 +185,68 @@ func (hostDisc *HostDisc) Entry() { select { case <-hostDisc.pollTicker.C: go hostDisc.discoverHostCPUTemp() + go hostDisc.DiscFreqScaler() + if hostDisc.cfg.GetLogThermalData() == true { + go hostDisc.CapturingStatData() + } + break } } } +// CapturingStatData appends a record with the current time, the last recorded CPU temperature and the last recorded frequency scaler to the thermal log file +func (hostDisc *HostDisc) CapturingStatData() { + freqScaler := hostDisc.preFreqScaler + temp := hostDisc.prevTemp + t := time.Now() // / int64(time.Millisecond) + record := fmt.Sprintf("%s,%d,%s\n", t.String(), temp, freqScaler) + + _, err := hostDisc.file.WriteString(record) + if err != nil { + hostDisc.api.Logf(lib.LLERROR, "failed opening file: %v", err) + } + +} + +// DiscFreqScaler reads the current CPU frequency scaler and emits a discovery event for it +func (hostDisc *HostDisc) DiscFreqScaler() { + + hostFreqScaler := hostDisc.ReadFreqScaler() + + hostDisc.preFreqScaler = hostFreqScaler + + vid := profileMap[hostFreqScaler] + + url := lib.NodeURLJoin(hostDisc.api.Self().String(), hostFreqScalerURL) + + // Generate a discovery event for the CPU frequency scaler state + v := core.NewEvent( + lib.Event_DISCOVERY, + url, + &core.DiscoveryEvent{ + + URL: url, + ValueID: vid, + }, + ) + hostDisc.dchan <- v + +} + +// ReadFreqScaler returns the current scaling governor read from the configured frequency sensor path, or an empty string on failure +func (hostDisc *HostDisc) ReadFreqScaler() string { + + basePath := hostDisc.cfg.GetFreqSensorUrl() + bscalingGovernor, err := ioutil.ReadFile(basePath + "scaling_governor") + if err != nil { + hostDisc.api.Logf(lib.LLERROR, "Reading CPU thermal sensor failed: %v", err) + return "" + } + + return strings.TrimSuffix(string(bscalingGovernor), "\n") +} + // discoverHostCPUTemp is used to acquire CPU thermal locally. 
func (hostDisc *HostDisc) discoverHostCPUTemp() { hostCPUTemp := hostDisc.GetCPUTemp() @@ -251,15 +336,14 @@ func (hostDisc *HostDisc) lambdaStateDiscovery(v CPUTempObj) (string, int32) { cpuTemp := v.CPUTemp cpuTempState := thpb.HostThermal_CPU_TEMP_NONE - cpuThermalThresholds := hostDisc.cfg.GetThermalThresholds() - lowerNormal := cpuThermalThresholds["CPUThermalThresholds"].GetLowerNormal() - upperNormal := cpuThermalThresholds["CPUThermalThresholds"].GetUpperNormal() + lowerNormal := hostDisc.cfg.GetLowerNormal() + upperNormal := hostDisc.cfg.GetUpperNormal() - lowerHigh := cpuThermalThresholds["CPUThermalThresholds"].GetLowerHigh() - upperHigh := cpuThermalThresholds["CPUThermalThresholds"].GetUpperHigh() + lowerHigh := hostDisc.cfg.GetLowerHigh() + upperHigh := hostDisc.cfg.GetUpperHigh() - lowerCritical := cpuThermalThresholds["CPUThermalThresholds"].GetLowerCritical() - upperCritical := cpuThermalThresholds["CPUThermalThresholds"].GetUpperCritical() + lowerCritical := hostDisc.cfg.GetLowerCritical() + upperCritical := hostDisc.cfg.GetUpperCritical() if cpuTemp <= lowerCritical || cpuTemp >= upperCritical { cpuTempState = thpb.HostThermal_CPU_TEMP_CRITICAL diff --git a/modules/hostthermaldiscovery/proto/hostthermaldiscovery.pb.go b/modules/hostthermaldiscovery/proto/hostthermaldiscovery.pb.go index 251613715..e7ae34582 100644 --- a/modules/hostthermaldiscovery/proto/hostthermaldiscovery.pb.go +++ b/modules/hostthermaldiscovery/proto/hostthermaldiscovery.pb.go @@ -19,19 +19,27 @@ var _ = math.Inf const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package type HostDiscoveryConfig struct { - PollingInterval string `protobuf:"bytes,1,opt,name=polling_interval,json=pollingInterval,proto3" json:"polling_interval,omitempty"` - TempSensorPath string `protobuf:"bytes,2,opt,name=temp_sensor_path,json=tempSensorPath,proto3" json:"temp_sensor_path,omitempty"` - ThermalThresholds map[string]*HostThermalThresholds 
`protobuf:"bytes,3,rep,name=thermal_thresholds,json=thermalThresholds,proto3" json:"thermal_thresholds,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + PollingInterval string `protobuf:"bytes,1,opt,name=polling_interval,json=pollingInterval,proto3" json:"polling_interval,omitempty"` + TempSensorPath string `protobuf:"bytes,2,opt,name=temp_sensor_path,json=tempSensorPath,proto3" json:"temp_sensor_path,omitempty"` + FreqSensorUrl string `protobuf:"bytes,3,opt,name=freq_sensor_url,json=freqSensorUrl,proto3" json:"freq_sensor_url,omitempty"` + LogThermalData bool `protobuf:"varint,4,opt,name=log_thermal_data,json=logThermalData,proto3" json:"log_thermal_data,omitempty"` + LogHere string `protobuf:"bytes,5,opt,name=log_here,json=logHere,proto3" json:"log_here,omitempty"` + LowerNormal int32 `protobuf:"varint,6,opt,name=lower_normal,json=lowerNormal,proto3" json:"lower_normal,omitempty"` + UpperNormal int32 `protobuf:"varint,7,opt,name=upper_normal,json=upperNormal,proto3" json:"upper_normal,omitempty"` + LowerHigh int32 `protobuf:"varint,8,opt,name=lower_high,json=lowerHigh,proto3" json:"lower_high,omitempty"` + UpperHigh int32 `protobuf:"varint,9,opt,name=upper_high,json=upperHigh,proto3" json:"upper_high,omitempty"` + LowerCritical int32 `protobuf:"varint,10,opt,name=lower_critical,json=lowerCritical,proto3" json:"lower_critical,omitempty"` + UpperCritical int32 `protobuf:"varint,11,opt,name=upper_critical,json=upperCritical,proto3" json:"upper_critical,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *HostDiscoveryConfig) Reset() { *m = HostDiscoveryConfig{} } func (m *HostDiscoveryConfig) String() string { return proto.CompactTextString(m) } func (*HostDiscoveryConfig) ProtoMessage() {} func (*HostDiscoveryConfig) 
Descriptor() ([]byte, []int) { - return fileDescriptor_hostthermaldiscovery_063889daa550b856, []int{0} + return fileDescriptor_hostthermaldiscovery_01a159d510988b58, []int{0} } func (m *HostDiscoveryConfig) XXX_Unmarshal(b []byte) error { return xxx_messageInfo_HostDiscoveryConfig.Unmarshal(m, b) @@ -65,85 +73,63 @@ func (m *HostDiscoveryConfig) GetTempSensorPath() string { return "" } -func (m *HostDiscoveryConfig) GetThermalThresholds() map[string]*HostThermalThresholds { +func (m *HostDiscoveryConfig) GetFreqSensorUrl() string { if m != nil { - return m.ThermalThresholds + return m.FreqSensorUrl } - return nil + return "" } -type HostThermalThresholds struct { - LowerNormal int32 `protobuf:"varint,1,opt,name=lower_normal,json=lowerNormal,proto3" json:"lower_normal,omitempty"` - UpperNormal int32 `protobuf:"varint,2,opt,name=upper_normal,json=upperNormal,proto3" json:"upper_normal,omitempty"` - LowerHigh int32 `protobuf:"varint,3,opt,name=lower_high,json=lowerHigh,proto3" json:"lower_high,omitempty"` - UpperHigh int32 `protobuf:"varint,4,opt,name=upper_high,json=upperHigh,proto3" json:"upper_high,omitempty"` - LowerCritical int32 `protobuf:"varint,5,opt,name=lower_critical,json=lowerCritical,proto3" json:"lower_critical,omitempty"` - UpperCritical int32 `protobuf:"varint,6,opt,name=upper_critical,json=upperCritical,proto3" json:"upper_critical,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` +func (m *HostDiscoveryConfig) GetLogThermalData() bool { + if m != nil { + return m.LogThermalData + } + return false } -func (m *HostThermalThresholds) Reset() { *m = HostThermalThresholds{} } -func (m *HostThermalThresholds) String() string { return proto.CompactTextString(m) } -func (*HostThermalThresholds) ProtoMessage() {} -func (*HostThermalThresholds) Descriptor() ([]byte, []int) { - return fileDescriptor_hostthermaldiscovery_063889daa550b856, []int{1} -} -func (m *HostThermalThresholds) 
XXX_Unmarshal(b []byte) error { - return xxx_messageInfo_HostThermalThresholds.Unmarshal(m, b) -} -func (m *HostThermalThresholds) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - return xxx_messageInfo_HostThermalThresholds.Marshal(b, m, deterministic) -} -func (dst *HostThermalThresholds) XXX_Merge(src proto.Message) { - xxx_messageInfo_HostThermalThresholds.Merge(dst, src) -} -func (m *HostThermalThresholds) XXX_Size() int { - return xxx_messageInfo_HostThermalThresholds.Size(m) -} -func (m *HostThermalThresholds) XXX_DiscardUnknown() { - xxx_messageInfo_HostThermalThresholds.DiscardUnknown(m) +func (m *HostDiscoveryConfig) GetLogHere() string { + if m != nil { + return m.LogHere + } + return "" } -var xxx_messageInfo_HostThermalThresholds proto.InternalMessageInfo - -func (m *HostThermalThresholds) GetLowerNormal() int32 { +func (m *HostDiscoveryConfig) GetLowerNormal() int32 { if m != nil { return m.LowerNormal } return 0 } -func (m *HostThermalThresholds) GetUpperNormal() int32 { +func (m *HostDiscoveryConfig) GetUpperNormal() int32 { if m != nil { return m.UpperNormal } return 0 } -func (m *HostThermalThresholds) GetLowerHigh() int32 { +func (m *HostDiscoveryConfig) GetLowerHigh() int32 { if m != nil { return m.LowerHigh } return 0 } -func (m *HostThermalThresholds) GetUpperHigh() int32 { +func (m *HostDiscoveryConfig) GetUpperHigh() int32 { if m != nil { return m.UpperHigh } return 0 } -func (m *HostThermalThresholds) GetLowerCritical() int32 { +func (m *HostDiscoveryConfig) GetLowerCritical() int32 { if m != nil { return m.LowerCritical } return 0 } -func (m *HostThermalThresholds) GetUpperCritical() int32 { +func (m *HostDiscoveryConfig) GetUpperCritical() int32 { if m != nil { return m.UpperCritical } @@ -152,35 +138,31 @@ func (m *HostThermalThresholds) GetUpperCritical() int32 { func init() { proto.RegisterType((*HostDiscoveryConfig)(nil), "proto.HostDiscoveryConfig") - proto.RegisterMapType((map[string]*HostThermalThresholds)(nil), 
"proto.HostDiscoveryConfig.ThermalThresholdsEntry") - proto.RegisterType((*HostThermalThresholds)(nil), "proto.HostThermalThresholds") } func init() { - proto.RegisterFile("hostthermaldiscovery.proto", fileDescriptor_hostthermaldiscovery_063889daa550b856) -} - -var fileDescriptor_hostthermaldiscovery_063889daa550b856 = []byte{ - // 327 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x6c, 0x90, 0xd1, 0x4a, 0xc3, 0x30, - 0x14, 0x86, 0x69, 0x6b, 0x07, 0x3b, 0xd3, 0x39, 0x23, 0x4a, 0x19, 0x0a, 0x73, 0x20, 0xcc, 0x9b, - 0x81, 0xf3, 0x46, 0xbc, 0x9d, 0xc2, 0xbc, 0x11, 0xa9, 0xbb, 0xaf, 0xdd, 0x16, 0x9b, 0x60, 0xd6, - 0x94, 0xe4, 0x6c, 0xb2, 0xb7, 0xf2, 0xb5, 0x7c, 0x0b, 0xe9, 0x49, 0x37, 0x45, 0x7b, 0xd5, 0xf2, - 0xfd, 0xdf, 0x39, 0xf9, 0x13, 0xe8, 0x0a, 0x6d, 0x11, 0x05, 0x37, 0xcb, 0x54, 0x2d, 0xa4, 0x9d, - 0xeb, 0x35, 0x37, 0x9b, 0x61, 0x61, 0x34, 0x6a, 0x16, 0xd2, 0xa7, 0xff, 0xe9, 0xc3, 0xf1, 0x44, - 0x5b, 0xbc, 0xdf, 0xc6, 0x63, 0x9d, 0xbf, 0xc9, 0x8c, 0x5d, 0x41, 0xa7, 0xd0, 0x4a, 0xc9, 0x3c, - 0x4b, 0x64, 0x8e, 0xdc, 0xac, 0x53, 0x15, 0x79, 0x3d, 0x6f, 0xd0, 0x8c, 0x0f, 0x2b, 0xfe, 0x58, - 0x61, 0x36, 0x80, 0x0e, 0xf2, 0x65, 0x91, 0x58, 0x9e, 0x5b, 0x6d, 0x92, 0x22, 0x45, 0x11, 0xf9, - 0xa4, 0xb6, 0x4b, 0xfe, 0x42, 0xf8, 0x39, 0x45, 0xc1, 0x5e, 0x81, 0x55, 0x6d, 0x12, 0x14, 0x86, - 0x5b, 0xa1, 0xd5, 0xc2, 0x46, 0x41, 0x2f, 0x18, 0xb4, 0x46, 0xd7, 0xae, 0xd7, 0xb0, 0xa6, 0xcc, - 0x70, 0xea, 0x86, 0xa6, 0xbb, 0x99, 0x87, 0x1c, 0xcd, 0x26, 0x3e, 0xc2, 0xbf, 0xbc, 0x3b, 0x83, - 0xd3, 0x7a, 0x99, 0x75, 0x20, 0x78, 0xe7, 0x9b, 0xea, 0x0e, 0xe5, 0x2f, 0x1b, 0x41, 0xb8, 0x4e, - 0xd5, 0x8a, 0x53, 0xd9, 0xd6, 0xe8, 0xec, 0x57, 0x81, 0x7f, 0x3b, 0x62, 0xa7, 0xde, 0xf9, 0xb7, - 0x5e, 0xff, 0xcb, 0x83, 0x93, 0x5a, 0x89, 0x5d, 0xc0, 0xbe, 0xd2, 0x1f, 0xdc, 0x24, 0xb9, 0x2e, - 0x13, 0x3a, 0x2c, 0x8c, 0x5b, 0xc4, 0x9e, 0x08, 0x95, 0xca, 0xaa, 0x28, 0x7e, 0x14, 0xdf, 0x29, - 0xc4, 0x2a, 0xe5, 0x1c, 0xc0, 0x6d, 0x11, 0x32, 
0x13, 0x51, 0x40, 0x42, 0x93, 0xc8, 0x44, 0x66, - 0xa2, 0x8c, 0xdd, 0x06, 0x8a, 0xf7, 0x5c, 0x4c, 0x84, 0xe2, 0x4b, 0x68, 0xbb, 0xe9, 0xb9, 0x91, - 0x28, 0xe7, 0xa9, 0x8a, 0x42, 0x52, 0x0e, 0x88, 0x8e, 0x2b, 0x58, 0x6a, 0x6e, 0xcb, 0x4e, 0x6b, - 0x38, 0x8d, 0xe8, 0x56, 0x9b, 0x35, 0xe8, 0x4d, 0x6e, 0xbe, 0x03, 0x00, 0x00, 0xff, 0xff, 0xac, - 0x5a, 0x78, 0xab, 0x4a, 0x02, 0x00, 0x00, + proto.RegisterFile("hostthermaldiscovery.proto", fileDescriptor_hostthermaldiscovery_01a159d510988b58) +} + +var fileDescriptor_hostthermaldiscovery_01a159d510988b58 = []byte{ + // 298 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x44, 0xd1, 0xcb, 0x4a, 0x03, 0x31, + 0x14, 0xc6, 0x71, 0xc6, 0xda, 0x5b, 0x6a, 0x2f, 0xc4, 0x4d, 0x14, 0x84, 0x2a, 0x28, 0xe3, 0xc6, + 0x8d, 0x8f, 0xd0, 0x2e, 0xea, 0x46, 0xa4, 0xea, 0x3a, 0xc4, 0x36, 0x4d, 0x02, 0xe9, 0x9c, 0x78, + 0x26, 0xad, 0xf8, 0x48, 0xbe, 0xa5, 0xe4, 0x64, 0x46, 0x57, 0x03, 0xff, 0xf3, 0x9b, 0x8f, 0x81, + 0x61, 0x97, 0x16, 0xea, 0x18, 0xad, 0xc6, 0xbd, 0xf2, 0x5b, 0x57, 0x6f, 0xe0, 0xa8, 0xf1, 0xfb, + 0x21, 0x20, 0x44, 0xe0, 0x5d, 0x7a, 0xdc, 0xfc, 0x74, 0xd8, 0xf9, 0x0a, 0xea, 0xb8, 0x6c, 0xcf, + 0x0b, 0xa8, 0x76, 0xce, 0xf0, 0x7b, 0x36, 0x0b, 0xe0, 0xbd, 0xab, 0x8c, 0x74, 0x55, 0xd4, 0x78, + 0x54, 0x5e, 0x14, 0xf3, 0xa2, 0x1c, 0xae, 0xa7, 0x4d, 0x7f, 0x6a, 0x32, 0x2f, 0xd9, 0x2c, 0xea, + 0x7d, 0x90, 0xb5, 0xae, 0x6a, 0x40, 0x19, 0x54, 0xb4, 0xe2, 0x84, 0xe8, 0x24, 0xf5, 0x57, 0xca, + 0x2f, 0x2a, 0x5a, 0x7e, 0xc7, 0xa6, 0x3b, 0xd4, 0x9f, 0xad, 0x3c, 0xa0, 0x17, 0x1d, 0x82, 0xe3, + 0x94, 0x33, 0x7c, 0x47, 0x5a, 0xf4, 0x60, 0x64, 0xf3, 0xe5, 0x72, 0xab, 0xa2, 0x12, 0xa7, 0xf3, + 0xa2, 0x1c, 0xac, 0x27, 0x1e, 0xcc, 0x5b, 0xce, 0x4b, 0x15, 0x15, 0xbf, 0x60, 0x83, 0x24, 0xad, + 0x46, 0x2d, 0xba, 0x34, 0xd5, 0xf7, 0x60, 0x56, 0x1a, 0x35, 0xbf, 0x66, 0x67, 0x1e, 0xbe, 0x34, + 0xca, 0x0a, 0x12, 0x17, 0xbd, 0x79, 0x51, 0x76, 0xd7, 0x23, 0x6a, 0xcf, 0x94, 0x12, 0x39, 0x84, + 0xf0, 0x4f, 
0xfa, 0x99, 0x50, 0x6b, 0xc8, 0x15, 0x63, 0x79, 0xc5, 0x3a, 0x63, 0xc5, 0x80, 0xc0, + 0x90, 0xca, 0xca, 0x19, 0x9b, 0xce, 0x79, 0x81, 0xce, 0xc3, 0x7c, 0xa6, 0x42, 0xe7, 0x5b, 0x36, + 0xc9, 0x6f, 0x6f, 0xd0, 0x45, 0xb7, 0x51, 0x5e, 0x30, 0x22, 0x63, 0xaa, 0x8b, 0x26, 0x26, 0x96, + 0x57, 0xfe, 0xd8, 0x28, 0x33, 0xaa, 0x2d, 0xfb, 0xe8, 0xd1, 0x2f, 0x7b, 0xfc, 0x0d, 0x00, 0x00, + 0xff, 0xff, 0x43, 0xeb, 0x7e, 0x04, 0xd7, 0x01, 0x00, 0x00, } diff --git a/modules/hostthermaldiscovery/proto/hostthermaldiscovery.proto b/modules/hostthermaldiscovery/proto/hostthermaldiscovery.proto index 639621eef..e1d6b8595 100644 --- a/modules/hostthermaldiscovery/proto/hostthermaldiscovery.proto +++ b/modules/hostthermaldiscovery/proto/hostthermaldiscovery.proto @@ -13,17 +13,16 @@ package proto; message HostDiscoveryConfig { string polling_interval = 1; string temp_sensor_path = 2; - map thermal_thresholds = 3; -} - -message HostThermalThresholds { - int32 lower_normal = 1; - int32 upper_normal = 2; + string freq_sensor_url = 3; + bool log_thermal_data = 4; + string log_here = 5; + int32 lower_normal = 6; + int32 upper_normal = 7; - int32 lower_high = 3; - int32 upper_high = 4; + int32 lower_high = 8; + int32 upper_high = 9; - int32 lower_critical = 5; - int32 upper_critical = 6; + int32 lower_critical = 10; + int32 upper_critical = 11; +} -} \ No newline at end of file