Make the number of ramp steps configurable

In #371 we introduced steps to make the scaling up possible even when
the HPA forces a 10% change. The problem is that 10% might not be
sufficient for some specific scaling scenarios.

For example, a an application targeting 12 pods and using a
ScalingSchedule with the value of 10000 to achieve that, will require a
target of 833. With 10 ramp steps the 90% bucket will return a metric of
9000 and the HPA calculates (9000/833) 10.8 pods, rounding to 11 pods.
Once the metric reaches the time to return 100% it will won't be
effective, since the change of the current number of pods (11) and the
desired one (12) is less than 10%.

This commit does not try to tackle this problem completely, since the
10% rule is not fixed, might change among different clusters and is also
dependent on the value given to each ScalingSchedule. Therefore, this
commit makes the number of ramp steps configurable via the
`--scaling-schedule-ramp-steps` config flag, defaulting to 10.

Signed-off-by: Jonathan Juares Beber <jonathanbeber@gmail.com>
This commit is contained in:
Jonathan Juares Beber
2021-10-18 18:31:55 +02:00
parent e04cd10bfc
commit fd4ead837e
4 changed files with 126 additions and 36 deletions
+7 -4
View File
@@ -128,7 +128,8 @@ func NewCommandStartAdapterServer(stopCh <-chan struct{}) *cobra.Command {
flags.DurationVar(&o.GCInterval, "garbage-collector-interval", 10*time.Minute, "Interval to clean up metrics that are stored in in-memory cache.")
flags.BoolVar(&o.ScalingScheduleMetrics, "scaling-schedule", o.ScalingScheduleMetrics, ""+
"whether to enable time-based ScalingSchedule metrics")
flags.DurationVar(&o.DefaultScheduledScalingWindow, "scaling-schedule-default-scaling-window", 10*time.Minute, "Default scale-up/scale-down window duration for scheduled metrics")
flags.DurationVar(&o.DefaultScheduledScalingWindow, "scaling-schedule-default-scaling-window", 10*time.Minute, "Default rampup and rampdown window duration for ScalingSchedules")
flags.IntVar(&o.RampSteps, "scaling-schedule-ramp-steps", 10, "Number of steps used to rampup and rampdown ScalingSchedules. It's used to guarantee won't avoid reaching the max scaling due to the 10% minimum change rule.")
return cmd
}
@@ -294,7 +295,7 @@ func (o AdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-chan struct
)
go reflector.Run(ctx.Done())
clusterPlugin, err := collector.NewClusterScalingScheduleCollectorPlugin(clusterScalingSchedulesStore, time.Now, o.DefaultScheduledScalingWindow)
clusterPlugin, err := collector.NewClusterScalingScheduleCollectorPlugin(clusterScalingSchedulesStore, time.Now, o.DefaultScheduledScalingWindow, o.RampSteps)
if err != nil {
return fmt.Errorf("unable to create ClusterScalingScheduleCollector plugin: %v", err)
}
@@ -303,7 +304,7 @@ func (o AdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-chan struct
return fmt.Errorf("failed to register ClusterScalingSchedule object collector plugin: %v", err)
}
plugin, err := collector.NewScalingScheduleCollectorPlugin(scalingSchedulesStore, time.Now, o.DefaultScheduledScalingWindow)
plugin, err := collector.NewScalingScheduleCollectorPlugin(scalingSchedulesStore, time.Now, o.DefaultScheduledScalingWindow, o.RampSteps)
if err != nil {
return fmt.Errorf("unable to create ScalingScheduleCollector plugin: %v", err)
}
@@ -429,6 +430,8 @@ type AdapterServerOptions struct {
GCInterval time.Duration
// Time-based scaling based on the CRDs ScheduleScaling and ClusterScheduleScaling.
ScalingScheduleMetrics bool
// Default scale-up/scale-down window duration for scheduled metrics
// Default ramp-up/ramp-down window duration for scheduled metrics
DefaultScheduledScalingWindow time.Duration
// Number of steps utilized during the rampup and rampdown for scheduled metrics
RampSteps int
}