mirror of
https://github.com/zalando-incubator/kube-metrics-adapter.git
synced 2024-12-22 19:16:06 +00:00
Add support for scaling based on Nakadi
Signed-off-by: Mikkel Oscar Lyderik Larsen <mikkel.larsen@zalando.de>
This commit is contained in:
parent
45a09c12a0
commit
4aeebc853c
86
README.md
86
README.md
@ -702,6 +702,92 @@ you need to define a `key` or other `tag` with a "star" query syntax like
|
|||||||
metric label definitions. If both annotations and corresponding label is
|
metric label definitions. If both annotations and corresponding label is
|
||||||
defined, then the annotation takes precedence.
|
defined, then the annotation takes precedence.
|
||||||
|
|
||||||
|
|
||||||
|
## Nakadi collector
|
||||||
|
|
||||||
|
The Nakadi collector allows scaling based on [Nakadi](https://nakadi.io/)
|
||||||
|
`consumer_lag_seconds` or `unconsumed_events`.
|
||||||
|
|
||||||
|
### Supported metrics
|
||||||
|
|
||||||
|
| Metric Type | Description | Type | K8s Versions |
|
||||||
|
| ------------ | ------- | -- | -- |
|
||||||
|
| `unconsumed-events` | Scale based on number of unconsumed events for a Nakadi subscription | External | `>=1.24` |
|
||||||
|
| `consumer-lag-seconds` | Scale based on the maximum consumer lag in seconds for a Nakadi subscription | External | `>=1.24` |
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: autoscaling/v2
|
||||||
|
kind: HorizontalPodAutoscaler
|
||||||
|
metadata:
|
||||||
|
name: myapp-hpa
|
||||||
|
annotations:
|
||||||
|
# metric-config.<metricType>.<metricName>.<collectorType>/<configKey>
|
||||||
|
metric-config.external.my-nakadi-consumer.nakadi/interval: "60s" # optional
|
||||||
|
spec:
|
||||||
|
scaleTargetRef:
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
name: custom-metrics-consumer
|
||||||
|
minReplicas: 0
|
||||||
|
maxReplicas: 8 # should match number of partitions for the event type
|
||||||
|
metrics:
|
||||||
|
- type: External
|
||||||
|
external:
|
||||||
|
metric:
|
||||||
|
name: my-nakadi-consumer
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
type: nakadi
|
||||||
|
subscription-id: "708095f6-cece-4d02-840e-ee488d710b29"
|
||||||
|
metric-type: "consumer-lag-seconds|unconsumed-events"
|
||||||
|
target:
|
||||||
|
# value is compatible with the consumer-lag-seconds metric type.
|
||||||
|
# It describes the amount of consumer lag in seconds before scaling
|
||||||
|
# additionally up.
|
||||||
|
# if an event-type has multiple partitions the value of
|
||||||
|
# consumer-lag-seconds is the max of all the partitions.
|
||||||
|
value: "600" # 10m
|
||||||
|
# averageValue is compatible with unconsumed-events metric type.
|
||||||
|
# This means for every 30 unconsumed events a pod is scaled up.
|
||||||
|
          # unconsumed-events is the sum of unconsumed_events over all
|
||||||
|
# partitions.
|
||||||
|
averageValue: "30"
|
||||||
|
type: AverageValue
|
||||||
|
```
|
||||||
|
|
||||||
|
The `subscription-id` is the ID of the relevant consumer subscription. The
|
||||||
|
`metric-type` indicates whether to scale on `consumer-lag-seconds` or
|
||||||
|
`unconsumed-events` as outlined below.
|
||||||
|
|
||||||
|
`unconsumed-events` - is the total number of unconsumed events over all
|
||||||
|
partitions. When using this `metric-type` you should also use the target
|
||||||
|
`averageValue` which indicates the number of events which can be handled per
|
||||||
|
pod. To best estimate the number of events per pods, you need to understand the
|
||||||
|
average time for processing an event as well as the rate of events.
|
||||||
|
|
||||||
|
*Example*: You have an event type producing 100 events per second between 00:00
|
||||||
|
and 08:00. Between 08:01 to 23:59 it produces 400 events per second.
|
||||||
|
Let's assume that on average a single pod can consume 100 events per second,
|
||||||
|
then we can define 100 as `averageValue` and the HPA would scale to 1 between
|
||||||
|
00:00 and 08:00, and scale to 4 between 08:01 and 23:59. If there for some
|
||||||
|
reason is a short spike of 800 events per second, then it would scale to 8 pods
|
||||||
|
to process those events until the rate goes down again.
|
||||||
|
|
||||||
|
`consumer-lag-seconds` - describes the age of the oldest unconsumed event for
|
||||||
|
a subscription. If the event type has multiple partitions the lag is defined as
|
||||||
|
the max age over all partitions. When using this `metric-type` you should use
|
||||||
|
the target `value` to indicate the max lag (in seconds) before the HPA should
|
||||||
|
scale.
|
||||||
|
|
||||||
|
*Example*: You have a subscription with a defined SLO of "99.99% of events are
|
||||||
|
consumed within 30 min.". In this case you can define a target `value` of e.g.
|
||||||
|
20 min. (1200s) (to include a safety buffer) such that the HPA only scales up
|
||||||
|
from 1 to 2 if the target of 20 min. is breached and it needs to work faster
|
||||||
|
with more consumers.
|
||||||
|
For this case you should also account for the average time for processing an
|
||||||
|
event when defining the target.
|
||||||
|
|
||||||
|
|
||||||
## HTTP Collector
|
## HTTP Collector
|
||||||
|
|
||||||
The http collector allows collecting metrics from an external endpoint specified in the HPA.
|
The http collector allows collecting metrics from an external endpoint specified in the HPA.
|
||||||
|
120
pkg/collector/nakadi_collector.go
Normal file
120
pkg/collector/nakadi_collector.go
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/nakadi"
|
||||||
|
autoscalingv2 "k8s.io/api/autoscaling/v2"
|
||||||
|
"k8s.io/apimachinery/pkg/api/resource"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/metrics/pkg/apis/external_metrics"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// NakadiMetricType defines the metric type for metrics based on Nakadi
	// subscriptions.
	NakadiMetricType = "nakadi"

	// nakadiSubscriptionIDKey is the metric selector/config key carrying the
	// Nakadi subscription ID to read stats for.
	nakadiSubscriptionIDKey = "subscription-id"
	// nakadiMetricTypeKey is the metric selector/config key selecting which
	// Nakadi metric to scale on.
	nakadiMetricTypeKey = "metric-type"

	// Supported values for the "metric-type" key.
	nakadiMetricTypeConsumerLagSeconds = "consumer-lag-seconds"
	nakadiMetricTypeUnconsumedEvents   = "unconsumed-events"
)
|
||||||
|
|
||||||
|
// NakadiCollectorPlugin defines a plugin for creating collectors that can get
// unconsumed events from Nakadi.
type NakadiCollectorPlugin struct {
	// nakadi is the client used to query the Nakadi subscription stats API.
	nakadi nakadi.Nakadi
}
|
||||||
|
|
||||||
|
// NewNakadiCollectorPlugin initializes a new NakadiCollectorPlugin.
|
||||||
|
func NewNakadiCollectorPlugin(nakadi nakadi.Nakadi) (*NakadiCollectorPlugin, error) {
|
||||||
|
return &NakadiCollectorPlugin{
|
||||||
|
nakadi: nakadi,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCollector initializes a new Nakadi collector from the specified HPA.
// The interval determines how often the resulting collector is polled.
func (c *NakadiCollectorPlugin) NewCollector(hpa *autoscalingv2.HorizontalPodAutoscaler, config *MetricConfig, interval time.Duration) (Collector, error) {
	return NewNakadiCollector(c.nakadi, hpa, config, interval)
}
|
||||||
|
|
||||||
|
// NakadiCollector defines a collector that is able to collect metrics from
// Nakadi.
type NakadiCollector struct {
	nakadi           nakadi.Nakadi                  // client for the Nakadi subscription stats API
	interval         time.Duration                  // how often the collector should run
	subscriptionID   string                         // Nakadi subscription to read stats for
	nakadiMetricType string                         // "consumer-lag-seconds" or "unconsumed-events"
	metric           autoscalingv2.MetricIdentifier // metric identity reported back for the HPA
	metricType       autoscalingv2.MetricSourceType // source type taken from the metric config
	namespace        string                         // namespace of the owning HPA
}
|
||||||
|
|
||||||
|
// NewNakadiCollector initializes a new NakadiCollector.
|
||||||
|
func NewNakadiCollector(nakadi nakadi.Nakadi, hpa *autoscalingv2.HorizontalPodAutoscaler, config *MetricConfig, interval time.Duration) (*NakadiCollector, error) {
|
||||||
|
if config.Metric.Selector == nil {
|
||||||
|
return nil, fmt.Errorf("selector for nakadi is not specified")
|
||||||
|
}
|
||||||
|
|
||||||
|
subscriptionID, ok := config.Config[nakadiSubscriptionIDKey]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("subscription-id not specified on metric")
|
||||||
|
}
|
||||||
|
|
||||||
|
metricType, ok := config.Config[nakadiMetricTypeKey]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("metric-type not specified on metric")
|
||||||
|
}
|
||||||
|
|
||||||
|
if metricType != nakadiMetricTypeConsumerLagSeconds && metricType != nakadiMetricTypeUnconsumedEvents {
|
||||||
|
return nil, fmt.Errorf("metric-type must be either '%s' or '%s', was '%s'", nakadiMetricTypeConsumerLagSeconds, nakadiMetricTypeUnconsumedEvents, metricType)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &NakadiCollector{
|
||||||
|
nakadi: nakadi,
|
||||||
|
interval: interval,
|
||||||
|
subscriptionID: subscriptionID,
|
||||||
|
nakadiMetricType: metricType,
|
||||||
|
metric: config.Metric,
|
||||||
|
metricType: config.Type,
|
||||||
|
namespace: hpa.Namespace,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetMetrics returns a list of collected metrics for the Nakadi subscription ID.
|
||||||
|
func (c *NakadiCollector) GetMetrics() ([]CollectedMetric, error) {
|
||||||
|
var value int64
|
||||||
|
var err error
|
||||||
|
switch c.nakadiMetricType {
|
||||||
|
case nakadiMetricTypeConsumerLagSeconds:
|
||||||
|
value, err = c.nakadi.ConsumerLagSeconds(context.TODO(), c.subscriptionID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
case nakadiMetricTypeUnconsumedEvents:
|
||||||
|
value, err = c.nakadi.UnconsumedEvents(context.TODO(), c.subscriptionID)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
metricValue := CollectedMetric{
|
||||||
|
Namespace: c.namespace,
|
||||||
|
Type: c.metricType,
|
||||||
|
External: external_metrics.ExternalMetricValue{
|
||||||
|
MetricName: c.metric.Name,
|
||||||
|
MetricLabels: c.metric.Selector.MatchLabels,
|
||||||
|
Timestamp: metav1.Now(),
|
||||||
|
Value: *resource.NewQuantity(value, resource.DecimalSI),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
return []CollectedMetric{metricValue}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Interval returns the interval at which the collector should run.
func (c *NakadiCollector) Interval() time.Duration {
	return c.interval
}
|
124
pkg/nakadi/nakadi.go
Normal file
124
pkg/nakadi/nakadi.go
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
package nakadi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Nakadi defines an interface for talking to the Nakadi API.
type Nakadi interface {
	// ConsumerLagSeconds returns the maximum consumer lag in seconds across
	// all partitions of the subscription.
	ConsumerLagSeconds(ctx context.Context, subscriptionID string) (int64, error)
	// UnconsumedEvents returns the total number of unconsumed events summed
	// over all partitions of the subscription.
	UnconsumedEvents(ctx context.Context, subscriptionID string) (int64, error)
}
|
||||||
|
|
||||||
|
// Client defines client for interfacing with the Nakadi API.
type Client struct {
	nakadiEndpoint string       // base URL of the Nakadi API
	http           *http.Client // HTTP client used for all requests
}
|
||||||
|
|
||||||
|
// NewNakadiClient initializes a new Nakadi Client.
|
||||||
|
func NewNakadiClient(nakadiEndpoint string, client *http.Client) *Client {
|
||||||
|
return &Client{
|
||||||
|
nakadiEndpoint: nakadiEndpoint,
|
||||||
|
http: client,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) ConsumerLagSeconds(ctx context.Context, subscriptionID string) (int64, error) {
|
||||||
|
stats, err := c.stats(ctx, subscriptionID)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var maxConsumerLagSeconds int64
|
||||||
|
for _, eventType := range stats {
|
||||||
|
for _, partition := range eventType.Partitions {
|
||||||
|
maxConsumerLagSeconds = max(maxConsumerLagSeconds, partition.ConsumerLagSeconds)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return maxConsumerLagSeconds, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Client) UnconsumedEvents(ctx context.Context, subscriptionID string) (int64, error) {
|
||||||
|
stats, err := c.stats(ctx, subscriptionID)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var unconsumedEvents int64
|
||||||
|
for _, eventType := range stats {
|
||||||
|
for _, partition := range eventType.Partitions {
|
||||||
|
unconsumedEvents += partition.UnconsumedEvents
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return unconsumedEvents, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// statsResp mirrors the top-level response of the Nakadi subscription
// stats endpoint.
type statsResp struct {
	Items []statsEventType `json:"items"`
}
|
||||||
|
|
||||||
|
// statsEventType holds per-event-type statistics of a Nakadi subscription.
type statsEventType struct {
	EventType  string           `json:"event_type"`
	Partitions []statsPartition `json:"partitions"`
}
|
||||||
|
|
||||||
|
type statsPartition struct {
|
||||||
|
Partiton string `json:"partition"`
|
||||||
|
State string `json:"state"`
|
||||||
|
UnconsumedEvents int64 `json:"unconsumed_events"`
|
||||||
|
ConsumerLagSeconds int64 `json:"consumer_lag_seconds"`
|
||||||
|
StreamID string `json:"stream_id"`
|
||||||
|
AssignmentType string `json:"assignment_type"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// stats returns the Nakadi stats for a given subscription ID.
|
||||||
|
//
|
||||||
|
// https://nakadi.io/manual.html#/subscriptions/subscription_id/stats_get
|
||||||
|
func (c *Client) stats(ctx context.Context, subscriptionID string) ([]statsEventType, error) {
|
||||||
|
endpoint, err := url.Parse(c.nakadiEndpoint)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint.Path = fmt.Sprintf("/subscriptions/%s/stats", subscriptionID)
|
||||||
|
|
||||||
|
q := endpoint.Query()
|
||||||
|
q.Set("show_time_lag", "true")
|
||||||
|
endpoint.RawQuery = q.Encode()
|
||||||
|
|
||||||
|
resp, err := c.http.Get(endpoint.String())
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
d, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, fmt.Errorf("[nakadi stats] unexpected response code: %d (%s)", resp.StatusCode, string(d))
|
||||||
|
}
|
||||||
|
|
||||||
|
var result statsResp
|
||||||
|
err = json.Unmarshal(d, &result)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(result.Items) == 0 {
|
||||||
|
return nil, errors.New("expected at least 1 event-type, 0 returned")
|
||||||
|
}
|
||||||
|
|
||||||
|
return result.Items, nil
|
||||||
|
}
|
141
pkg/nakadi/nakadi_test.go
Normal file
141
pkg/nakadi/nakadi_test.go
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
package nakadi
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestQuery(tt *testing.T) {
|
||||||
|
client := &http.Client{}
|
||||||
|
for _, ti := range []struct {
|
||||||
|
msg string
|
||||||
|
status int
|
||||||
|
responseBody string
|
||||||
|
err error
|
||||||
|
unconsumedEvents int64
|
||||||
|
consumerLagSeconds int64
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
msg: "test getting a single event-type",
|
||||||
|
status: http.StatusOK,
|
||||||
|
responseBody: `{
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"event_type": "example-event",
|
||||||
|
"partitions": [
|
||||||
|
{
|
||||||
|
"partition": "0",
|
||||||
|
"state": "assigned",
|
||||||
|
"unconsumed_events": 4,
|
||||||
|
"consumer_lag_seconds": 2,
|
||||||
|
"stream_id": "example-id",
|
||||||
|
"assignment_type": "auto"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"partition": "0",
|
||||||
|
"state": "assigned",
|
||||||
|
"unconsumed_events": 5,
|
||||||
|
"consumer_lag_seconds": 1,
|
||||||
|
"stream_id": "example-id",
|
||||||
|
"assignment_type": "auto"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}`,
|
||||||
|
unconsumedEvents: 9,
|
||||||
|
consumerLagSeconds: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
msg: "test getting multiple event-types",
|
||||||
|
status: http.StatusOK,
|
||||||
|
responseBody: `{
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"event_type": "example-event",
|
||||||
|
"partitions": [
|
||||||
|
{
|
||||||
|
"partition": "0",
|
||||||
|
"state": "assigned",
|
||||||
|
"unconsumed_events": 4,
|
||||||
|
"consumer_lag_seconds": 2,
|
||||||
|
"stream_id": "example-id",
|
||||||
|
"assignment_type": "auto"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"partition": "0",
|
||||||
|
"state": "assigned",
|
||||||
|
"unconsumed_events": 5,
|
||||||
|
"consumer_lag_seconds": 1,
|
||||||
|
"stream_id": "example-id",
|
||||||
|
"assignment_type": "auto"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"event_type": "example-event-2",
|
||||||
|
"partitions": [
|
||||||
|
{
|
||||||
|
"partition": "0",
|
||||||
|
"state": "assigned",
|
||||||
|
"unconsumed_events": 4,
|
||||||
|
"consumer_lag_seconds": 6,
|
||||||
|
"stream_id": "example-id",
|
||||||
|
"assignment_type": "auto"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"partition": "0",
|
||||||
|
"state": "assigned",
|
||||||
|
"unconsumed_events": 5,
|
||||||
|
"consumer_lag_seconds": 1,
|
||||||
|
"stream_id": "example-id",
|
||||||
|
"assignment_type": "auto"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}`,
|
||||||
|
unconsumedEvents: 18,
|
||||||
|
consumerLagSeconds: 6,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
msg: "test call with invalid response",
|
||||||
|
status: http.StatusInternalServerError,
|
||||||
|
responseBody: `{"error": 500}`,
|
||||||
|
err: errors.New("[nakadi stats] unexpected response code: 500 ({\"error\": 500})"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
msg: "test getting back a single data point",
|
||||||
|
status: http.StatusOK,
|
||||||
|
responseBody: `{
|
||||||
|
"items": []
|
||||||
|
}`,
|
||||||
|
err: errors.New("expected at least 1 event-type, 0 returned"),
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
tt.Run(ti.msg, func(t *testing.T) {
|
||||||
|
ts := httptest.NewServer(http.HandlerFunc(
|
||||||
|
func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(ti.status)
|
||||||
|
_, err := w.Write([]byte(ti.responseBody))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
nakadiClient := NewNakadiClient(ts.URL, client)
|
||||||
|
consumerLagSeconds, err := nakadiClient.ConsumerLagSeconds(context.Background(), "id")
|
||||||
|
assert.Equal(t, ti.err, err)
|
||||||
|
assert.Equal(t, ti.consumerLagSeconds, consumerLagSeconds)
|
||||||
|
unconsumedEvents, err := nakadiClient.UnconsumedEvents(context.Background(), "id")
|
||||||
|
assert.Equal(t, ti.err, err)
|
||||||
|
assert.Equal(t, ti.unconsumedEvents, unconsumedEvents)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -35,6 +35,7 @@ import (
|
|||||||
"github.com/zalando-incubator/kube-metrics-adapter/pkg/client/clientset/versioned"
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/client/clientset/versioned"
|
||||||
"github.com/zalando-incubator/kube-metrics-adapter/pkg/collector"
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/collector"
|
||||||
"github.com/zalando-incubator/kube-metrics-adapter/pkg/controller/scheduledscaling"
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/controller/scheduledscaling"
|
||||||
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/nakadi"
|
||||||
"github.com/zalando-incubator/kube-metrics-adapter/pkg/provider"
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/provider"
|
||||||
"github.com/zalando-incubator/kube-metrics-adapter/pkg/zmon"
|
"github.com/zalando-incubator/kube-metrics-adapter/pkg/zmon"
|
||||||
"golang.org/x/oauth2"
|
"golang.org/x/oauth2"
|
||||||
@ -64,6 +65,7 @@ func NewCommandStartAdapterServer(stopCh <-chan struct{}) *cobra.Command {
|
|||||||
EnableExternalMetricsAPI: true,
|
EnableExternalMetricsAPI: true,
|
||||||
MetricsAddress: ":7979",
|
MetricsAddress: ":7979",
|
||||||
ZMONTokenName: "zmon",
|
ZMONTokenName: "zmon",
|
||||||
|
NakadiTokenName: "nakadi",
|
||||||
CredentialsDir: "/meta/credentials",
|
CredentialsDir: "/meta/credentials",
|
||||||
ExternalRPSMetricName: "skipper_serve_host_duration_seconds_count",
|
ExternalRPSMetricName: "skipper_serve_host_duration_seconds_count",
|
||||||
}
|
}
|
||||||
@ -110,8 +112,12 @@ func NewCommandStartAdapterServer(stopCh <-chan struct{}) *cobra.Command {
|
|||||||
"url of ZMON KariosDB endpoint to query for ZMON checks")
|
"url of ZMON KariosDB endpoint to query for ZMON checks")
|
||||||
flags.StringVar(&o.ZMONTokenName, "zmon-token-name", o.ZMONTokenName, ""+
|
flags.StringVar(&o.ZMONTokenName, "zmon-token-name", o.ZMONTokenName, ""+
|
||||||
"name of the token used to query ZMON")
|
"name of the token used to query ZMON")
|
||||||
|
flags.StringVar(&o.NakadiEndpoint, "nakadi-endpoint", o.NakadiEndpoint, ""+
|
||||||
|
"url of Nakadi endpoint to for nakadi subscription stats")
|
||||||
|
flags.StringVar(&o.NakadiTokenName, "nakadi-token-name", o.NakadiTokenName, ""+
|
||||||
|
"name of the token used to call nakadi subscription API")
|
||||||
flags.StringVar(&o.Token, "token", o.Token, ""+
|
flags.StringVar(&o.Token, "token", o.Token, ""+
|
||||||
"static oauth2 token to use when calling external services like ZMON")
|
"static oauth2 token to use when calling external services like ZMON and Nakadi")
|
||||||
flags.StringVar(&o.CredentialsDir, "credentials-dir", o.CredentialsDir, ""+
|
flags.StringVar(&o.CredentialsDir, "credentials-dir", o.CredentialsDir, ""+
|
||||||
"path to the credentials dir where tokens are stored")
|
"path to the credentials dir where tokens are stored")
|
||||||
flags.BoolVar(&o.SkipperIngressMetrics, "skipper-ingress-metrics", o.SkipperIngressMetrics, ""+
|
flags.BoolVar(&o.SkipperIngressMetrics, "skipper-ingress-metrics", o.SkipperIngressMetrics, ""+
|
||||||
@ -274,6 +280,27 @@ func (o AdapterServerOptions) RunCustomMetricsAdapterServer(stopCh <-chan struct
|
|||||||
collectorFactory.RegisterExternalCollector([]string{collector.ZMONMetricType, collector.ZMONCheckMetricLegacy}, zmonPlugin)
|
collectorFactory.RegisterExternalCollector([]string{collector.ZMONMetricType, collector.ZMONCheckMetricLegacy}, zmonPlugin)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// enable Nakadi based metrics
|
||||||
|
if o.NakadiEndpoint != "" {
|
||||||
|
var tokenSource oauth2.TokenSource
|
||||||
|
if o.Token != "" {
|
||||||
|
tokenSource = oauth2.StaticTokenSource(&oauth2.Token{AccessToken: o.Token})
|
||||||
|
} else {
|
||||||
|
tokenSource = platformiam.NewTokenSource(o.NakadiTokenName, o.CredentialsDir)
|
||||||
|
}
|
||||||
|
|
||||||
|
httpClient := newOauth2HTTPClient(ctx, tokenSource)
|
||||||
|
|
||||||
|
nakadiClient := nakadi.NewNakadiClient(o.NakadiEndpoint, httpClient)
|
||||||
|
|
||||||
|
nakadiPlugin, err := collector.NewNakadiCollectorPlugin(nakadiClient)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to initialize Nakadi collector plugin: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
collectorFactory.RegisterExternalCollector([]string{collector.NakadiMetricType}, nakadiPlugin)
|
||||||
|
}
|
||||||
|
|
||||||
awsSessions := make(map[string]*session.Session, len(o.AWSRegions))
|
awsSessions := make(map[string]*session.Session, len(o.AWSRegions))
|
||||||
for _, region := range o.AWSRegions {
|
for _, region := range o.AWSRegions {
|
||||||
awsSessions[region], err = session.NewSessionWithOptions(session.Options{
|
awsSessions[region], err = session.NewSessionWithOptions(session.Options{
|
||||||
@ -427,6 +454,10 @@ type AdapterServerOptions struct {
|
|||||||
ZMONKariosDBEndpoint string
|
ZMONKariosDBEndpoint string
|
||||||
// ZMONTokenName is the name of the token used to query ZMON
|
// ZMONTokenName is the name of the token used to query ZMON
|
||||||
ZMONTokenName string
|
ZMONTokenName string
|
||||||
|
// NakadiEndpoint enables Nakadi metrics from the specified endpoint
|
||||||
|
NakadiEndpoint string
|
||||||
|
// NakadiTokenName is the name of the token used to call Nakadi
|
||||||
|
NakadiTokenName string
|
||||||
// Token is an oauth2 token used to authenticate with services like
|
// Token is an oauth2 token used to authenticate with services like
|
||||||
// ZMON.
|
// ZMON.
|
||||||
Token string
|
Token string
|
||||||
|
Loading…
x
Reference in New Issue
Block a user