Skip to content

Commit

Permalink
Rename tracing enums
Browse files Browse the repository at this point in the history
This makes the enum names much more user-friendly on the SQL level.
We made them lowercase and removed the prefix.

See timescale/promscale_extension#225 for
more details.
  • Loading branch information
cevian committed May 3, 2022
1 parent 87dca5c commit 7b85613
Show file tree
Hide file tree
Showing 12 changed files with 153 additions and 80 deletions.
4 changes: 2 additions & 2 deletions docs/mixin/dashboards/apm-home.json
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@
"hide": false,
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n service_name AS \"Service\",\n COUNT(*)::numeric / (30 * 60) AS \"Requests\",\n AVG(duration_ms) AS \"Avg Duration\",\n ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3) AS \"p90 Duration\",\n (count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*)) AS \"Error rate\"\nFROM ps_trace.span s\nWHERE start_time > NOW() - INTERVAL '30m'\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nGROUP BY 1\nORDER BY 2",
"rawSql": "SELECT\n service_name AS \"Service\",\n COUNT(*)::numeric / (30 * 60) AS \"Requests\",\n AVG(duration_ms) AS \"Avg Duration\",\n ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3) AS \"p90 Duration\",\n (count(*) filter (where status_code = 'error')::numeric / count(*)) AS \"Error rate\"\nFROM ps_trace.span s\nWHERE start_time > NOW() - INTERVAL '30m'\nAND (span_kind = 'server' OR parent_span_id is NULL)\nGROUP BY 1\nORDER BY 2",
"refId": "A",
"select": [
[
Expand Down Expand Up @@ -479,7 +479,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n status_message as \"Error\",\n service_name as \"Service\",\n count(*) as \"Occurrences\" \nFROM ps_trace.span\nWHERE start_time > NOW() - INTERVAL '30m'\nAND status_code = 'STATUS_CODE_ERROR'\nGROUP BY 1, 2\nORDER BY 3\n;",
"rawSql": "SELECT\n status_message as \"Error\",\n service_name as \"Service\",\n count(*) as \"Occurrences\" \nFROM ps_trace.span\nWHERE start_time > NOW() - INTERVAL '30m'\nAND status_code = 'error'\nGROUP BY 1, 2\nORDER BY 3\n;",
"refId": "A",
"select": [
[
Expand Down
12 changes: 6 additions & 6 deletions docs/mixin/dashboards/apm-service-overview.json
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n coalesce(count(*)::numeric / (EXTRACT(epoch FROM '$__interval'::interval)), 0) AS \"Requests\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n coalesce(count(*)::numeric / (EXTRACT(epoch FROM '$__interval'::interval)), 0) AS \"Requests\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'server' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [],
"table": "event",
Expand Down Expand Up @@ -250,7 +250,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n COALESCE(ROUND(approx_percentile(0.99, percentile_agg(duration_ms))::numeric, 3), 0) as \"p99\",\n COALESCE(ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3), 0) as \"p90\",\n COALESCE(ROUND(approx_percentile(0.50, percentile_agg(duration_ms))::numeric, 3), 0) as \"p50\",\n COALESCE(AVG(duration_ms), 0) as \"Average\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"rawSql": "SELECT\n time_bucket_gapfill('$__interval', start_time) AS time,\n COALESCE(ROUND(approx_percentile(0.99, percentile_agg(duration_ms))::numeric, 3), 0) as \"p99\",\n COALESCE(ROUND(approx_percentile(0.90, percentile_agg(duration_ms))::numeric, 3), 0) as \"p90\",\n COALESCE(ROUND(approx_percentile(0.50, percentile_agg(duration_ms))::numeric, 3), 0) as \"p50\",\n COALESCE(AVG(duration_ms), 0) as \"Average\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'server' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [],
"table": "event",
Expand Down Expand Up @@ -351,7 +351,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n time_bucket('$__interval', start_time) as time,\n coalesce(count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"rawSql": "SELECT\n time_bucket('$__interval', start_time) as time,\n coalesce(count(*) filter (where status_code = 'error')::numeric / count(*), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'server' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [],
"table": "event",
Expand Down Expand Up @@ -466,7 +466,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n span_name as \"Operation\",\n count(*)::numeric / (${__to:date:seconds} - ${__from:date:seconds}) AS \"Requests\",\n sum(duration_ms) / count(*)::numeric as \"Avg Duration\",\n coalesce((count(*) filter (where status_code = 'STATUS_CODE_ERROR')::numeric / count(*)), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"rawSql": "SELECT\n span_name as \"Operation\",\n count(*)::numeric / (${__to:date:seconds} - ${__from:date:seconds}) AS \"Requests\",\n sum(duration_ms) / count(*)::numeric as \"Avg Duration\",\n coalesce((count(*) filter (where status_code = 'error')::numeric / count(*)), 0) as \"Error rate\"\nFROM ps_trace.span s\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'server' OR parent_span_id is NULL)\nAND service_name = '${service}'\nGROUP BY 1\nORDER BY 1",
"refId": "A",
"select": [],
"table": "event",
Expand Down Expand Up @@ -611,7 +611,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n replace(trace_id::text, '-'::text, ''::text) as \"Trace ID\",\n span_name as \"Operation\",\n start_time as \"Time\",\n duration_ms as \"Duration\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'SPAN_KIND_SERVER' OR parent_span_id is NULL)\nAND service_name = '${service}'\nORDER BY duration_ms DESC\nLIMIT 50\n;",
"rawSql": "SELECT\n replace(trace_id::text, '-'::text, ''::text) as \"Trace ID\",\n span_name as \"Operation\",\n start_time as \"Time\",\n duration_ms as \"Duration\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time)\nAND (span_kind = 'server' OR parent_span_id is NULL)\nAND service_name = '${service}'\nORDER BY duration_ms DESC\nLIMIT 50\n;",
"refId": "A",
"select": [],
"table": "event",
Expand Down Expand Up @@ -689,7 +689,7 @@
"group": [],
"metricColumn": "none",
"rawQuery": true,
"rawSql": "SELECT\n status_message as \"Error\",\n count(*) as \"Occurrences\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time) AND\nstatus_code = 'STATUS_CODE_ERROR' AND\nservice_name = '${service}'\nGROUP BY 1\nORDER BY 2 DESC\n;",
"rawSql": "SELECT\n status_message as \"Error\",\n count(*) as \"Occurrences\"\nFROM ps_trace.span\nWHERE $__timeFilter(start_time) AND\nstatus_code = 'error' AND\nservice_name = '${service}'\nGROUP BY 1\nORDER BY 2 DESC\n;",
"refId": "A",
"select": [],
"table": "event",
Expand Down
24 changes: 12 additions & 12 deletions misc/tracegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,16 @@ def generate_instrumentation_lib() -> InstrumentationLib:

def generate_span_kind() -> str:
return random.choice([
'SPAN_KIND_UNSPECIFIED',
'SPAN_KIND_INTERNAL',
'SPAN_KIND_SERVER',
'SPAN_KIND_CLIENT',
'SPAN_KIND_PRODUCER',
'SPAN_KIND_CONSUMER'])
'unspecified',
'internal',
'server',
'client',
'producer',
'consumer'])


def generate_status_code() -> str:
return random.choice(['STATUS_CODE_UNSET', 'STATUS_CODE_OK', 'STATUS_CODE_ERROR'])
return random.choice(['unset', 'ok', 'error'])


def generate_span(trace: Trace, parent_span: Optional[Span], depth: int, child: int, siblings: int, min_breadth: int, max_breadth: int) -> None:
Expand Down Expand Up @@ -199,8 +199,8 @@ def save_span_name(service_name: str, span_name: str, span_kind: str, cur) -> in

def save_span(span: Span, cur) -> None:
sql = '''
insert into _ps_trace.schema_url (url)
values (%s)
insert into _ps_trace.schema_url (url)
values (%s)
on conflict (url) do nothing'''
cur.execute(sql, (span.resource.schema_url,))
sql = '''
Expand Down Expand Up @@ -230,10 +230,10 @@ def save_span(span: Span, cur) -> None:
%(trace_state)s,
%(parent_span_id)s,
(
select n.id
select n.id
from _ps_trace.operation n
inner join _ps_trace.tag t on (t.key = 'service.name' and n.service_name_id = t.id)
where n.span_name = %(span_name)s
inner join _ps_trace.tag t on (t.key = 'service.name' and n.service_name_id = t.id)
where n.span_name = %(span_name)s
and n.span_kind = %(span_kind)s
and t.value = to_jsonb(%(service_name)s::text)
limit 1
Expand Down
6 changes: 5 additions & 1 deletion pkg/jaeger/query/get_operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ func getOperations(ctx context.Context, conn pgxconn.PgxConn, query spanstore.Op
args := []interface{}{query.ServiceName}
kindQual := "TRUE"
if len(query.SpanKind) > 0 {
args = append(args, query.SpanKind)
pgEnum, err := getPGKindEnum(query.SpanKind)
if err != nil {
return operationsResp, fmt.Errorf("converting query enum: %w", err)
}
args = append(args, pgEnum)
kindQual = "o.span_kind = $2"
}

Expand Down
57 changes: 41 additions & 16 deletions pkg/jaeger/query/trace_scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,9 @@ func populateSpan(

func setStatus(ref pdata.Span, dbRes *spanDBResult) error {
if dbRes.statusCode != "" {
var code pdata.StatusCode
switch dbRes.statusCode {
case pdata.StatusCodeOk.String():
code = pdata.StatusCodeOk
case pdata.StatusCodeError.String():
code = pdata.StatusCodeError
case pdata.StatusCodeUnset.String():
code = pdata.StatusCodeUnset
default:
return fmt.Errorf("invalid status-code received: %s", dbRes.statusCode)
code, err := makeStatusCode(dbRes.statusCode)
if err != nil {
return err
}
ref.Status().SetCode(code)
}
Expand Down Expand Up @@ -311,21 +304,53 @@ func makeSpanId(s *int64) pdata.SpanID {
return pdata.NewSpanID(b8)
}

func getPGKindEnum(jaegerKind string) (string, error) {
switch jaegerKind {
case pdata.SpanKindClient.String():
return "client", nil
case pdata.SpanKindServer.String():
return "server", nil
case pdata.SpanKindInternal.String():
return "internal", nil
case pdata.SpanKindConsumer.String():
return "consumer", nil
case pdata.SpanKindProducer.String():
return "producer", nil
case pdata.SpanKindUnspecified.String():
return "unspecified", nil
default:
return "", fmt.Errorf("unknown span kind: %v", jaegerKind)
}
}

func makeKind(s string) (pdata.SpanKind, error) {
switch s {
case "SPAN_KIND_CLIENT":
case "client":
return pdata.SpanKindClient, nil
case "SPAN_KIND_SERVER":
case "server":
return pdata.SpanKindServer, nil
case "SPAN_KIND_INTERNAL":
case "internal":
return pdata.SpanKindInternal, nil
case "SPAN_KIND_CONSUMER":
case "consumer":
return pdata.SpanKindConsumer, nil
case "SPAN_KIND_PRODUCER":
case "producer":
return pdata.SpanKindProducer, nil
case "SPAN_KIND_UNSPECIFIED":
case "unspecified":
return pdata.SpanKindUnspecified, nil
default:
return pdata.SpanKindUnspecified, fmt.Errorf("unknown span kind: %s", s)
}
}

func makeStatusCode(s string) (pdata.StatusCode, error) {
switch s {
case "ok":
return pdata.StatusCodeOk, nil
case "error":
return pdata.StatusCodeError, nil
case "unset":
return pdata.StatusCodeUnset, nil
default:
return pdata.StatusCodeUnset, fmt.Errorf("unknown status-code kind: %s", s)
}
}
20 changes: 20 additions & 0 deletions pkg/pgmodel/ingestor/trace/operation_batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/jackc/pgtype"
pgx "github.com/jackc/pgx/v4"
"github.com/timescale/promscale/pkg/pgxconn"
"go.opentelemetry.io/collector/model/pdata"
)

const insertOperationSQL = `SELECT ps_trace.put_operation($1, $2, $3)`
Expand Down Expand Up @@ -77,3 +78,22 @@ func (o operationBatch) GetID(serviceName, spanName, spanKind string) (pgtype.In

return id, nil
}

func getPGKindEnum(pk pdata.SpanKind) (string, error) {
switch pk {
case pdata.SpanKindClient:
return "client", nil
case pdata.SpanKindServer:
return "server", nil
case pdata.SpanKindInternal:
return "internal", nil
case pdata.SpanKindConsumer:
return "consumer", nil
case pdata.SpanKindProducer:
return "producer", nil
case pdata.SpanKindUnspecified:
return "unspecified", nil
default:
return "", fmt.Errorf("unknown span kind: %v", pk)
}
}
30 changes: 27 additions & 3 deletions pkg/pgmodel/ingestor/trace/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,10 @@ func (t *traceWriterImpl) InsertTraces(ctx context.Context, traces pdata.Traces)
for k := 0; k < spans.Len(); k++ {
span := spans.At(k)
spanName := span.Name()
spanKind := span.Kind().String()
spanKind, err := getPGKindEnum(span.Kind())
if err != nil {
return err
}

operationBatch.Queue(serviceName, spanName, spanKind)

Expand Down Expand Up @@ -270,7 +273,10 @@ func (t *traceWriterImpl) InsertTraces(ctx context.Context, traces pdata.Traces)
}
parentSpanID := getSpanID(span.ParentSpanID().Bytes())
spanName := span.Name()
spanKind := span.Kind().String()
spanKind, err := getPGKindEnum(span.Kind())
if err != nil {
return err
}
operationID, err := operationBatch.GetID(serviceName, spanName, spanKind)
if err != nil {
return err
Expand Down Expand Up @@ -307,9 +313,14 @@ func (t *traceWriterImpl) InsertTraces(ctx context.Context, traces pdata.Traces)
maxEndTime = end
}

statusCode, err := getPGStatusCode(span.Status().Code())
if err != nil {
return err
}

spanRows = append(spanRows, []interface{}{traceID, spanID, parentSpanID,
operationID, start, end, float64(end.Sub(start).Nanoseconds()) / float64(1e6), getTraceStateValue(span.TraceState()), jsonTags, span.DroppedAttributesCount(), eventTimeRange, span.DroppedEventsCount(),
span.DroppedLinksCount(), span.Status().Code().String(), span.Status().Message(), instLibID, jsonResourceTags, 0, rSchemaURLID})
span.DroppedLinksCount(), statusCode, span.Status().Message(), instLibID, jsonResourceTags, 0, rSchemaURLID})

}
}
Expand Down Expand Up @@ -465,3 +476,16 @@ func getTraceStateValue(ts pdata.TraceState) (result pgtype.Text) {

return result
}

func getPGStatusCode(pk pdata.StatusCode) (string, error) {
switch pk {
case pdata.StatusCodeOk:
return "ok", nil
case pdata.StatusCodeError:
return "error", nil
case pdata.StatusCodeUnset:
return "unset", nil
default:
return "", fmt.Errorf("unknown status code: %v", pk)
}
}
6 changes: 3 additions & 3 deletions pkg/tests/end_to_end_tests/trace_delete_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestDeleteSpans(t *testing.T) {
insert into _ps_trace.instrumentation_lib (name, version, schema_url_id)
select 'inst_lib_1', '1.0.0', (select id from _ps_trace.schema_url where url = 'fake.url.com' limit 1);
select ps_trace.put_operation('my.service.name', 'my.span.name', 'SPAN_KIND_UNSPECIFIED');
select ps_trace.put_operation('my.service.name', 'my.span.name', 'unspecified');
select ps_trace.put_tag_key('my.tag.key', 1::ps_trace.tag_type);
Expand All @@ -49,14 +49,14 @@ func TestDeleteSpans(t *testing.T) {
now(),
0,
'{}'::jsonb::tag_map,
'STATUS_CODE_OK',
'ok',
'{}'::jsonb::tag_map,
-1
);
INSERT INTO _ps_trace.link
(
trace_id, span_id, span_start_time, linked_trace_id, linked_span_id, link_nbr, trace_state,
trace_id, span_id, span_start_time, linked_trace_id, linked_span_id, link_nbr, trace_state,
tags, dropped_tags_count
)
SELECT
Expand Down
Loading

0 comments on commit 7b85613

Please sign in to comment.