Joey Armstrong | 5f51f2e | 2023-01-17 17:06:26 -0500 | [diff] [blame] | 1 | // Copyright The OpenTelemetry Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package label |
| 16 | |
| 17 | import ( |
| 18 | "encoding/json" |
| 19 | "reflect" |
| 20 | "sort" |
| 21 | "sync" |
| 22 | ) |
| 23 | |
| 24 | type ( |
| 25 | // Set is the representation for a distinct label set. It |
| 26 | // manages an immutable set of labels, with an internal cache |
| 27 | // for storing label encodings. |
| 28 | // |
| 29 | // This type supports the `Equivalent` method of comparison |
| 30 | // using values of type `Distinct`. |
| 31 | // |
| 32 | // This type is used to implement: |
| 33 | // 1. Metric labels |
| 34 | // 2. Resource sets |
| 35 | // 3. Correlation map (TODO) |
| 36 | Set struct { |
| 37 | equivalent Distinct |
| 38 | |
| 39 | lock sync.Mutex |
| 40 | encoders [maxConcurrentEncoders]EncoderID |
| 41 | encoded [maxConcurrentEncoders]string |
| 42 | } |
| 43 | |
| 44 | // Distinct wraps a variable-size array of `KeyValue`, |
| 45 | // constructed with keys in sorted order. This can be used as |
| 46 | // a map key or for equality checking between Sets. |
| 47 | Distinct struct { |
| 48 | iface interface{} |
| 49 | } |
| 50 | |
| 51 | // Filter supports removing certain labels from label sets. |
| 52 | // When the filter returns true, the label will be kept in |
| 53 | // the filtered label set. When the filter returns false, the |
| 54 | // label is excluded from the filtered label set, and the |
| 55 | // label instead appears in the `removed` list of excluded labels. |
| 56 | Filter func(KeyValue) bool |
| 57 | |
| 58 | // Sortable implements `sort.Interface`, used for sorting |
| 59 | // `KeyValue`. This is an exported type to support a |
| 60 | // memory optimization. A pointer to one of these is needed |
| 61 | // for the call to `sort.Stable()`, which the caller may |
| 62 | // provide in order to avoid an allocation. See |
| 63 | // `NewSetWithSortable()`. |
| 64 | Sortable []KeyValue |
| 65 | ) |
| 66 | |
| 67 | var ( |
| 68 | // keyValueType is used in `computeDistinctReflect`. |
| 69 | keyValueType = reflect.TypeOf(KeyValue{}) |
| 70 | |
| 71 | // emptySet is returned for empty label sets. |
| 72 | emptySet = &Set{ |
| 73 | equivalent: Distinct{ |
| 74 | iface: [0]KeyValue{}, |
| 75 | }, |
| 76 | } |
| 77 | ) |
| 78 | |
| 79 | const maxConcurrentEncoders = 3 |
| 80 | |
| 81 | func EmptySet() *Set { |
| 82 | return emptySet |
| 83 | } |
| 84 | |
| 85 | // reflect abbreviates `reflect.ValueOf`. |
| 86 | func (d Distinct) reflect() reflect.Value { |
| 87 | return reflect.ValueOf(d.iface) |
| 88 | } |
| 89 | |
| 90 | // Valid returns true if this value refers to a valid `*Set`. |
| 91 | func (d Distinct) Valid() bool { |
| 92 | return d.iface != nil |
| 93 | } |
| 94 | |
| 95 | // Len returns the number of labels in this set. |
| 96 | func (l *Set) Len() int { |
| 97 | if l == nil || !l.equivalent.Valid() { |
| 98 | return 0 |
| 99 | } |
| 100 | return l.equivalent.reflect().Len() |
| 101 | } |
| 102 | |
| 103 | // Get returns the KeyValue at ordered position `idx` in this set. |
| 104 | func (l *Set) Get(idx int) (KeyValue, bool) { |
| 105 | if l == nil { |
| 106 | return KeyValue{}, false |
| 107 | } |
| 108 | value := l.equivalent.reflect() |
| 109 | |
| 110 | if idx >= 0 && idx < value.Len() { |
| 111 | // Note: The Go compiler successfully avoids an allocation for |
| 112 | // the interface{} conversion here: |
| 113 | return value.Index(idx).Interface().(KeyValue), true |
| 114 | } |
| 115 | |
| 116 | return KeyValue{}, false |
| 117 | } |
| 118 | |
| 119 | // Value returns the value of a specified key in this set. |
| 120 | func (l *Set) Value(k Key) (Value, bool) { |
| 121 | if l == nil { |
| 122 | return Value{}, false |
| 123 | } |
| 124 | rValue := l.equivalent.reflect() |
| 125 | vlen := rValue.Len() |
| 126 | |
| 127 | idx := sort.Search(vlen, func(idx int) bool { |
| 128 | return rValue.Index(idx).Interface().(KeyValue).Key >= k |
| 129 | }) |
| 130 | if idx >= vlen { |
| 131 | return Value{}, false |
| 132 | } |
| 133 | keyValue := rValue.Index(idx).Interface().(KeyValue) |
| 134 | if k == keyValue.Key { |
| 135 | return keyValue.Value, true |
| 136 | } |
| 137 | return Value{}, false |
| 138 | } |
| 139 | |
| 140 | // HasValue tests whether a key is defined in this set. |
| 141 | func (l *Set) HasValue(k Key) bool { |
| 142 | if l == nil { |
| 143 | return false |
| 144 | } |
| 145 | _, ok := l.Value(k) |
| 146 | return ok |
| 147 | } |
| 148 | |
| 149 | // Iter returns an iterator for visiting the labels in this set. |
| 150 | func (l *Set) Iter() Iterator { |
| 151 | return Iterator{ |
| 152 | storage: l, |
| 153 | idx: -1, |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | // ToSlice returns the set of labels belonging to this set, sorted, |
| 158 | // where keys appear no more than once. |
| 159 | func (l *Set) ToSlice() []KeyValue { |
| 160 | iter := l.Iter() |
| 161 | return iter.ToSlice() |
| 162 | } |
| 163 | |
| 164 | // Equivalent returns a value that may be used as a map key. The |
| 165 | // Distinct type guarantees that the result will equal the equivalent |
| 166 | // Distinct value of any label set with the same elements as this, |
| 167 | // where sets are made unique by choosing the last value in the input |
| 168 | // for any given key. |
| 169 | func (l *Set) Equivalent() Distinct { |
| 170 | if l == nil || !l.equivalent.Valid() { |
| 171 | return emptySet.equivalent |
| 172 | } |
| 173 | return l.equivalent |
| 174 | } |
| 175 | |
| 176 | // Equals returns true if the argument set is equivalent to this set. |
| 177 | func (l *Set) Equals(o *Set) bool { |
| 178 | return l.Equivalent() == o.Equivalent() |
| 179 | } |
| 180 | |
| 181 | // Encoded returns the encoded form of this set, according to |
| 182 | // `encoder`. The result will be cached in this `*Set`. |
| 183 | func (l *Set) Encoded(encoder Encoder) string { |
| 184 | if l == nil || encoder == nil { |
| 185 | return "" |
| 186 | } |
| 187 | |
| 188 | id := encoder.ID() |
| 189 | if !id.Valid() { |
| 190 | // Invalid IDs are not cached. |
| 191 | return encoder.Encode(l.Iter()) |
| 192 | } |
| 193 | |
| 194 | var lookup *string |
| 195 | l.lock.Lock() |
| 196 | for idx := 0; idx < maxConcurrentEncoders; idx++ { |
| 197 | if l.encoders[idx] == id { |
| 198 | lookup = &l.encoded[idx] |
| 199 | break |
| 200 | } |
| 201 | } |
| 202 | l.lock.Unlock() |
| 203 | |
| 204 | if lookup != nil { |
| 205 | return *lookup |
| 206 | } |
| 207 | |
| 208 | r := encoder.Encode(l.Iter()) |
| 209 | |
| 210 | l.lock.Lock() |
| 211 | defer l.lock.Unlock() |
| 212 | |
| 213 | for idx := 0; idx < maxConcurrentEncoders; idx++ { |
| 214 | if l.encoders[idx] == id { |
| 215 | return l.encoded[idx] |
| 216 | } |
| 217 | if !l.encoders[idx].Valid() { |
| 218 | l.encoders[idx] = id |
| 219 | l.encoded[idx] = r |
| 220 | return r |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | // TODO: This is a performance cliff. Find a way for this to |
| 225 | // generate a warning. |
| 226 | return r |
| 227 | } |
| 228 | |
| 229 | func empty() Set { |
| 230 | return Set{ |
| 231 | equivalent: emptySet.equivalent, |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | // NewSet returns a new `Set`. See the documentation for |
| 236 | // `NewSetWithSortableFiltered` for more details. |
| 237 | // |
| 238 | // Except for empty sets, this method adds an additional allocation |
| 239 | // compared with calls that include a `*Sortable`. |
| 240 | func NewSet(kvs ...KeyValue) Set { |
| 241 | // Check for empty set. |
| 242 | if len(kvs) == 0 { |
| 243 | return empty() |
| 244 | } |
| 245 | s, _ := NewSetWithSortableFiltered(kvs, new(Sortable), nil) |
| 246 | return s //nolint |
| 247 | } |
| 248 | |
| 249 | // NewSetWithSortable returns a new `Set`. See the documentation for |
| 250 | // `NewSetWithSortableFiltered` for more details. |
| 251 | // |
| 252 | // This call includes a `*Sortable` option as a memory optimization. |
| 253 | func NewSetWithSortable(kvs []KeyValue, tmp *Sortable) Set { |
| 254 | // Check for empty set. |
| 255 | if len(kvs) == 0 { |
| 256 | return empty() |
| 257 | } |
| 258 | s, _ := NewSetWithSortableFiltered(kvs, tmp, nil) |
| 259 | return s //nolint |
| 260 | } |
| 261 | |
| 262 | // NewSetWithFiltered returns a new `Set`. See the documentation for |
| 263 | // `NewSetWithSortableFiltered` for more details. |
| 264 | // |
| 265 | // This call includes a `Filter` to include/exclude label keys from |
| 266 | // the return value. Excluded keys are returned as a slice of label |
| 267 | // values. |
| 268 | func NewSetWithFiltered(kvs []KeyValue, filter Filter) (Set, []KeyValue) { |
| 269 | // Check for empty set. |
| 270 | if len(kvs) == 0 { |
| 271 | return empty(), nil |
| 272 | } |
| 273 | return NewSetWithSortableFiltered(kvs, new(Sortable), filter) |
| 274 | } |
| 275 | |
| 276 | // NewSetWithSortableFiltered returns a new `Set`. |
| 277 | // |
| 278 | // Duplicate keys are eliminated by taking the last value. This |
| 279 | // re-orders the input slice so that unique last-values are contiguous |
| 280 | // at the end of the slice. |
| 281 | // |
| 282 | // This ensures the following: |
| 283 | // |
| 284 | // - Last-value-wins semantics |
| 285 | // - Caller sees the reordering, but doesn't lose values |
| 286 | // - Repeated call preserve last-value wins. |
| 287 | // |
| 288 | // Note that methods are defined on `*Set`, although this returns `Set`. |
| 289 | // Callers can avoid memory allocations by: |
| 290 | // |
| 291 | // - allocating a `Sortable` for use as a temporary in this method |
| 292 | // - allocating a `Set` for storing the return value of this |
| 293 | // constructor. |
| 294 | // |
| 295 | // The result maintains a cache of encoded labels, by label.EncoderID. |
| 296 | // This value should not be copied after its first use. |
| 297 | // |
| 298 | // The second `[]KeyValue` return value is a list of labels that were |
| 299 | // excluded by the Filter (if non-nil). |
| 300 | func NewSetWithSortableFiltered(kvs []KeyValue, tmp *Sortable, filter Filter) (Set, []KeyValue) { |
| 301 | // Check for empty set. |
| 302 | if len(kvs) == 0 { |
| 303 | return empty(), nil |
| 304 | } |
| 305 | |
| 306 | *tmp = kvs |
| 307 | |
| 308 | // Stable sort so the following de-duplication can implement |
| 309 | // last-value-wins semantics. |
| 310 | sort.Stable(tmp) |
| 311 | |
| 312 | *tmp = nil |
| 313 | |
| 314 | position := len(kvs) - 1 |
| 315 | offset := position - 1 |
| 316 | |
| 317 | // The requirements stated above require that the stable |
| 318 | // result be placed in the end of the input slice, while |
| 319 | // overwritten values are swapped to the beginning. |
| 320 | // |
| 321 | // De-duplicate with last-value-wins semantics. Preserve |
| 322 | // duplicate values at the beginning of the input slice. |
| 323 | for ; offset >= 0; offset-- { |
| 324 | if kvs[offset].Key == kvs[position].Key { |
| 325 | continue |
| 326 | } |
| 327 | position-- |
| 328 | kvs[offset], kvs[position] = kvs[position], kvs[offset] |
| 329 | } |
| 330 | if filter != nil { |
| 331 | return filterSet(kvs[position:], filter) |
| 332 | } |
| 333 | return Set{ |
| 334 | equivalent: computeDistinct(kvs[position:]), |
| 335 | }, nil |
| 336 | } |
| 337 | |
| 338 | // filterSet reorders `kvs` so that included keys are contiguous at |
| 339 | // the end of the slice, while excluded keys precede the included keys. |
| 340 | func filterSet(kvs []KeyValue, filter Filter) (Set, []KeyValue) { |
| 341 | var excluded []KeyValue |
| 342 | |
| 343 | // Move labels that do not match the filter so |
| 344 | // they're adjacent before calling computeDistinct(). |
| 345 | distinctPosition := len(kvs) |
| 346 | |
| 347 | // Swap indistinct keys forward and distinct keys toward the |
| 348 | // end of the slice. |
| 349 | offset := len(kvs) - 1 |
| 350 | for ; offset >= 0; offset-- { |
| 351 | if filter(kvs[offset]) { |
| 352 | distinctPosition-- |
| 353 | kvs[offset], kvs[distinctPosition] = kvs[distinctPosition], kvs[offset] |
| 354 | continue |
| 355 | } |
| 356 | } |
| 357 | excluded = kvs[:distinctPosition] |
| 358 | |
| 359 | return Set{ |
| 360 | equivalent: computeDistinct(kvs[distinctPosition:]), |
| 361 | }, excluded |
| 362 | } |
| 363 | |
| 364 | // Filter returns a filtered copy of this `Set`. See the |
| 365 | // documentation for `NewSetWithSortableFiltered` for more details. |
| 366 | func (l *Set) Filter(re Filter) (Set, []KeyValue) { |
| 367 | if re == nil { |
| 368 | return Set{ |
| 369 | equivalent: l.equivalent, |
| 370 | }, nil |
| 371 | } |
| 372 | |
| 373 | // Note: This could be refactored to avoid the temporary slice |
| 374 | // allocation, if it proves to be expensive. |
| 375 | return filterSet(l.ToSlice(), re) |
| 376 | } |
| 377 | |
| 378 | // computeDistinct returns a `Distinct` using either the fixed- or |
| 379 | // reflect-oriented code path, depending on the size of the input. |
| 380 | // The input slice is assumed to already be sorted and de-duplicated. |
| 381 | func computeDistinct(kvs []KeyValue) Distinct { |
| 382 | iface := computeDistinctFixed(kvs) |
| 383 | if iface == nil { |
| 384 | iface = computeDistinctReflect(kvs) |
| 385 | } |
| 386 | return Distinct{ |
| 387 | iface: iface, |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | // computeDistinctFixed computes a `Distinct` for small slices. It |
| 392 | // returns nil if the input is too large for this code path. |
| 393 | func computeDistinctFixed(kvs []KeyValue) interface{} { |
| 394 | switch len(kvs) { |
| 395 | case 1: |
| 396 | ptr := new([1]KeyValue) |
| 397 | copy((*ptr)[:], kvs) |
| 398 | return *ptr |
| 399 | case 2: |
| 400 | ptr := new([2]KeyValue) |
| 401 | copy((*ptr)[:], kvs) |
| 402 | return *ptr |
| 403 | case 3: |
| 404 | ptr := new([3]KeyValue) |
| 405 | copy((*ptr)[:], kvs) |
| 406 | return *ptr |
| 407 | case 4: |
| 408 | ptr := new([4]KeyValue) |
| 409 | copy((*ptr)[:], kvs) |
| 410 | return *ptr |
| 411 | case 5: |
| 412 | ptr := new([5]KeyValue) |
| 413 | copy((*ptr)[:], kvs) |
| 414 | return *ptr |
| 415 | case 6: |
| 416 | ptr := new([6]KeyValue) |
| 417 | copy((*ptr)[:], kvs) |
| 418 | return *ptr |
| 419 | case 7: |
| 420 | ptr := new([7]KeyValue) |
| 421 | copy((*ptr)[:], kvs) |
| 422 | return *ptr |
| 423 | case 8: |
| 424 | ptr := new([8]KeyValue) |
| 425 | copy((*ptr)[:], kvs) |
| 426 | return *ptr |
| 427 | case 9: |
| 428 | ptr := new([9]KeyValue) |
| 429 | copy((*ptr)[:], kvs) |
| 430 | return *ptr |
| 431 | case 10: |
| 432 | ptr := new([10]KeyValue) |
| 433 | copy((*ptr)[:], kvs) |
| 434 | return *ptr |
| 435 | default: |
| 436 | return nil |
| 437 | } |
| 438 | } |
| 439 | |
| 440 | // computeDistinctReflect computes a `Distinct` using reflection, |
| 441 | // works for any size input. |
| 442 | func computeDistinctReflect(kvs []KeyValue) interface{} { |
| 443 | at := reflect.New(reflect.ArrayOf(len(kvs), keyValueType)).Elem() |
| 444 | for i, keyValue := range kvs { |
| 445 | *(at.Index(i).Addr().Interface().(*KeyValue)) = keyValue |
| 446 | } |
| 447 | return at.Interface() |
| 448 | } |
| 449 | |
| 450 | // MarshalJSON returns the JSON encoding of the `*Set`. |
| 451 | func (l *Set) MarshalJSON() ([]byte, error) { |
| 452 | return json.Marshal(l.equivalent.iface) |
| 453 | } |
| 454 | |
| 455 | // Len implements `sort.Interface`. |
| 456 | func (l *Sortable) Len() int { |
| 457 | return len(*l) |
| 458 | } |
| 459 | |
| 460 | // Swap implements `sort.Interface`. |
| 461 | func (l *Sortable) Swap(i, j int) { |
| 462 | (*l)[i], (*l)[j] = (*l)[j], (*l)[i] |
| 463 | } |
| 464 | |
| 465 | // Less implements `sort.Interface`. |
| 466 | func (l *Sortable) Less(i, j int) bool { |
| 467 | return (*l)[i].Key < (*l)[j].Key |
| 468 | } |