blob: 7a8a1e0990143b5ccde82f9fd59a8a787ed5f606 [file] [log] [blame]
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13
14package procfs
15
// While implementing parsing of /proc/[pid]/mountstats, this blog was used
// heavily as a reference:
//   https://utcc.utoronto.ca/~cks/space/blog/linux/NFSMountstatsIndex
//
// Special thanks to Chris Siebenmann for all of his posts explaining the
// various statistics available for NFS.
22
23import (
24 "bufio"
25 "fmt"
26 "io"
27 "strconv"
28 "strings"
29 "time"
30)
31
// Field counts and version identifiers used by more than one parsing function.
const (
	// deviceEntryLen is the number of whitespace-separated fields in a plain
	// "device [device] mounted on [mount] with fstype [type]" entry. Entries
	// with more fields carry a statistics version after the filesystem type.
	deviceEntryLen = 8

	// fieldBytesLen and fieldEventsLen are the exact numbers of integer
	// fields required on the "bytes:" and "events:" lines respectively.
	fieldBytesLen  = 8
	fieldEventsLen = 27

	// Statistics versions recognized when parsing transport ("xprt:") lines.
	statVersion10 = "1.0"
	statVersion11 = "1.1"

	// Number of integer fields that must follow the protocol name on a
	// transport line, for statistics version 1.0.
	fieldTransport10TCPLen = 10
	fieldTransport10UDPLen = 7

	// Number of integer fields that must follow the protocol name on a
	// transport line, for statistics version 1.1.
	fieldTransport11TCPLen = 13
	fieldTransport11UDPLen = 10
)
48
49// A Mount is a device mount parsed from /proc/[pid]/mountstats.
50type Mount struct {
51 // Name of the device.
52 Device string
53 // The mount point of the device.
54 Mount string
55 // The filesystem type used by the device.
56 Type string
57 // If available additional statistics related to this Mount.
58 // Use a type assertion to determine if additional statistics are available.
59 Stats MountStats
60}
61
// MountStats is implemented by types that hold detailed statistics for one
// specific kind of Mount.
type MountStats interface {
	// mountStats is an unexported marker method; it takes no arguments and
	// returns nothing.
	mountStats()
}
67
68// A MountStatsNFS is a MountStats implementation for NFSv3 and v4 mounts.
69type MountStatsNFS struct {
70 // The version of statistics provided.
71 StatVersion string
72 // The age of the NFS mount.
73 Age time.Duration
74 // Statistics related to byte counters for various operations.
75 Bytes NFSBytesStats
76 // Statistics related to various NFS event occurrences.
77 Events NFSEventsStats
78 // Statistics broken down by filesystem operation.
79 Operations []NFSOperationStats
80 // Statistics about the NFS RPC transport.
81 Transport NFSTransportStats
82}
83
84// mountStats implements MountStats.
85func (m MountStatsNFS) mountStats() {}
86
// NFSBytesStats holds counters for the number of bytes and pages an NFS
// client has read from and written to an NFS server.
type NFSBytesStats struct {
	// Read is the number of bytes read using the read() syscall.
	Read uint64
	// Write is the number of bytes written using the write() syscall.
	Write uint64
	// DirectRead is the number of bytes read using read() in O_DIRECT mode.
	DirectRead uint64
	// DirectWrite is the number of bytes written using write() in O_DIRECT
	// mode.
	DirectWrite uint64
	// ReadTotal is the total number of bytes read from the NFS server.
	ReadTotal uint64
	// WriteTotal is the total number of bytes written to the NFS server.
	WriteTotal uint64
	// ReadPages is the number of pages read directly via mmap()'d files.
	ReadPages uint64
	// WritePages is the number of pages written directly via mmap()'d files.
	WritePages uint64
}
107
// NFSEventsStats holds counters for NFS event occurrences.
type NFSEventsStats struct {
	// InodeRevalidate counts re-validations of cached inode attributes from
	// the server.
	InodeRevalidate uint64
	// DnodeRevalidate counts re-validations of cached dentry nodes from the
	// server.
	DnodeRevalidate uint64
	// DataInvalidate counts the times an inode cache was cleared.
	DataInvalidate uint64
	// AttributeInvalidate counts invalidations of cached inode attributes.
	AttributeInvalidate uint64
	// VFSOpen counts open() calls on files or directories.
	VFSOpen uint64
	// VFSLookup counts directory lookups.
	VFSLookup uint64
	// VFSAccess counts permission checks.
	VFSAccess uint64
	// VFSUpdatePage counts updates (and potential writes) to pages.
	VFSUpdatePage uint64
	// VFSReadPage counts pages read directly via mmap()'d files.
	VFSReadPage uint64
	// VFSReadPages counts the times a group of pages was read.
	VFSReadPages uint64
	// VFSWritePage counts pages written directly via mmap()'d files.
	VFSWritePage uint64
	// VFSWritePages counts the times a group of pages was written.
	VFSWritePages uint64
	// VFSGetdents counts directory entry reads made with getdents().
	VFSGetdents uint64
	// VFSSetattr counts the times attributes were set on inodes.
	VFSSetattr uint64
	// VFSFlush counts pending writes forcefully flushed to the server.
	VFSFlush uint64
	// VFSFsync counts fsync() calls on directories and files.
	VFSFsync uint64
	// VFSLock counts attempts to lock a file.
	VFSLock uint64
	// VFSFileRelease counts the times files were closed and released.
	VFSFileRelease uint64
	// CongestionWait has unknown meaning and is possibly unused.
	CongestionWait uint64
	// Truncation counts file truncations.
	Truncation uint64
	// WriteExtension counts the times a file grew because of writes beyond
	// its existing end.
	WriteExtension uint64
	// SillyRename counts the times a file was removed while still open by
	// another process.
	SillyRename uint64
	// ShortRead counts the times the NFS server gave less data than expected
	// while reading.
	ShortRead uint64
	// ShortWrite counts the times the NFS server wrote less data than
	// expected while writing.
	ShortWrite uint64
	// JukeboxDelay counts the times the NFS server indicated EJUKEBOX, i.e.
	// that it was retrieving data from offline storage.
	JukeboxDelay uint64
	// PNFSRead counts NFS v4.1+ pNFS reads.
	PNFSRead uint64
	// PNFSWrite counts NFS v4.1+ pNFS writes.
	PNFSWrite uint64
}
166
167// A NFSOperationStats contains statistics for a single operation.
168type NFSOperationStats struct {
169 // The name of the operation.
170 Operation string
171 // Number of requests performed for this operation.
172 Requests uint64
173 // Number of times an actual RPC request has been transmitted for this operation.
174 Transmissions uint64
175 // Number of times a request has had a major timeout.
176 MajorTimeouts uint64
177 // Number of bytes sent for this operation, including RPC headers and payload.
178 BytesSent uint64
179 // Number of bytes received for this operation, including RPC headers and payload.
180 BytesReceived uint64
181 // Duration all requests spent queued for transmission before they were sent.
182 CumulativeQueueTime time.Duration
183 // Duration it took to get a reply back after the request was transmitted.
184 CumulativeTotalResponseTime time.Duration
185 // Duration from when a request was enqueued to when it was completely handled.
186 CumulativeTotalRequestTime time.Duration
187}
188
189// A NFSTransportStats contains statistics for the NFS mount RPC requests and
190// responses.
191type NFSTransportStats struct {
192 // The transport protocol used for the NFS mount.
193 Protocol string
194 // The local port used for the NFS mount.
195 Port uint64
196 // Number of times the client has had to establish a connection from scratch
197 // to the NFS server.
198 Bind uint64
199 // Number of times the client has made a TCP connection to the NFS server.
200 Connect uint64
201 // Duration (in jiffies, a kernel internal unit of time) the NFS mount has
202 // spent waiting for connections to the server to be established.
203 ConnectIdleTime uint64
204 // Duration since the NFS mount last saw any RPC traffic.
205 IdleTime time.Duration
206 // Number of RPC requests for this mount sent to the NFS server.
207 Sends uint64
208 // Number of RPC responses for this mount received from the NFS server.
209 Receives uint64
210 // Number of times the NFS server sent a response with a transaction ID
211 // unknown to this client.
212 BadTransactionIDs uint64
213 // A running counter, incremented on each request as the current difference
214 // ebetween sends and receives.
215 CumulativeActiveRequests uint64
216 // A running counter, incremented on each request by the current backlog
217 // queue size.
218 CumulativeBacklog uint64
219
220 // Stats below only available with stat version 1.1.
221
222 // Maximum number of simultaneously active RPC requests ever used.
223 MaximumRPCSlotsUsed uint64
224 // A running counter, incremented on each request as the current size of the
225 // sending queue.
226 CumulativeSendingQueue uint64
227 // A running counter, incremented on each request as the current size of the
228 // pending queue.
229 CumulativePendingQueue uint64
230}
231
232// parseMountStats parses a /proc/[pid]/mountstats file and returns a slice
233// of Mount structures containing detailed information about each mount.
234// If available, statistics for each mount are parsed as well.
235func parseMountStats(r io.Reader) ([]*Mount, error) {
236 const (
237 device = "device"
238 statVersionPrefix = "statvers="
239
240 nfs3Type = "nfs"
241 nfs4Type = "nfs4"
242 )
243
244 var mounts []*Mount
245
246 s := bufio.NewScanner(r)
247 for s.Scan() {
248 // Only look for device entries in this function
249 ss := strings.Fields(string(s.Bytes()))
250 if len(ss) == 0 || ss[0] != device {
251 continue
252 }
253
254 m, err := parseMount(ss)
255 if err != nil {
256 return nil, err
257 }
258
259 // Does this mount also possess statistics information?
260 if len(ss) > deviceEntryLen {
261 // Only NFSv3 and v4 are supported for parsing statistics
262 if m.Type != nfs3Type && m.Type != nfs4Type {
263 return nil, fmt.Errorf("cannot parse MountStats for fstype %q", m.Type)
264 }
265
266 statVersion := strings.TrimPrefix(ss[8], statVersionPrefix)
267
268 stats, err := parseMountStatsNFS(s, statVersion)
269 if err != nil {
270 return nil, err
271 }
272
273 m.Stats = stats
274 }
275
276 mounts = append(mounts, m)
277 }
278
279 return mounts, s.Err()
280}
281
282// parseMount parses an entry in /proc/[pid]/mountstats in the format:
283// device [device] mounted on [mount] with fstype [type]
284func parseMount(ss []string) (*Mount, error) {
285 if len(ss) < deviceEntryLen {
286 return nil, fmt.Errorf("invalid device entry: %v", ss)
287 }
288
289 // Check for specific words appearing at specific indices to ensure
290 // the format is consistent with what we expect
291 format := []struct {
292 i int
293 s string
294 }{
295 {i: 0, s: "device"},
296 {i: 2, s: "mounted"},
297 {i: 3, s: "on"},
298 {i: 5, s: "with"},
299 {i: 6, s: "fstype"},
300 }
301
302 for _, f := range format {
303 if ss[f.i] != f.s {
304 return nil, fmt.Errorf("invalid device entry: %v", ss)
305 }
306 }
307
308 return &Mount{
309 Device: ss[1],
310 Mount: ss[4],
311 Type: ss[7],
312 }, nil
313}
314
315// parseMountStatsNFS parses a MountStatsNFS by scanning additional information
316// related to NFS statistics.
317func parseMountStatsNFS(s *bufio.Scanner, statVersion string) (*MountStatsNFS, error) {
318 // Field indicators for parsing specific types of data
319 const (
320 fieldAge = "age:"
321 fieldBytes = "bytes:"
322 fieldEvents = "events:"
323 fieldPerOpStats = "per-op"
324 fieldTransport = "xprt:"
325 )
326
327 stats := &MountStatsNFS{
328 StatVersion: statVersion,
329 }
330
331 for s.Scan() {
332 ss := strings.Fields(string(s.Bytes()))
333 if len(ss) == 0 {
334 break
335 }
336 if len(ss) < 2 {
337 return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
338 }
339
340 switch ss[0] {
341 case fieldAge:
342 // Age integer is in seconds
343 d, err := time.ParseDuration(ss[1] + "s")
344 if err != nil {
345 return nil, err
346 }
347
348 stats.Age = d
349 case fieldBytes:
350 bstats, err := parseNFSBytesStats(ss[1:])
351 if err != nil {
352 return nil, err
353 }
354
355 stats.Bytes = *bstats
356 case fieldEvents:
357 estats, err := parseNFSEventsStats(ss[1:])
358 if err != nil {
359 return nil, err
360 }
361
362 stats.Events = *estats
363 case fieldTransport:
364 if len(ss) < 3 {
365 return nil, fmt.Errorf("not enough information for NFS transport stats: %v", ss)
366 }
367
368 tstats, err := parseNFSTransportStats(ss[1:], statVersion)
369 if err != nil {
370 return nil, err
371 }
372
373 stats.Transport = *tstats
374 }
375
376 // When encountering "per-operation statistics", we must break this
377 // loop and parse them separately to ensure we can terminate parsing
378 // before reaching another device entry; hence why this 'if' statement
379 // is not just another switch case
380 if ss[0] == fieldPerOpStats {
381 break
382 }
383 }
384
385 if err := s.Err(); err != nil {
386 return nil, err
387 }
388
389 // NFS per-operation stats appear last before the next device entry
390 perOpStats, err := parseNFSOperationStats(s)
391 if err != nil {
392 return nil, err
393 }
394
395 stats.Operations = perOpStats
396
397 return stats, nil
398}
399
400// parseNFSBytesStats parses a NFSBytesStats line using an input set of
401// integer fields.
402func parseNFSBytesStats(ss []string) (*NFSBytesStats, error) {
403 if len(ss) != fieldBytesLen {
404 return nil, fmt.Errorf("invalid NFS bytes stats: %v", ss)
405 }
406
407 ns := make([]uint64, 0, fieldBytesLen)
408 for _, s := range ss {
409 n, err := strconv.ParseUint(s, 10, 64)
410 if err != nil {
411 return nil, err
412 }
413
414 ns = append(ns, n)
415 }
416
417 return &NFSBytesStats{
418 Read: ns[0],
419 Write: ns[1],
420 DirectRead: ns[2],
421 DirectWrite: ns[3],
422 ReadTotal: ns[4],
423 WriteTotal: ns[5],
424 ReadPages: ns[6],
425 WritePages: ns[7],
426 }, nil
427}
428
429// parseNFSEventsStats parses a NFSEventsStats line using an input set of
430// integer fields.
431func parseNFSEventsStats(ss []string) (*NFSEventsStats, error) {
432 if len(ss) != fieldEventsLen {
433 return nil, fmt.Errorf("invalid NFS events stats: %v", ss)
434 }
435
436 ns := make([]uint64, 0, fieldEventsLen)
437 for _, s := range ss {
438 n, err := strconv.ParseUint(s, 10, 64)
439 if err != nil {
440 return nil, err
441 }
442
443 ns = append(ns, n)
444 }
445
446 return &NFSEventsStats{
447 InodeRevalidate: ns[0],
448 DnodeRevalidate: ns[1],
449 DataInvalidate: ns[2],
450 AttributeInvalidate: ns[3],
451 VFSOpen: ns[4],
452 VFSLookup: ns[5],
453 VFSAccess: ns[6],
454 VFSUpdatePage: ns[7],
455 VFSReadPage: ns[8],
456 VFSReadPages: ns[9],
457 VFSWritePage: ns[10],
458 VFSWritePages: ns[11],
459 VFSGetdents: ns[12],
460 VFSSetattr: ns[13],
461 VFSFlush: ns[14],
462 VFSFsync: ns[15],
463 VFSLock: ns[16],
464 VFSFileRelease: ns[17],
465 CongestionWait: ns[18],
466 Truncation: ns[19],
467 WriteExtension: ns[20],
468 SillyRename: ns[21],
469 ShortRead: ns[22],
470 ShortWrite: ns[23],
471 JukeboxDelay: ns[24],
472 PNFSRead: ns[25],
473 PNFSWrite: ns[26],
474 }, nil
475}
476
477// parseNFSOperationStats parses a slice of NFSOperationStats by scanning
478// additional information about per-operation statistics until an empty
479// line is reached.
480func parseNFSOperationStats(s *bufio.Scanner) ([]NFSOperationStats, error) {
481 const (
482 // Number of expected fields in each per-operation statistics set
483 numFields = 9
484 )
485
486 var ops []NFSOperationStats
487
488 for s.Scan() {
489 ss := strings.Fields(string(s.Bytes()))
490 if len(ss) == 0 {
491 // Must break when reading a blank line after per-operation stats to
492 // enable top-level function to parse the next device entry
493 break
494 }
495
496 if len(ss) != numFields {
497 return nil, fmt.Errorf("invalid NFS per-operations stats: %v", ss)
498 }
499
500 // Skip string operation name for integers
501 ns := make([]uint64, 0, numFields-1)
502 for _, st := range ss[1:] {
503 n, err := strconv.ParseUint(st, 10, 64)
504 if err != nil {
505 return nil, err
506 }
507
508 ns = append(ns, n)
509 }
510
511 ops = append(ops, NFSOperationStats{
512 Operation: strings.TrimSuffix(ss[0], ":"),
513 Requests: ns[0],
514 Transmissions: ns[1],
515 MajorTimeouts: ns[2],
516 BytesSent: ns[3],
517 BytesReceived: ns[4],
518 CumulativeQueueTime: time.Duration(ns[5]) * time.Millisecond,
519 CumulativeTotalResponseTime: time.Duration(ns[6]) * time.Millisecond,
520 CumulativeTotalRequestTime: time.Duration(ns[7]) * time.Millisecond,
521 })
522 }
523
524 return ops, s.Err()
525}
526
527// parseNFSTransportStats parses a NFSTransportStats line using an input set of
528// integer fields matched to a specific stats version.
529func parseNFSTransportStats(ss []string, statVersion string) (*NFSTransportStats, error) {
530 // Extract the protocol field. It is the only string value in the line
531 protocol := ss[0]
532 ss = ss[1:]
533
534 switch statVersion {
535 case statVersion10:
536 var expectedLength int
537 if protocol == "tcp" {
538 expectedLength = fieldTransport10TCPLen
539 } else if protocol == "udp" {
540 expectedLength = fieldTransport10UDPLen
541 } else {
542 return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.0 statement: %v", protocol, ss)
543 }
544 if len(ss) != expectedLength {
545 return nil, fmt.Errorf("invalid NFS transport stats 1.0 statement: %v", ss)
546 }
547 case statVersion11:
548 var expectedLength int
549 if protocol == "tcp" {
550 expectedLength = fieldTransport11TCPLen
551 } else if protocol == "udp" {
552 expectedLength = fieldTransport11UDPLen
553 } else {
554 return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.1 statement: %v", protocol, ss)
555 }
556 if len(ss) != expectedLength {
557 return nil, fmt.Errorf("invalid NFS transport stats 1.1 statement: %v", ss)
558 }
559 default:
560 return nil, fmt.Errorf("unrecognized NFS transport stats version: %q", statVersion)
561 }
562
563 // Allocate enough for v1.1 stats since zero value for v1.1 stats will be okay
564 // in a v1.0 response. Since the stat length is bigger for TCP stats, we use
565 // the TCP length here.
566 //
567 // Note: slice length must be set to length of v1.1 stats to avoid a panic when
568 // only v1.0 stats are present.
569 // See: https://github.com/prometheus/node_exporter/issues/571.
570 ns := make([]uint64, fieldTransport11TCPLen)
571 for i, s := range ss {
572 n, err := strconv.ParseUint(s, 10, 64)
573 if err != nil {
574 return nil, err
575 }
576
577 ns[i] = n
578 }
579
580 // The fields differ depending on the transport protocol (TCP or UDP)
581 // From https://utcc.utoronto.ca/%7Ecks/space/blog/linux/NFSMountstatsXprt
582 //
583 // For the udp RPC transport there is no connection count, connect idle time,
584 // or idle time (fields #3, #4, and #5); all other fields are the same. So
585 // we set them to 0 here.
586 if protocol == "udp" {
587 ns = append(ns[:2], append(make([]uint64, 3), ns[2:]...)...)
588 }
589
590 return &NFSTransportStats{
591 Protocol: protocol,
592 Port: ns[0],
593 Bind: ns[1],
594 Connect: ns[2],
595 ConnectIdleTime: ns[3],
596 IdleTime: time.Duration(ns[4]) * time.Second,
597 Sends: ns[5],
598 Receives: ns[6],
599 BadTransactionIDs: ns[7],
600 CumulativeActiveRequests: ns[8],
601 CumulativeBacklog: ns[9],
602 MaximumRPCSlotsUsed: ns[10],
603 CumulativeSendingQueue: ns[11],
604 CumulativePendingQueue: ns[12],
605 }, nil
606}