// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
13
14package procfs
15
// While implementing parsing of /proc/[pid]/mountstats, this blog was used
// heavily as a reference:
// https://utcc.utoronto.ca/~cks/space/blog/linux/NFSMountstatsIndex
//
// Special thanks to Chris Siebenmann for all of his posts explaining the
// various statistics available for NFS.
22
23import (
24 "bufio"
25 "fmt"
26 "io"
27 "strconv"
28 "strings"
29 "time"
30)
31
// Constants shared between multiple functions.
const (
	// Number of whitespace-separated fields in a plain
	// "device [device] mounted on [mount] with fstype [type]" entry. An entry
	// with more fields carries a trailing statistics version field.
	deviceEntryLen = 8

	// Number of integer fields expected on the "bytes:" and "events:" lines.
	fieldBytesLen  = 8
	fieldEventsLen = 27

	// Statistics versions understood by the transport stats parser.
	statVersion10 = "1.0"
	statVersion11 = "1.1"

	// Number of integer fields expected after the protocol name on an "xprt:"
	// line, per statistics version and transport protocol.
	fieldTransport10TCPLen = 10
	fieldTransport10UDPLen = 7

	fieldTransport11TCPLen = 13
	fieldTransport11UDPLen = 10
)
48
49// A Mount is a device mount parsed from /proc/[pid]/mountstats.
50type Mount struct {
51 // Name of the device.
52 Device string
53 // The mount point of the device.
54 Mount string
55 // The filesystem type used by the device.
56 Type string
57 // If available additional statistics related to this Mount.
58 // Use a type assertion to determine if additional statistics are available.
59 Stats MountStats
60}
61
// A MountStats is a type which contains detailed statistics for a specific
// type of Mount.
type MountStats interface {
	// mountStats is unexported, so only types declared in this package can
	// satisfy MountStats.
	mountStats()
}
67
68// A MountStatsNFS is a MountStats implementation for NFSv3 and v4 mounts.
69type MountStatsNFS struct {
70 // The version of statistics provided.
71 StatVersion string
72 // The mount options of the NFS mount.
73 Opts map[string]string
74 // The age of the NFS mount.
75 Age time.Duration
76 // Statistics related to byte counters for various operations.
77 Bytes NFSBytesStats
78 // Statistics related to various NFS event occurrences.
79 Events NFSEventsStats
80 // Statistics broken down by filesystem operation.
81 Operations []NFSOperationStats
82 // Statistics about the NFS RPC transport.
83 Transport NFSTransportStats
84}
85
86// mountStats implements MountStats.
87func (m MountStatsNFS) mountStats() {}
88
// A NFSBytesStats contains statistics about the number of bytes read and written
// by an NFS client to and from an NFS server.
type NFSBytesStats struct {
	// Number of bytes read using the read() syscall.
	Read uint64
	// Number of bytes written using the write() syscall.
	Write uint64
	// Number of bytes read using the read() syscall in O_DIRECT mode.
	DirectRead uint64
	// Number of bytes written using the write() syscall in O_DIRECT mode.
	DirectWrite uint64
	// Number of bytes read from the NFS server, in total.
	ReadTotal uint64
	// Number of bytes written to the NFS server, in total.
	WriteTotal uint64
	// Number of pages read directly via mmap()'d files.
	ReadPages uint64
	// Number of pages written directly via mmap()'d files.
	WritePages uint64
}
109
// A NFSEventsStats contains statistics about NFS event occurrences.
type NFSEventsStats struct {
	// Number of times cached inode attributes are re-validated from the server.
	InodeRevalidate uint64
	// Number of times cached dentry nodes are re-validated from the server.
	DnodeRevalidate uint64
	// Number of times an inode cache is cleared.
	DataInvalidate uint64
	// Number of times cached inode attributes are invalidated.
	AttributeInvalidate uint64
	// Number of times files or directories have been open()'d.
	VFSOpen uint64
	// Number of times a directory lookup has occurred.
	VFSLookup uint64
	// Number of times permissions have been checked.
	VFSAccess uint64
	// Number of updates (and potential writes) to pages.
	VFSUpdatePage uint64
	// Number of pages read directly via mmap()'d files.
	VFSReadPage uint64
	// Number of times a group of pages have been read.
	VFSReadPages uint64
	// Number of pages written directly via mmap()'d files.
	VFSWritePage uint64
	// Number of times a group of pages have been written.
	VFSWritePages uint64
	// Number of times directory entries have been read with getdents().
	VFSGetdents uint64
	// Number of times attributes have been set on inodes.
	VFSSetattr uint64
	// Number of pending writes that have been forcefully flushed to the server.
	VFSFlush uint64
	// Number of times fsync() has been called on directories and files.
	VFSFsync uint64
	// Number of times locking has been attempted on a file.
	VFSLock uint64
	// Number of times files have been closed and released.
	VFSFileRelease uint64
	// Unknown. Possibly unused.
	CongestionWait uint64
	// Number of times files have been truncated.
	Truncation uint64
	// Number of times a file has been grown due to writes beyond its existing end.
	WriteExtension uint64
	// Number of times a file was removed while still open by another process.
	SillyRename uint64
	// Number of times the NFS server gave less data than expected while reading.
	ShortRead uint64
	// Number of times the NFS server wrote less data than expected while writing.
	ShortWrite uint64
	// Number of times the NFS server indicated EJUKEBOX; retrieving data from
	// offline storage.
	JukeboxDelay uint64
	// Number of NFS v4.1+ pNFS reads.
	PNFSRead uint64
	// Number of NFS v4.1+ pNFS writes.
	PNFSWrite uint64
}
168
// A NFSOperationStats contains statistics for a single operation.
type NFSOperationStats struct {
	// The name of the operation.
	Operation string
	// Number of requests performed for this operation.
	Requests uint64
	// Number of times an actual RPC request has been transmitted for this operation.
	Transmissions uint64
	// Number of times a request has had a major timeout.
	MajorTimeouts uint64
	// Number of bytes sent for this operation, including RPC headers and payload.
	BytesSent uint64
	// Number of bytes received for this operation, including RPC headers and payload.
	BytesReceived uint64
	// Duration all requests spent queued for transmission before they were sent.
	CumulativeQueueMilliseconds uint64
	// Duration it took to get a reply back after the request was transmitted.
	CumulativeTotalResponseMilliseconds uint64
	// Duration from when a request was enqueued to when it was completely handled.
	CumulativeTotalRequestMilliseconds uint64
	// The count of operations that complete with tk_status < 0. These statuses
	// usually indicate error conditions.
	Errors uint64
}
192
// A NFSTransportStats contains statistics for the NFS mount RPC requests and
// responses.
type NFSTransportStats struct {
	// The transport protocol used for the NFS mount.
	Protocol string
	// The local port used for the NFS mount.
	Port uint64
	// Number of times the client has had to establish a connection from scratch
	// to the NFS server.
	Bind uint64
	// Number of times the client has made a TCP connection to the NFS server.
	Connect uint64
	// Duration (in jiffies, a kernel internal unit of time) the NFS mount has
	// spent waiting for connections to the server to be established.
	ConnectIdleTime uint64
	// Duration since the NFS mount last saw any RPC traffic.
	IdleTimeSeconds uint64
	// Number of RPC requests for this mount sent to the NFS server.
	Sends uint64
	// Number of RPC responses for this mount received from the NFS server.
	Receives uint64
	// Number of times the NFS server sent a response with a transaction ID
	// unknown to this client.
	BadTransactionIDs uint64
	// A running counter, incremented on each request as the current difference
	// between sends and receives.
	CumulativeActiveRequests uint64
	// A running counter, incremented on each request by the current backlog
	// queue size.
	CumulativeBacklog uint64

	// Stats below only available with stat version 1.1.

	// Maximum number of simultaneously active RPC requests ever used.
	MaximumRPCSlotsUsed uint64
	// A running counter, incremented on each request as the current size of the
	// sending queue.
	CumulativeSendingQueue uint64
	// A running counter, incremented on each request as the current size of the
	// pending queue.
	CumulativePendingQueue uint64
}
235
236// parseMountStats parses a /proc/[pid]/mountstats file and returns a slice
237// of Mount structures containing detailed information about each mount.
238// If available, statistics for each mount are parsed as well.
239func parseMountStats(r io.Reader) ([]*Mount, error) {
240 const (
241 device = "device"
242 statVersionPrefix = "statvers="
243
244 nfs3Type = "nfs"
245 nfs4Type = "nfs4"
246 )
247
248 var mounts []*Mount
249
250 s := bufio.NewScanner(r)
251 for s.Scan() {
252 // Only look for device entries in this function
253 ss := strings.Fields(string(s.Bytes()))
254 if len(ss) == 0 || ss[0] != device {
255 continue
256 }
257
258 m, err := parseMount(ss)
259 if err != nil {
260 return nil, err
261 }
262
263 // Does this mount also possess statistics information?
264 if len(ss) > deviceEntryLen {
265 // Only NFSv3 and v4 are supported for parsing statistics
266 if m.Type != nfs3Type && m.Type != nfs4Type {
267 return nil, fmt.Errorf("cannot parse MountStats for fstype %q", m.Type)
268 }
269
270 statVersion := strings.TrimPrefix(ss[8], statVersionPrefix)
271
272 stats, err := parseMountStatsNFS(s, statVersion)
273 if err != nil {
274 return nil, err
275 }
276
277 m.Stats = stats
278 }
279
280 mounts = append(mounts, m)
281 }
282
283 return mounts, s.Err()
284}
285
286// parseMount parses an entry in /proc/[pid]/mountstats in the format:
287// device [device] mounted on [mount] with fstype [type]
288func parseMount(ss []string) (*Mount, error) {
289 if len(ss) < deviceEntryLen {
290 return nil, fmt.Errorf("invalid device entry: %v", ss)
291 }
292
293 // Check for specific words appearing at specific indices to ensure
294 // the format is consistent with what we expect
295 format := []struct {
296 i int
297 s string
298 }{
299 {i: 0, s: "device"},
300 {i: 2, s: "mounted"},
301 {i: 3, s: "on"},
302 {i: 5, s: "with"},
303 {i: 6, s: "fstype"},
304 }
305
306 for _, f := range format {
307 if ss[f.i] != f.s {
308 return nil, fmt.Errorf("invalid device entry: %v", ss)
309 }
310 }
311
312 return &Mount{
313 Device: ss[1],
314 Mount: ss[4],
315 Type: ss[7],
316 }, nil
317}
318
319// parseMountStatsNFS parses a MountStatsNFS by scanning additional information
320// related to NFS statistics.
321func parseMountStatsNFS(s *bufio.Scanner, statVersion string) (*MountStatsNFS, error) {
322 // Field indicators for parsing specific types of data
323 const (
324 fieldOpts = "opts:"
325 fieldAge = "age:"
326 fieldBytes = "bytes:"
327 fieldEvents = "events:"
328 fieldPerOpStats = "per-op"
329 fieldTransport = "xprt:"
330 )
331
332 stats := &MountStatsNFS{
333 StatVersion: statVersion,
334 }
335
336 for s.Scan() {
337 ss := strings.Fields(string(s.Bytes()))
338 if len(ss) == 0 {
339 break
340 }
khenaidoo59ce9dd2019-11-11 13:05:32 -0500341
342 switch ss[0] {
343 case fieldOpts:
khenaidoo26721882021-08-11 17:42:52 -0400344 if len(ss) < 2 {
345 return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
346 }
khenaidoo59ce9dd2019-11-11 13:05:32 -0500347 if stats.Opts == nil {
348 stats.Opts = map[string]string{}
349 }
350 for _, opt := range strings.Split(ss[1], ",") {
351 split := strings.Split(opt, "=")
352 if len(split) == 2 {
353 stats.Opts[split[0]] = split[1]
354 } else {
355 stats.Opts[opt] = ""
356 }
357 }
358 case fieldAge:
khenaidoo26721882021-08-11 17:42:52 -0400359 if len(ss) < 2 {
360 return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
361 }
khenaidoo59ce9dd2019-11-11 13:05:32 -0500362 // Age integer is in seconds
363 d, err := time.ParseDuration(ss[1] + "s")
364 if err != nil {
365 return nil, err
366 }
367
368 stats.Age = d
369 case fieldBytes:
khenaidoo26721882021-08-11 17:42:52 -0400370 if len(ss) < 2 {
371 return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
372 }
khenaidoo59ce9dd2019-11-11 13:05:32 -0500373 bstats, err := parseNFSBytesStats(ss[1:])
374 if err != nil {
375 return nil, err
376 }
377
378 stats.Bytes = *bstats
379 case fieldEvents:
khenaidoo26721882021-08-11 17:42:52 -0400380 if len(ss) < 2 {
381 return nil, fmt.Errorf("not enough information for NFS stats: %v", ss)
382 }
khenaidoo59ce9dd2019-11-11 13:05:32 -0500383 estats, err := parseNFSEventsStats(ss[1:])
384 if err != nil {
385 return nil, err
386 }
387
388 stats.Events = *estats
389 case fieldTransport:
390 if len(ss) < 3 {
391 return nil, fmt.Errorf("not enough information for NFS transport stats: %v", ss)
392 }
393
394 tstats, err := parseNFSTransportStats(ss[1:], statVersion)
395 if err != nil {
396 return nil, err
397 }
398
399 stats.Transport = *tstats
400 }
401
402 // When encountering "per-operation statistics", we must break this
403 // loop and parse them separately to ensure we can terminate parsing
404 // before reaching another device entry; hence why this 'if' statement
405 // is not just another switch case
406 if ss[0] == fieldPerOpStats {
407 break
408 }
409 }
410
411 if err := s.Err(); err != nil {
412 return nil, err
413 }
414
415 // NFS per-operation stats appear last before the next device entry
416 perOpStats, err := parseNFSOperationStats(s)
417 if err != nil {
418 return nil, err
419 }
420
421 stats.Operations = perOpStats
422
423 return stats, nil
424}
425
426// parseNFSBytesStats parses a NFSBytesStats line using an input set of
427// integer fields.
428func parseNFSBytesStats(ss []string) (*NFSBytesStats, error) {
429 if len(ss) != fieldBytesLen {
430 return nil, fmt.Errorf("invalid NFS bytes stats: %v", ss)
431 }
432
433 ns := make([]uint64, 0, fieldBytesLen)
434 for _, s := range ss {
435 n, err := strconv.ParseUint(s, 10, 64)
436 if err != nil {
437 return nil, err
438 }
439
440 ns = append(ns, n)
441 }
442
443 return &NFSBytesStats{
444 Read: ns[0],
445 Write: ns[1],
446 DirectRead: ns[2],
447 DirectWrite: ns[3],
448 ReadTotal: ns[4],
449 WriteTotal: ns[5],
450 ReadPages: ns[6],
451 WritePages: ns[7],
452 }, nil
453}
454
455// parseNFSEventsStats parses a NFSEventsStats line using an input set of
456// integer fields.
457func parseNFSEventsStats(ss []string) (*NFSEventsStats, error) {
458 if len(ss) != fieldEventsLen {
459 return nil, fmt.Errorf("invalid NFS events stats: %v", ss)
460 }
461
462 ns := make([]uint64, 0, fieldEventsLen)
463 for _, s := range ss {
464 n, err := strconv.ParseUint(s, 10, 64)
465 if err != nil {
466 return nil, err
467 }
468
469 ns = append(ns, n)
470 }
471
472 return &NFSEventsStats{
473 InodeRevalidate: ns[0],
474 DnodeRevalidate: ns[1],
475 DataInvalidate: ns[2],
476 AttributeInvalidate: ns[3],
477 VFSOpen: ns[4],
478 VFSLookup: ns[5],
479 VFSAccess: ns[6],
480 VFSUpdatePage: ns[7],
481 VFSReadPage: ns[8],
482 VFSReadPages: ns[9],
483 VFSWritePage: ns[10],
484 VFSWritePages: ns[11],
485 VFSGetdents: ns[12],
486 VFSSetattr: ns[13],
487 VFSFlush: ns[14],
488 VFSFsync: ns[15],
489 VFSLock: ns[16],
490 VFSFileRelease: ns[17],
491 CongestionWait: ns[18],
492 Truncation: ns[19],
493 WriteExtension: ns[20],
494 SillyRename: ns[21],
495 ShortRead: ns[22],
496 ShortWrite: ns[23],
497 JukeboxDelay: ns[24],
498 PNFSRead: ns[25],
499 PNFSWrite: ns[26],
500 }, nil
501}
502
503// parseNFSOperationStats parses a slice of NFSOperationStats by scanning
504// additional information about per-operation statistics until an empty
505// line is reached.
506func parseNFSOperationStats(s *bufio.Scanner) ([]NFSOperationStats, error) {
507 const (
khenaidoo26721882021-08-11 17:42:52 -0400508 // Minimum number of expected fields in each per-operation statistics set
509 minFields = 9
khenaidoo59ce9dd2019-11-11 13:05:32 -0500510 )
511
512 var ops []NFSOperationStats
513
514 for s.Scan() {
515 ss := strings.Fields(string(s.Bytes()))
516 if len(ss) == 0 {
517 // Must break when reading a blank line after per-operation stats to
518 // enable top-level function to parse the next device entry
519 break
520 }
521
khenaidoo26721882021-08-11 17:42:52 -0400522 if len(ss) < minFields {
khenaidoo59ce9dd2019-11-11 13:05:32 -0500523 return nil, fmt.Errorf("invalid NFS per-operations stats: %v", ss)
524 }
525
526 // Skip string operation name for integers
khenaidoo26721882021-08-11 17:42:52 -0400527 ns := make([]uint64, 0, minFields-1)
khenaidoo59ce9dd2019-11-11 13:05:32 -0500528 for _, st := range ss[1:] {
529 n, err := strconv.ParseUint(st, 10, 64)
530 if err != nil {
531 return nil, err
532 }
533
534 ns = append(ns, n)
535 }
536
khenaidoo26721882021-08-11 17:42:52 -0400537 opStats := NFSOperationStats{
khenaidoo59ce9dd2019-11-11 13:05:32 -0500538 Operation: strings.TrimSuffix(ss[0], ":"),
539 Requests: ns[0],
540 Transmissions: ns[1],
541 MajorTimeouts: ns[2],
542 BytesSent: ns[3],
543 BytesReceived: ns[4],
544 CumulativeQueueMilliseconds: ns[5],
545 CumulativeTotalResponseMilliseconds: ns[6],
546 CumulativeTotalRequestMilliseconds: ns[7],
khenaidoo26721882021-08-11 17:42:52 -0400547 }
548
549 if len(ns) > 8 {
550 opStats.Errors = ns[8]
551 }
552
553 ops = append(ops, opStats)
khenaidoo59ce9dd2019-11-11 13:05:32 -0500554 }
555
556 return ops, s.Err()
557}
558
559// parseNFSTransportStats parses a NFSTransportStats line using an input set of
560// integer fields matched to a specific stats version.
561func parseNFSTransportStats(ss []string, statVersion string) (*NFSTransportStats, error) {
562 // Extract the protocol field. It is the only string value in the line
563 protocol := ss[0]
564 ss = ss[1:]
565
566 switch statVersion {
567 case statVersion10:
568 var expectedLength int
569 if protocol == "tcp" {
570 expectedLength = fieldTransport10TCPLen
571 } else if protocol == "udp" {
572 expectedLength = fieldTransport10UDPLen
573 } else {
574 return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.0 statement: %v", protocol, ss)
575 }
576 if len(ss) != expectedLength {
577 return nil, fmt.Errorf("invalid NFS transport stats 1.0 statement: %v", ss)
578 }
579 case statVersion11:
580 var expectedLength int
581 if protocol == "tcp" {
582 expectedLength = fieldTransport11TCPLen
583 } else if protocol == "udp" {
584 expectedLength = fieldTransport11UDPLen
585 } else {
586 return nil, fmt.Errorf("invalid NFS protocol \"%s\" in stats 1.1 statement: %v", protocol, ss)
587 }
588 if len(ss) != expectedLength {
589 return nil, fmt.Errorf("invalid NFS transport stats 1.1 statement: %v", ss)
590 }
591 default:
592 return nil, fmt.Errorf("unrecognized NFS transport stats version: %q", statVersion)
593 }
594
595 // Allocate enough for v1.1 stats since zero value for v1.1 stats will be okay
596 // in a v1.0 response. Since the stat length is bigger for TCP stats, we use
597 // the TCP length here.
598 //
599 // Note: slice length must be set to length of v1.1 stats to avoid a panic when
600 // only v1.0 stats are present.
601 // See: https://github.com/prometheus/node_exporter/issues/571.
602 ns := make([]uint64, fieldTransport11TCPLen)
603 for i, s := range ss {
604 n, err := strconv.ParseUint(s, 10, 64)
605 if err != nil {
606 return nil, err
607 }
608
609 ns[i] = n
610 }
611
612 // The fields differ depending on the transport protocol (TCP or UDP)
613 // From https://utcc.utoronto.ca/%7Ecks/space/blog/linux/NFSMountstatsXprt
614 //
615 // For the udp RPC transport there is no connection count, connect idle time,
616 // or idle time (fields #3, #4, and #5); all other fields are the same. So
617 // we set them to 0 here.
618 if protocol == "udp" {
619 ns = append(ns[:2], append(make([]uint64, 3), ns[2:]...)...)
620 }
621
622 return &NFSTransportStats{
623 Protocol: protocol,
624 Port: ns[0],
625 Bind: ns[1],
626 Connect: ns[2],
627 ConnectIdleTime: ns[3],
628 IdleTimeSeconds: ns[4],
629 Sends: ns[5],
630 Receives: ns[6],
631 BadTransactionIDs: ns[7],
632 CumulativeActiveRequests: ns[8],
633 CumulativeBacklog: ns[9],
634 MaximumRPCSlotsUsed: ns[10],
635 CumulativeSendingQueue: ns[11],
636 CumulativePendingQueue: ns[12],
637 }, nil
638}