blob: e50e4cb276c8608783ab77efb3a6519055c0098a [file] [log] [blame]
Matteo Scandoloa4285862020-12-01 18:10:10 -08001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Linux system calls.
6// This file is compiled as ordinary Go code,
7// but it is also input to mksyscall,
8// which parses the //sys lines and generates system call stubs.
9// Note that sometimes we use a lowercase //sys name and
10// wrap it in our own nicer implementation.
11
12package unix
13
14import (
15 "encoding/binary"
16 "runtime"
17 "syscall"
18 "unsafe"
19)
20
21/*
22 * Wrapped
23 */
24
25func Access(path string, mode uint32) (err error) {
26 return Faccessat(AT_FDCWD, path, mode, 0)
27}
28
29func Chmod(path string, mode uint32) (err error) {
30 return Fchmodat(AT_FDCWD, path, mode, 0)
31}
32
33func Chown(path string, uid int, gid int) (err error) {
34 return Fchownat(AT_FDCWD, path, uid, gid, 0)
35}
36
37func Creat(path string, mode uint32) (fd int, err error) {
38 return Open(path, O_CREAT|O_WRONLY|O_TRUNC, mode)
39}
40
41//sys FanotifyInit(flags uint, event_f_flags uint) (fd int, err error)
42//sys fanotifyMark(fd int, flags uint, mask uint64, dirFd int, pathname *byte) (err error)
43
44func FanotifyMark(fd int, flags uint, mask uint64, dirFd int, pathname string) (err error) {
45 if pathname == "" {
46 return fanotifyMark(fd, flags, mask, dirFd, nil)
47 }
48 p, err := BytePtrFromString(pathname)
49 if err != nil {
50 return err
51 }
52 return fanotifyMark(fd, flags, mask, dirFd, p)
53}
54
55//sys fchmodat(dirfd int, path string, mode uint32) (err error)
56
57func Fchmodat(dirfd int, path string, mode uint32, flags int) (err error) {
58 // Linux fchmodat doesn't support the flags parameter. Mimick glibc's behavior
59 // and check the flags. Otherwise the mode would be applied to the symlink
60 // destination which is not what the user expects.
61 if flags&^AT_SYMLINK_NOFOLLOW != 0 {
62 return EINVAL
63 } else if flags&AT_SYMLINK_NOFOLLOW != 0 {
64 return EOPNOTSUPP
65 }
66 return fchmodat(dirfd, path, mode)
67}
68
69//sys ioctl(fd int, req uint, arg uintptr) (err error)
70
71// ioctl itself should not be exposed directly, but additional get/set
72// functions for specific types are permissible.
73
74// IoctlRetInt performs an ioctl operation specified by req on a device
75// associated with opened file descriptor fd, and returns a non-negative
76// integer that is returned by the ioctl syscall.
77func IoctlRetInt(fd int, req uint) (int, error) {
78 ret, _, err := Syscall(SYS_IOCTL, uintptr(fd), uintptr(req), 0)
79 if err != 0 {
80 return 0, err
81 }
82 return int(ret), nil
83}
84
85// IoctlSetPointerInt performs an ioctl operation which sets an
86// integer value on fd, using the specified request number. The ioctl
87// argument is called with a pointer to the integer value, rather than
88// passing the integer value directly.
89func IoctlSetPointerInt(fd int, req uint, value int) error {
90 v := int32(value)
91 return ioctl(fd, req, uintptr(unsafe.Pointer(&v)))
92}
93
94func IoctlSetRTCTime(fd int, value *RTCTime) error {
95 err := ioctl(fd, RTC_SET_TIME, uintptr(unsafe.Pointer(value)))
96 runtime.KeepAlive(value)
97 return err
98}
99
100func IoctlSetRTCWkAlrm(fd int, value *RTCWkAlrm) error {
101 err := ioctl(fd, RTC_WKALM_SET, uintptr(unsafe.Pointer(value)))
102 runtime.KeepAlive(value)
103 return err
104}
105
106func IoctlGetUint32(fd int, req uint) (uint32, error) {
107 var value uint32
108 err := ioctl(fd, req, uintptr(unsafe.Pointer(&value)))
109 return value, err
110}
111
112func IoctlGetRTCTime(fd int) (*RTCTime, error) {
113 var value RTCTime
114 err := ioctl(fd, RTC_RD_TIME, uintptr(unsafe.Pointer(&value)))
115 return &value, err
116}
117
118func IoctlGetRTCWkAlrm(fd int) (*RTCWkAlrm, error) {
119 var value RTCWkAlrm
120 err := ioctl(fd, RTC_WKALM_RD, uintptr(unsafe.Pointer(&value)))
121 return &value, err
122}
123
124//sys Linkat(olddirfd int, oldpath string, newdirfd int, newpath string, flags int) (err error)
125
126func Link(oldpath string, newpath string) (err error) {
127 return Linkat(AT_FDCWD, oldpath, AT_FDCWD, newpath, 0)
128}
129
130func Mkdir(path string, mode uint32) (err error) {
131 return Mkdirat(AT_FDCWD, path, mode)
132}
133
134func Mknod(path string, mode uint32, dev int) (err error) {
135 return Mknodat(AT_FDCWD, path, mode, dev)
136}
137
138func Open(path string, mode int, perm uint32) (fd int, err error) {
139 return openat(AT_FDCWD, path, mode|O_LARGEFILE, perm)
140}
141
142//sys openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
143
144func Openat(dirfd int, path string, flags int, mode uint32) (fd int, err error) {
145 return openat(dirfd, path, flags|O_LARGEFILE, mode)
146}
147
148//sys ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error)
149
150func Ppoll(fds []PollFd, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
151 if len(fds) == 0 {
152 return ppoll(nil, 0, timeout, sigmask)
153 }
154 return ppoll(&fds[0], len(fds), timeout, sigmask)
155}
156
157//sys Readlinkat(dirfd int, path string, buf []byte) (n int, err error)
158
159func Readlink(path string, buf []byte) (n int, err error) {
160 return Readlinkat(AT_FDCWD, path, buf)
161}
162
163func Rename(oldpath string, newpath string) (err error) {
164 return Renameat(AT_FDCWD, oldpath, AT_FDCWD, newpath)
165}
166
167func Rmdir(path string) error {
168 return Unlinkat(AT_FDCWD, path, AT_REMOVEDIR)
169}
170
171//sys Symlinkat(oldpath string, newdirfd int, newpath string) (err error)
172
173func Symlink(oldpath string, newpath string) (err error) {
174 return Symlinkat(oldpath, AT_FDCWD, newpath)
175}
176
177func Unlink(path string) error {
178 return Unlinkat(AT_FDCWD, path, 0)
179}
180
181//sys Unlinkat(dirfd int, path string, flags int) (err error)
182
183func Utimes(path string, tv []Timeval) error {
184 if tv == nil {
185 err := utimensat(AT_FDCWD, path, nil, 0)
186 if err != ENOSYS {
187 return err
188 }
189 return utimes(path, nil)
190 }
191 if len(tv) != 2 {
192 return EINVAL
193 }
194 var ts [2]Timespec
195 ts[0] = NsecToTimespec(TimevalToNsec(tv[0]))
196 ts[1] = NsecToTimespec(TimevalToNsec(tv[1]))
197 err := utimensat(AT_FDCWD, path, (*[2]Timespec)(unsafe.Pointer(&ts[0])), 0)
198 if err != ENOSYS {
199 return err
200 }
201 return utimes(path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
202}
203
204//sys utimensat(dirfd int, path string, times *[2]Timespec, flags int) (err error)
205
206func UtimesNano(path string, ts []Timespec) error {
207 if ts == nil {
208 err := utimensat(AT_FDCWD, path, nil, 0)
209 if err != ENOSYS {
210 return err
211 }
212 return utimes(path, nil)
213 }
214 if len(ts) != 2 {
215 return EINVAL
216 }
217 err := utimensat(AT_FDCWD, path, (*[2]Timespec)(unsafe.Pointer(&ts[0])), 0)
218 if err != ENOSYS {
219 return err
220 }
221 // If the utimensat syscall isn't available (utimensat was added to Linux
222 // in 2.6.22, Released, 8 July 2007) then fall back to utimes
223 var tv [2]Timeval
224 for i := 0; i < 2; i++ {
225 tv[i] = NsecToTimeval(TimespecToNsec(ts[i]))
226 }
227 return utimes(path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
228}
229
230func UtimesNanoAt(dirfd int, path string, ts []Timespec, flags int) error {
231 if ts == nil {
232 return utimensat(dirfd, path, nil, flags)
233 }
234 if len(ts) != 2 {
235 return EINVAL
236 }
237 return utimensat(dirfd, path, (*[2]Timespec)(unsafe.Pointer(&ts[0])), flags)
238}
239
240func Futimesat(dirfd int, path string, tv []Timeval) error {
241 if tv == nil {
242 return futimesat(dirfd, path, nil)
243 }
244 if len(tv) != 2 {
245 return EINVAL
246 }
247 return futimesat(dirfd, path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
248}
249
250func Futimes(fd int, tv []Timeval) (err error) {
251 // Believe it or not, this is the best we can do on Linux
252 // (and is what glibc does).
253 return Utimes("/proc/self/fd/"+itoa(fd), tv)
254}
255
256const ImplementsGetwd = true
257
258//sys Getcwd(buf []byte) (n int, err error)
259
260func Getwd() (wd string, err error) {
261 var buf [PathMax]byte
262 n, err := Getcwd(buf[0:])
263 if err != nil {
264 return "", err
265 }
266 // Getcwd returns the number of bytes written to buf, including the NUL.
267 if n < 1 || n > len(buf) || buf[n-1] != 0 {
268 return "", EINVAL
269 }
270 return string(buf[0 : n-1]), nil
271}
272
273func Getgroups() (gids []int, err error) {
274 n, err := getgroups(0, nil)
275 if err != nil {
276 return nil, err
277 }
278 if n == 0 {
279 return nil, nil
280 }
281
282 // Sanity check group count. Max is 1<<16 on Linux.
283 if n < 0 || n > 1<<20 {
284 return nil, EINVAL
285 }
286
287 a := make([]_Gid_t, n)
288 n, err = getgroups(n, &a[0])
289 if err != nil {
290 return nil, err
291 }
292 gids = make([]int, n)
293 for i, v := range a[0:n] {
294 gids[i] = int(v)
295 }
296 return
297}
298
299func Setgroups(gids []int) (err error) {
300 if len(gids) == 0 {
301 return setgroups(0, nil)
302 }
303
304 a := make([]_Gid_t, len(gids))
305 for i, v := range gids {
306 a[i] = _Gid_t(v)
307 }
308 return setgroups(len(a), &a[0])
309}
310
// WaitStatus is the raw status word filled in by Wait4; its methods decode
// the individual fields.
type WaitStatus uint32

// Layout of the status word: the bottom 7 bits are either 0 (exited),
// 0x7F (stopped), or the number of the signal that caused an exit.
// The 0x80 bit says whether there was a core dump.
// An extra number (the exit code, or the signal causing a stop)
// lives in the high bits. At least that's the idea.
// There are various irregularities. For example, the
// "continued" status is 0xFFFF, distinguishing itself
// from stopped via the core dump bit.

const (
	mask    = 0x7F
	core    = 0x80
	exited  = 0x00
	stopped = 0x7F
	shift   = 8
)

// Exited reports whether the low 7 bits equal the "exited" marker.
func (w WaitStatus) Exited() bool {
	return w&mask == exited
}

// Signaled reports whether the low 7 bits hold a signal number, i.e. are
// neither the "stopped" nor the "exited" marker.
func (w WaitStatus) Signaled() bool {
	low := w & mask
	return !(low == stopped || low == exited)
}

// Stopped reports whether the low 8 bits equal the "stopped" marker.
func (w WaitStatus) Stopped() bool {
	return w&0xFF == stopped
}

// Continued reports whether the status is exactly 0xFFFF.
func (w WaitStatus) Continued() bool {
	return w == 0xFFFF
}

// CoreDump reports whether the status is Signaled and has the core bit set.
func (w WaitStatus) CoreDump() bool {
	if !w.Signaled() {
		return false
	}
	return w&core != 0
}

// ExitStatus returns the 8-bit exit code stored above the low byte, or -1
// if the status is not Exited.
func (w WaitStatus) ExitStatus() int {
	if w.Exited() {
		return int(w>>shift) & 0xFF
	}
	return -1
}

// Signal returns the signal number held in the low 7 bits, or -1 if the
// status is not Signaled.
func (w WaitStatus) Signal() syscall.Signal {
	if w.Signaled() {
		return syscall.Signal(w & mask)
	}
	return -1
}

// StopSignal returns the 8-bit signal number stored above the low byte, or
// -1 if the status is not Stopped.
func (w WaitStatus) StopSignal() syscall.Signal {
	if w.Stopped() {
		return syscall.Signal(w>>shift) & 0xFF
	}
	return -1
}
360
361func (w WaitStatus) TrapCause() int {
362 if w.StopSignal() != SIGTRAP {
363 return -1
364 }
365 return int(w>>shift) >> 8
366}
367
368//sys wait4(pid int, wstatus *_C_int, options int, rusage *Rusage) (wpid int, err error)
369
370func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) {
371 var status _C_int
372 wpid, err = wait4(pid, &status, options, rusage)
373 if wstatus != nil {
374 *wstatus = WaitStatus(status)
375 }
376 return
377}
378
379func Mkfifo(path string, mode uint32) error {
380 return Mknod(path, mode|S_IFIFO, 0)
381}
382
383func Mkfifoat(dirfd int, path string, mode uint32) error {
384 return Mknodat(dirfd, path, mode|S_IFIFO, 0)
385}
386
387func (sa *SockaddrInet4) sockaddr() (unsafe.Pointer, _Socklen, error) {
388 if sa.Port < 0 || sa.Port > 0xFFFF {
389 return nil, 0, EINVAL
390 }
391 sa.raw.Family = AF_INET
392 p := (*[2]byte)(unsafe.Pointer(&sa.raw.Port))
393 p[0] = byte(sa.Port >> 8)
394 p[1] = byte(sa.Port)
395 for i := 0; i < len(sa.Addr); i++ {
396 sa.raw.Addr[i] = sa.Addr[i]
397 }
398 return unsafe.Pointer(&sa.raw), SizeofSockaddrInet4, nil
399}
400
401func (sa *SockaddrInet6) sockaddr() (unsafe.Pointer, _Socklen, error) {
402 if sa.Port < 0 || sa.Port > 0xFFFF {
403 return nil, 0, EINVAL
404 }
405 sa.raw.Family = AF_INET6
406 p := (*[2]byte)(unsafe.Pointer(&sa.raw.Port))
407 p[0] = byte(sa.Port >> 8)
408 p[1] = byte(sa.Port)
409 sa.raw.Scope_id = sa.ZoneId
410 for i := 0; i < len(sa.Addr); i++ {
411 sa.raw.Addr[i] = sa.Addr[i]
412 }
413 return unsafe.Pointer(&sa.raw), SizeofSockaddrInet6, nil
414}
415
416func (sa *SockaddrUnix) sockaddr() (unsafe.Pointer, _Socklen, error) {
417 name := sa.Name
418 n := len(name)
419 if n >= len(sa.raw.Path) {
420 return nil, 0, EINVAL
421 }
422 sa.raw.Family = AF_UNIX
423 for i := 0; i < n; i++ {
424 sa.raw.Path[i] = int8(name[i])
425 }
426 // length is family (uint16), name, NUL.
427 sl := _Socklen(2)
428 if n > 0 {
429 sl += _Socklen(n) + 1
430 }
431 if sa.raw.Path[0] == '@' {
432 sa.raw.Path[0] = 0
433 // Don't count trailing NUL for abstract address.
434 sl--
435 }
436
437 return unsafe.Pointer(&sa.raw), sl, nil
438}
439
440// SockaddrLinklayer implements the Sockaddr interface for AF_PACKET type sockets.
441type SockaddrLinklayer struct {
442 Protocol uint16
443 Ifindex int
444 Hatype uint16
445 Pkttype uint8
446 Halen uint8
447 Addr [8]byte
448 raw RawSockaddrLinklayer
449}
450
451func (sa *SockaddrLinklayer) sockaddr() (unsafe.Pointer, _Socklen, error) {
452 if sa.Ifindex < 0 || sa.Ifindex > 0x7fffffff {
453 return nil, 0, EINVAL
454 }
455 sa.raw.Family = AF_PACKET
456 sa.raw.Protocol = sa.Protocol
457 sa.raw.Ifindex = int32(sa.Ifindex)
458 sa.raw.Hatype = sa.Hatype
459 sa.raw.Pkttype = sa.Pkttype
460 sa.raw.Halen = sa.Halen
461 for i := 0; i < len(sa.Addr); i++ {
462 sa.raw.Addr[i] = sa.Addr[i]
463 }
464 return unsafe.Pointer(&sa.raw), SizeofSockaddrLinklayer, nil
465}
466
467// SockaddrNetlink implements the Sockaddr interface for AF_NETLINK type sockets.
468type SockaddrNetlink struct {
469 Family uint16
470 Pad uint16
471 Pid uint32
472 Groups uint32
473 raw RawSockaddrNetlink
474}
475
476func (sa *SockaddrNetlink) sockaddr() (unsafe.Pointer, _Socklen, error) {
477 sa.raw.Family = AF_NETLINK
478 sa.raw.Pad = sa.Pad
479 sa.raw.Pid = sa.Pid
480 sa.raw.Groups = sa.Groups
481 return unsafe.Pointer(&sa.raw), SizeofSockaddrNetlink, nil
482}
483
484// SockaddrHCI implements the Sockaddr interface for AF_BLUETOOTH type sockets
485// using the HCI protocol.
486type SockaddrHCI struct {
487 Dev uint16
488 Channel uint16
489 raw RawSockaddrHCI
490}
491
492func (sa *SockaddrHCI) sockaddr() (unsafe.Pointer, _Socklen, error) {
493 sa.raw.Family = AF_BLUETOOTH
494 sa.raw.Dev = sa.Dev
495 sa.raw.Channel = sa.Channel
496 return unsafe.Pointer(&sa.raw), SizeofSockaddrHCI, nil
497}
498
499// SockaddrL2 implements the Sockaddr interface for AF_BLUETOOTH type sockets
500// using the L2CAP protocol.
501type SockaddrL2 struct {
502 PSM uint16
503 CID uint16
504 Addr [6]uint8
505 AddrType uint8
506 raw RawSockaddrL2
507}
508
509func (sa *SockaddrL2) sockaddr() (unsafe.Pointer, _Socklen, error) {
510 sa.raw.Family = AF_BLUETOOTH
511 psm := (*[2]byte)(unsafe.Pointer(&sa.raw.Psm))
512 psm[0] = byte(sa.PSM)
513 psm[1] = byte(sa.PSM >> 8)
514 for i := 0; i < len(sa.Addr); i++ {
515 sa.raw.Bdaddr[i] = sa.Addr[len(sa.Addr)-1-i]
516 }
517 cid := (*[2]byte)(unsafe.Pointer(&sa.raw.Cid))
518 cid[0] = byte(sa.CID)
519 cid[1] = byte(sa.CID >> 8)
520 sa.raw.Bdaddr_type = sa.AddrType
521 return unsafe.Pointer(&sa.raw), SizeofSockaddrL2, nil
522}
523
524// SockaddrRFCOMM implements the Sockaddr interface for AF_BLUETOOTH type sockets
525// using the RFCOMM protocol.
526//
527// Server example:
528//
529// fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM)
530// _ = unix.Bind(fd, &unix.SockaddrRFCOMM{
531// Channel: 1,
532// Addr: [6]uint8{0, 0, 0, 0, 0, 0}, // BDADDR_ANY or 00:00:00:00:00:00
533// })
534// _ = Listen(fd, 1)
535// nfd, sa, _ := Accept(fd)
536// fmt.Printf("conn addr=%v fd=%d", sa.(*unix.SockaddrRFCOMM).Addr, nfd)
537// Read(nfd, buf)
538//
539// Client example:
540//
541// fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM)
542// _ = Connect(fd, &SockaddrRFCOMM{
543// Channel: 1,
544// Addr: [6]byte{0x11, 0x22, 0x33, 0xaa, 0xbb, 0xcc}, // CC:BB:AA:33:22:11
545// })
546// Write(fd, []byte(`hello`))
547type SockaddrRFCOMM struct {
548 // Addr represents a bluetooth address, byte ordering is little-endian.
549 Addr [6]uint8
550
551 // Channel is a designated bluetooth channel, only 1-30 are available for use.
552 // Since Linux 2.6.7 and further zero value is the first available channel.
553 Channel uint8
554
555 raw RawSockaddrRFCOMM
556}
557
558func (sa *SockaddrRFCOMM) sockaddr() (unsafe.Pointer, _Socklen, error) {
559 sa.raw.Family = AF_BLUETOOTH
560 sa.raw.Channel = sa.Channel
561 sa.raw.Bdaddr = sa.Addr
562 return unsafe.Pointer(&sa.raw), SizeofSockaddrRFCOMM, nil
563}
564
565// SockaddrCAN implements the Sockaddr interface for AF_CAN type sockets.
566// The RxID and TxID fields are used for transport protocol addressing in
567// (CAN_TP16, CAN_TP20, CAN_MCNET, and CAN_ISOTP), they can be left with
568// zero values for CAN_RAW and CAN_BCM sockets as they have no meaning.
569//
570// The SockaddrCAN struct must be bound to the socket file descriptor
571// using Bind before the CAN socket can be used.
572//
573// // Read one raw CAN frame
574// fd, _ := Socket(AF_CAN, SOCK_RAW, CAN_RAW)
575// addr := &SockaddrCAN{Ifindex: index}
576// Bind(fd, addr)
577// frame := make([]byte, 16)
578// Read(fd, frame)
579//
580// The full SocketCAN documentation can be found in the linux kernel
581// archives at: https://www.kernel.org/doc/Documentation/networking/can.txt
582type SockaddrCAN struct {
583 Ifindex int
584 RxID uint32
585 TxID uint32
586 raw RawSockaddrCAN
587}
588
589func (sa *SockaddrCAN) sockaddr() (unsafe.Pointer, _Socklen, error) {
590 if sa.Ifindex < 0 || sa.Ifindex > 0x7fffffff {
591 return nil, 0, EINVAL
592 }
593 sa.raw.Family = AF_CAN
594 sa.raw.Ifindex = int32(sa.Ifindex)
595 rx := (*[4]byte)(unsafe.Pointer(&sa.RxID))
596 for i := 0; i < 4; i++ {
597 sa.raw.Addr[i] = rx[i]
598 }
599 tx := (*[4]byte)(unsafe.Pointer(&sa.TxID))
600 for i := 0; i < 4; i++ {
601 sa.raw.Addr[i+4] = tx[i]
602 }
603 return unsafe.Pointer(&sa.raw), SizeofSockaddrCAN, nil
604}
605
606// SockaddrALG implements the Sockaddr interface for AF_ALG type sockets.
607// SockaddrALG enables userspace access to the Linux kernel's cryptography
608// subsystem. The Type and Name fields specify which type of hash or cipher
609// should be used with a given socket.
610//
611// To create a file descriptor that provides access to a hash or cipher, both
612// Bind and Accept must be used. Once the setup process is complete, input
613// data can be written to the socket, processed by the kernel, and then read
614// back as hash output or ciphertext.
615//
616// Here is an example of using an AF_ALG socket with SHA1 hashing.
617// The initial socket setup process is as follows:
618//
619// // Open a socket to perform SHA1 hashing.
620// fd, _ := unix.Socket(unix.AF_ALG, unix.SOCK_SEQPACKET, 0)
621// addr := &unix.SockaddrALG{Type: "hash", Name: "sha1"}
622// unix.Bind(fd, addr)
623// // Note: unix.Accept does not work at this time; must invoke accept()
624// // manually using unix.Syscall.
625// hashfd, _, _ := unix.Syscall(unix.SYS_ACCEPT, uintptr(fd), 0, 0)
626//
627// Once a file descriptor has been returned from Accept, it may be used to
628// perform SHA1 hashing. The descriptor is not safe for concurrent use, but
629// may be re-used repeatedly with subsequent Write and Read operations.
630//
631// When hashing a small byte slice or string, a single Write and Read may
632// be used:
633//
634// // Assume hashfd is already configured using the setup process.
635// hash := os.NewFile(hashfd, "sha1")
636// // Hash an input string and read the results. Each Write discards
637// // previous hash state. Read always reads the current state.
638// b := make([]byte, 20)
639// for i := 0; i < 2; i++ {
640// io.WriteString(hash, "Hello, world.")
641// hash.Read(b)
642// fmt.Println(hex.EncodeToString(b))
643// }
644// // Output:
645// // 2ae01472317d1935a84797ec1983ae243fc6aa28
646// // 2ae01472317d1935a84797ec1983ae243fc6aa28
647//
648// For hashing larger byte slices, or byte streams such as those read from
649// a file or socket, use Sendto with MSG_MORE to instruct the kernel to update
650// the hash digest instead of creating a new one for a given chunk and finalizing it.
651//
652// // Assume hashfd and addr are already configured using the setup process.
653// hash := os.NewFile(hashfd, "sha1")
654// // Hash the contents of a file.
655// f, _ := os.Open("/tmp/linux-4.10-rc7.tar.xz")
656// b := make([]byte, 4096)
657// for {
658// n, err := f.Read(b)
659// if err == io.EOF {
660// break
661// }
662// unix.Sendto(hashfd, b[:n], unix.MSG_MORE, addr)
663// }
664// hash.Read(b)
665// fmt.Println(hex.EncodeToString(b))
666// // Output: 85cdcad0c06eef66f805ecce353bec9accbeecc5
667//
668// For more information, see: http://www.chronox.de/crypto-API/crypto/userspace-if.html.
669type SockaddrALG struct {
670 Type string
671 Name string
672 Feature uint32
673 Mask uint32
674 raw RawSockaddrALG
675}
676
677func (sa *SockaddrALG) sockaddr() (unsafe.Pointer, _Socklen, error) {
678 // Leave room for NUL byte terminator.
679 if len(sa.Type) > 13 {
680 return nil, 0, EINVAL
681 }
682 if len(sa.Name) > 63 {
683 return nil, 0, EINVAL
684 }
685
686 sa.raw.Family = AF_ALG
687 sa.raw.Feat = sa.Feature
688 sa.raw.Mask = sa.Mask
689
690 typ, err := ByteSliceFromString(sa.Type)
691 if err != nil {
692 return nil, 0, err
693 }
694 name, err := ByteSliceFromString(sa.Name)
695 if err != nil {
696 return nil, 0, err
697 }
698
699 copy(sa.raw.Type[:], typ)
700 copy(sa.raw.Name[:], name)
701
702 return unsafe.Pointer(&sa.raw), SizeofSockaddrALG, nil
703}
704
705// SockaddrVM implements the Sockaddr interface for AF_VSOCK type sockets.
706// SockaddrVM provides access to Linux VM sockets: a mechanism that enables
707// bidirectional communication between a hypervisor and its guest virtual
708// machines.
709type SockaddrVM struct {
710 // CID and Port specify a context ID and port address for a VM socket.
711 // Guests have a unique CID, and hosts may have a well-known CID of:
712 // - VMADDR_CID_HYPERVISOR: refers to the hypervisor process.
713 // - VMADDR_CID_HOST: refers to other processes on the host.
714 CID uint32
715 Port uint32
716 raw RawSockaddrVM
717}
718
719func (sa *SockaddrVM) sockaddr() (unsafe.Pointer, _Socklen, error) {
720 sa.raw.Family = AF_VSOCK
721 sa.raw.Port = sa.Port
722 sa.raw.Cid = sa.CID
723
724 return unsafe.Pointer(&sa.raw), SizeofSockaddrVM, nil
725}
726
727type SockaddrXDP struct {
728 Flags uint16
729 Ifindex uint32
730 QueueID uint32
731 SharedUmemFD uint32
732 raw RawSockaddrXDP
733}
734
735func (sa *SockaddrXDP) sockaddr() (unsafe.Pointer, _Socklen, error) {
736 sa.raw.Family = AF_XDP
737 sa.raw.Flags = sa.Flags
738 sa.raw.Ifindex = sa.Ifindex
739 sa.raw.Queue_id = sa.QueueID
740 sa.raw.Shared_umem_fd = sa.SharedUmemFD
741
742 return unsafe.Pointer(&sa.raw), SizeofSockaddrXDP, nil
743}
744
745// This constant mirrors the #define of PX_PROTO_OE in
746// linux/if_pppox.h. We're defining this by hand here instead of
747// autogenerating through mkerrors.sh because including
748// linux/if_pppox.h causes some declaration conflicts with other
749// includes (linux/if_pppox.h includes linux/in.h, which conflicts
750// with netinet/in.h). Given that we only need a single zero constant
751// out of that file, it's cleaner to just define it by hand here.
752const px_proto_oe = 0
753
754type SockaddrPPPoE struct {
755 SID uint16
756 Remote []byte
757 Dev string
758 raw RawSockaddrPPPoX
759}
760
761func (sa *SockaddrPPPoE) sockaddr() (unsafe.Pointer, _Socklen, error) {
762 if len(sa.Remote) != 6 {
763 return nil, 0, EINVAL
764 }
765 if len(sa.Dev) > IFNAMSIZ-1 {
766 return nil, 0, EINVAL
767 }
768
769 *(*uint16)(unsafe.Pointer(&sa.raw[0])) = AF_PPPOX
770 // This next field is in host-endian byte order. We can't use the
771 // same unsafe pointer cast as above, because this value is not
772 // 32-bit aligned and some architectures don't allow unaligned
773 // access.
774 //
775 // However, the value of px_proto_oe is 0, so we can use
776 // encoding/binary helpers to write the bytes without worrying
777 // about the ordering.
778 binary.BigEndian.PutUint32(sa.raw[2:6], px_proto_oe)
779 // This field is deliberately big-endian, unlike the previous
780 // one. The kernel expects SID to be in network byte order.
781 binary.BigEndian.PutUint16(sa.raw[6:8], sa.SID)
782 copy(sa.raw[8:14], sa.Remote)
783 for i := 14; i < 14+IFNAMSIZ; i++ {
784 sa.raw[i] = 0
785 }
786 copy(sa.raw[14:], sa.Dev)
787 return unsafe.Pointer(&sa.raw), SizeofSockaddrPPPoX, nil
788}
789
790// SockaddrTIPC implements the Sockaddr interface for AF_TIPC type sockets.
791// For more information on TIPC, see: http://tipc.sourceforge.net/.
792type SockaddrTIPC struct {
793 // Scope is the publication scopes when binding service/service range.
794 // Should be set to TIPC_CLUSTER_SCOPE or TIPC_NODE_SCOPE.
795 Scope int
796
797 // Addr is the type of address used to manipulate a socket. Addr must be
798 // one of:
799 // - *TIPCSocketAddr: "id" variant in the C addr union
800 // - *TIPCServiceRange: "nameseq" variant in the C addr union
801 // - *TIPCServiceName: "name" variant in the C addr union
802 //
803 // If nil, EINVAL will be returned when the structure is used.
804 Addr TIPCAddr
805
806 raw RawSockaddrTIPC
807}
808
809// TIPCAddr is implemented by types that can be used as an address for
810// SockaddrTIPC. It is only implemented by *TIPCSocketAddr, *TIPCServiceRange,
811// and *TIPCServiceName.
812type TIPCAddr interface {
813 tipcAddrtype() uint8
814 tipcAddr() [12]byte
815}
816
817func (sa *TIPCSocketAddr) tipcAddr() [12]byte {
818 var out [12]byte
819 copy(out[:], (*(*[unsafe.Sizeof(TIPCSocketAddr{})]byte)(unsafe.Pointer(sa)))[:])
820 return out
821}
822
823func (sa *TIPCSocketAddr) tipcAddrtype() uint8 { return TIPC_SOCKET_ADDR }
824
825func (sa *TIPCServiceRange) tipcAddr() [12]byte {
826 var out [12]byte
827 copy(out[:], (*(*[unsafe.Sizeof(TIPCServiceRange{})]byte)(unsafe.Pointer(sa)))[:])
828 return out
829}
830
831func (sa *TIPCServiceRange) tipcAddrtype() uint8 { return TIPC_SERVICE_RANGE }
832
833func (sa *TIPCServiceName) tipcAddr() [12]byte {
834 var out [12]byte
835 copy(out[:], (*(*[unsafe.Sizeof(TIPCServiceName{})]byte)(unsafe.Pointer(sa)))[:])
836 return out
837}
838
839func (sa *TIPCServiceName) tipcAddrtype() uint8 { return TIPC_SERVICE_ADDR }
840
841func (sa *SockaddrTIPC) sockaddr() (unsafe.Pointer, _Socklen, error) {
842 if sa.Addr == nil {
843 return nil, 0, EINVAL
844 }
845
846 sa.raw.Family = AF_TIPC
847 sa.raw.Scope = int8(sa.Scope)
848 sa.raw.Addrtype = sa.Addr.tipcAddrtype()
849 sa.raw.Addr = sa.Addr.tipcAddr()
850
851 return unsafe.Pointer(&sa.raw), SizeofSockaddrTIPC, nil
852}
853
854// SockaddrL2TPIP implements the Sockaddr interface for IPPROTO_L2TP/AF_INET sockets.
855type SockaddrL2TPIP struct {
856 Addr [4]byte
857 ConnId uint32
858 raw RawSockaddrL2TPIP
859}
860
861func (sa *SockaddrL2TPIP) sockaddr() (unsafe.Pointer, _Socklen, error) {
862 sa.raw.Family = AF_INET
863 sa.raw.Conn_id = sa.ConnId
864 for i := 0; i < len(sa.Addr); i++ {
865 sa.raw.Addr[i] = sa.Addr[i]
866 }
867 return unsafe.Pointer(&sa.raw), SizeofSockaddrL2TPIP, nil
868}
869
870// SockaddrL2TPIP6 implements the Sockaddr interface for IPPROTO_L2TP/AF_INET6 sockets.
871type SockaddrL2TPIP6 struct {
872 Addr [16]byte
873 ZoneId uint32
874 ConnId uint32
875 raw RawSockaddrL2TPIP6
876}
877
878func (sa *SockaddrL2TPIP6) sockaddr() (unsafe.Pointer, _Socklen, error) {
879 sa.raw.Family = AF_INET6
880 sa.raw.Conn_id = sa.ConnId
881 sa.raw.Scope_id = sa.ZoneId
882 for i := 0; i < len(sa.Addr); i++ {
883 sa.raw.Addr[i] = sa.Addr[i]
884 }
885 return unsafe.Pointer(&sa.raw), SizeofSockaddrL2TPIP6, nil
886}
887
888func anyToSockaddr(fd int, rsa *RawSockaddrAny) (Sockaddr, error) {
889 switch rsa.Addr.Family {
890 case AF_NETLINK:
891 pp := (*RawSockaddrNetlink)(unsafe.Pointer(rsa))
892 sa := new(SockaddrNetlink)
893 sa.Family = pp.Family
894 sa.Pad = pp.Pad
895 sa.Pid = pp.Pid
896 sa.Groups = pp.Groups
897 return sa, nil
898
899 case AF_PACKET:
900 pp := (*RawSockaddrLinklayer)(unsafe.Pointer(rsa))
901 sa := new(SockaddrLinklayer)
902 sa.Protocol = pp.Protocol
903 sa.Ifindex = int(pp.Ifindex)
904 sa.Hatype = pp.Hatype
905 sa.Pkttype = pp.Pkttype
906 sa.Halen = pp.Halen
907 for i := 0; i < len(sa.Addr); i++ {
908 sa.Addr[i] = pp.Addr[i]
909 }
910 return sa, nil
911
912 case AF_UNIX:
913 pp := (*RawSockaddrUnix)(unsafe.Pointer(rsa))
914 sa := new(SockaddrUnix)
915 if pp.Path[0] == 0 {
916 // "Abstract" Unix domain socket.
917 // Rewrite leading NUL as @ for textual display.
918 // (This is the standard convention.)
919 // Not friendly to overwrite in place,
920 // but the callers below don't care.
921 pp.Path[0] = '@'
922 }
923
924 // Assume path ends at NUL.
925 // This is not technically the Linux semantics for
926 // abstract Unix domain sockets--they are supposed
927 // to be uninterpreted fixed-size binary blobs--but
928 // everyone uses this convention.
929 n := 0
930 for n < len(pp.Path) && pp.Path[n] != 0 {
931 n++
932 }
933 bytes := (*[len(pp.Path)]byte)(unsafe.Pointer(&pp.Path[0]))[0:n]
934 sa.Name = string(bytes)
935 return sa, nil
936
937 case AF_INET:
938 proto, err := GetsockoptInt(fd, SOL_SOCKET, SO_PROTOCOL)
939 if err != nil {
940 return nil, err
941 }
942
943 switch proto {
944 case IPPROTO_L2TP:
945 pp := (*RawSockaddrL2TPIP)(unsafe.Pointer(rsa))
946 sa := new(SockaddrL2TPIP)
947 sa.ConnId = pp.Conn_id
948 for i := 0; i < len(sa.Addr); i++ {
949 sa.Addr[i] = pp.Addr[i]
950 }
951 return sa, nil
952 default:
953 pp := (*RawSockaddrInet4)(unsafe.Pointer(rsa))
954 sa := new(SockaddrInet4)
955 p := (*[2]byte)(unsafe.Pointer(&pp.Port))
956 sa.Port = int(p[0])<<8 + int(p[1])
957 for i := 0; i < len(sa.Addr); i++ {
958 sa.Addr[i] = pp.Addr[i]
959 }
960 return sa, nil
961 }
962
963 case AF_INET6:
964 proto, err := GetsockoptInt(fd, SOL_SOCKET, SO_PROTOCOL)
965 if err != nil {
966 return nil, err
967 }
968
969 switch proto {
970 case IPPROTO_L2TP:
971 pp := (*RawSockaddrL2TPIP6)(unsafe.Pointer(rsa))
972 sa := new(SockaddrL2TPIP6)
973 sa.ConnId = pp.Conn_id
974 sa.ZoneId = pp.Scope_id
975 for i := 0; i < len(sa.Addr); i++ {
976 sa.Addr[i] = pp.Addr[i]
977 }
978 return sa, nil
979 default:
980 pp := (*RawSockaddrInet6)(unsafe.Pointer(rsa))
981 sa := new(SockaddrInet6)
982 p := (*[2]byte)(unsafe.Pointer(&pp.Port))
983 sa.Port = int(p[0])<<8 + int(p[1])
984 sa.ZoneId = pp.Scope_id
985 for i := 0; i < len(sa.Addr); i++ {
986 sa.Addr[i] = pp.Addr[i]
987 }
988 return sa, nil
989 }
990
991 case AF_VSOCK:
992 pp := (*RawSockaddrVM)(unsafe.Pointer(rsa))
993 sa := &SockaddrVM{
994 CID: pp.Cid,
995 Port: pp.Port,
996 }
997 return sa, nil
998 case AF_BLUETOOTH:
999 proto, err := GetsockoptInt(fd, SOL_SOCKET, SO_PROTOCOL)
1000 if err != nil {
1001 return nil, err
1002 }
1003 // only BTPROTO_L2CAP and BTPROTO_RFCOMM can accept connections
1004 switch proto {
1005 case BTPROTO_L2CAP:
1006 pp := (*RawSockaddrL2)(unsafe.Pointer(rsa))
1007 sa := &SockaddrL2{
1008 PSM: pp.Psm,
1009 CID: pp.Cid,
1010 Addr: pp.Bdaddr,
1011 AddrType: pp.Bdaddr_type,
1012 }
1013 return sa, nil
1014 case BTPROTO_RFCOMM:
1015 pp := (*RawSockaddrRFCOMM)(unsafe.Pointer(rsa))
1016 sa := &SockaddrRFCOMM{
1017 Channel: pp.Channel,
1018 Addr: pp.Bdaddr,
1019 }
1020 return sa, nil
1021 }
1022 case AF_XDP:
1023 pp := (*RawSockaddrXDP)(unsafe.Pointer(rsa))
1024 sa := &SockaddrXDP{
1025 Flags: pp.Flags,
1026 Ifindex: pp.Ifindex,
1027 QueueID: pp.Queue_id,
1028 SharedUmemFD: pp.Shared_umem_fd,
1029 }
1030 return sa, nil
1031 case AF_PPPOX:
1032 pp := (*RawSockaddrPPPoX)(unsafe.Pointer(rsa))
1033 if binary.BigEndian.Uint32(pp[2:6]) != px_proto_oe {
1034 return nil, EINVAL
1035 }
1036 sa := &SockaddrPPPoE{
1037 SID: binary.BigEndian.Uint16(pp[6:8]),
1038 Remote: pp[8:14],
1039 }
1040 for i := 14; i < 14+IFNAMSIZ; i++ {
1041 if pp[i] == 0 {
1042 sa.Dev = string(pp[14:i])
1043 break
1044 }
1045 }
1046 return sa, nil
1047 case AF_TIPC:
1048 pp := (*RawSockaddrTIPC)(unsafe.Pointer(rsa))
1049
1050 sa := &SockaddrTIPC{
1051 Scope: int(pp.Scope),
1052 }
1053
1054 // Determine which union variant is present in pp.Addr by checking
1055 // pp.Addrtype.
1056 switch pp.Addrtype {
1057 case TIPC_SERVICE_RANGE:
1058 sa.Addr = (*TIPCServiceRange)(unsafe.Pointer(&pp.Addr))
1059 case TIPC_SERVICE_ADDR:
1060 sa.Addr = (*TIPCServiceName)(unsafe.Pointer(&pp.Addr))
1061 case TIPC_SOCKET_ADDR:
1062 sa.Addr = (*TIPCSocketAddr)(unsafe.Pointer(&pp.Addr))
1063 default:
1064 return nil, EINVAL
1065 }
1066
1067 return sa, nil
1068 }
1069 return nil, EAFNOSUPPORT
1070}
1071
1072func Accept(fd int) (nfd int, sa Sockaddr, err error) {
1073 var rsa RawSockaddrAny
1074 var len _Socklen = SizeofSockaddrAny
1075 nfd, err = accept(fd, &rsa, &len)
1076 if err != nil {
1077 return
1078 }
1079 sa, err = anyToSockaddr(fd, &rsa)
1080 if err != nil {
1081 Close(nfd)
1082 nfd = 0
1083 }
1084 return
1085}
1086
1087func Accept4(fd int, flags int) (nfd int, sa Sockaddr, err error) {
1088 var rsa RawSockaddrAny
1089 var len _Socklen = SizeofSockaddrAny
1090 nfd, err = accept4(fd, &rsa, &len, flags)
1091 if err != nil {
1092 return
1093 }
1094 if len > SizeofSockaddrAny {
1095 panic("RawSockaddrAny too small")
1096 }
1097 sa, err = anyToSockaddr(fd, &rsa)
1098 if err != nil {
1099 Close(nfd)
1100 nfd = 0
1101 }
1102 return
1103}
1104
1105func Getsockname(fd int) (sa Sockaddr, err error) {
1106 var rsa RawSockaddrAny
1107 var len _Socklen = SizeofSockaddrAny
1108 if err = getsockname(fd, &rsa, &len); err != nil {
1109 return
1110 }
1111 return anyToSockaddr(fd, &rsa)
1112}
1113
1114func GetsockoptIPMreqn(fd, level, opt int) (*IPMreqn, error) {
1115 var value IPMreqn
1116 vallen := _Socklen(SizeofIPMreqn)
1117 err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
1118 return &value, err
1119}
1120
1121func GetsockoptUcred(fd, level, opt int) (*Ucred, error) {
1122 var value Ucred
1123 vallen := _Socklen(SizeofUcred)
1124 err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
1125 return &value, err
1126}
1127
1128func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) {
1129 var value TCPInfo
1130 vallen := _Socklen(SizeofTCPInfo)
1131 err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
1132 return &value, err
1133}
1134
1135// GetsockoptString returns the string value of the socket option opt for the
1136// socket associated with fd at the given socket level.
1137func GetsockoptString(fd, level, opt int) (string, error) {
1138 buf := make([]byte, 256)
1139 vallen := _Socklen(len(buf))
1140 err := getsockopt(fd, level, opt, unsafe.Pointer(&buf[0]), &vallen)
1141 if err != nil {
1142 if err == ERANGE {
1143 buf = make([]byte, vallen)
1144 err = getsockopt(fd, level, opt, unsafe.Pointer(&buf[0]), &vallen)
1145 }
1146 if err != nil {
1147 return "", err
1148 }
1149 }
1150 return string(buf[:vallen-1]), nil
1151}
1152
1153func GetsockoptTpacketStats(fd, level, opt int) (*TpacketStats, error) {
1154 var value TpacketStats
1155 vallen := _Socklen(SizeofTpacketStats)
1156 err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
1157 return &value, err
1158}
1159
1160func GetsockoptTpacketStatsV3(fd, level, opt int) (*TpacketStatsV3, error) {
1161 var value TpacketStatsV3
1162 vallen := _Socklen(SizeofTpacketStatsV3)
1163 err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
1164 return &value, err
1165}
1166
1167func SetsockoptIPMreqn(fd, level, opt int, mreq *IPMreqn) (err error) {
1168 return setsockopt(fd, level, opt, unsafe.Pointer(mreq), unsafe.Sizeof(*mreq))
1169}
1170
1171func SetsockoptPacketMreq(fd, level, opt int, mreq *PacketMreq) error {
1172 return setsockopt(fd, level, opt, unsafe.Pointer(mreq), unsafe.Sizeof(*mreq))
1173}
1174
1175// SetsockoptSockFprog attaches a classic BPF or an extended BPF program to a
1176// socket to filter incoming packets. See 'man 7 socket' for usage information.
1177func SetsockoptSockFprog(fd, level, opt int, fprog *SockFprog) error {
1178 return setsockopt(fd, level, opt, unsafe.Pointer(fprog), unsafe.Sizeof(*fprog))
1179}
1180
1181func SetsockoptCanRawFilter(fd, level, opt int, filter []CanFilter) error {
1182 var p unsafe.Pointer
1183 if len(filter) > 0 {
1184 p = unsafe.Pointer(&filter[0])
1185 }
1186 return setsockopt(fd, level, opt, p, uintptr(len(filter)*SizeofCanFilter))
1187}
1188
1189func SetsockoptTpacketReq(fd, level, opt int, tp *TpacketReq) error {
1190 return setsockopt(fd, level, opt, unsafe.Pointer(tp), unsafe.Sizeof(*tp))
1191}
1192
1193func SetsockoptTpacketReq3(fd, level, opt int, tp *TpacketReq3) error {
1194 return setsockopt(fd, level, opt, unsafe.Pointer(tp), unsafe.Sizeof(*tp))
1195}
1196
1197// Keyctl Commands (http://man7.org/linux/man-pages/man2/keyctl.2.html)
1198
1199// KeyctlInt calls keyctl commands in which each argument is an int.
1200// These commands are KEYCTL_REVOKE, KEYCTL_CHOWN, KEYCTL_CLEAR, KEYCTL_LINK,
1201// KEYCTL_UNLINK, KEYCTL_NEGATE, KEYCTL_SET_REQKEY_KEYRING, KEYCTL_SET_TIMEOUT,
1202// KEYCTL_ASSUME_AUTHORITY, KEYCTL_SESSION_TO_PARENT, KEYCTL_REJECT,
1203// KEYCTL_INVALIDATE, and KEYCTL_GET_PERSISTENT.
1204//sys KeyctlInt(cmd int, arg2 int, arg3 int, arg4 int, arg5 int) (ret int, err error) = SYS_KEYCTL
1205
1206// KeyctlBuffer calls keyctl commands in which the third and fourth
1207// arguments are a buffer and its length, respectively.
1208// These commands are KEYCTL_UPDATE, KEYCTL_READ, and KEYCTL_INSTANTIATE.
1209//sys KeyctlBuffer(cmd int, arg2 int, buf []byte, arg5 int) (ret int, err error) = SYS_KEYCTL
1210
1211// KeyctlString calls keyctl commands which return a string.
1212// These commands are KEYCTL_DESCRIBE and KEYCTL_GET_SECURITY.
1213func KeyctlString(cmd int, id int) (string, error) {
1214 // We must loop as the string data may change in between the syscalls.
1215 // We could allocate a large buffer here to reduce the chance that the
1216 // syscall needs to be called twice; however, this is unnecessary as
1217 // the performance loss is negligible.
1218 var buffer []byte
1219 for {
1220 // Try to fill the buffer with data
1221 length, err := KeyctlBuffer(cmd, id, buffer, 0)
1222 if err != nil {
1223 return "", err
1224 }
1225
1226 // Check if the data was written
1227 if length <= len(buffer) {
1228 // Exclude the null terminator
1229 return string(buffer[:length-1]), nil
1230 }
1231
1232 // Make a bigger buffer if needed
1233 buffer = make([]byte, length)
1234 }
1235}
1236
1237// Keyctl commands with special signatures.
1238
1239// KeyctlGetKeyringID implements the KEYCTL_GET_KEYRING_ID command.
1240// See the full documentation at:
1241// http://man7.org/linux/man-pages/man3/keyctl_get_keyring_ID.3.html
1242func KeyctlGetKeyringID(id int, create bool) (ringid int, err error) {
1243 createInt := 0
1244 if create {
1245 createInt = 1
1246 }
1247 return KeyctlInt(KEYCTL_GET_KEYRING_ID, id, createInt, 0, 0)
1248}
1249
1250// KeyctlSetperm implements the KEYCTL_SETPERM command. The perm value is the
1251// key handle permission mask as described in the "keyctl setperm" section of
1252// http://man7.org/linux/man-pages/man1/keyctl.1.html.
1253// See the full documentation at:
1254// http://man7.org/linux/man-pages/man3/keyctl_setperm.3.html
1255func KeyctlSetperm(id int, perm uint32) error {
1256 _, err := KeyctlInt(KEYCTL_SETPERM, id, int(perm), 0, 0)
1257 return err
1258}
1259
1260//sys keyctlJoin(cmd int, arg2 string) (ret int, err error) = SYS_KEYCTL
1261
1262// KeyctlJoinSessionKeyring implements the KEYCTL_JOIN_SESSION_KEYRING command.
1263// See the full documentation at:
1264// http://man7.org/linux/man-pages/man3/keyctl_join_session_keyring.3.html
1265func KeyctlJoinSessionKeyring(name string) (ringid int, err error) {
1266 return keyctlJoin(KEYCTL_JOIN_SESSION_KEYRING, name)
1267}
1268
1269//sys keyctlSearch(cmd int, arg2 int, arg3 string, arg4 string, arg5 int) (ret int, err error) = SYS_KEYCTL
1270
1271// KeyctlSearch implements the KEYCTL_SEARCH command.
1272// See the full documentation at:
1273// http://man7.org/linux/man-pages/man3/keyctl_search.3.html
1274func KeyctlSearch(ringid int, keyType, description string, destRingid int) (id int, err error) {
1275 return keyctlSearch(KEYCTL_SEARCH, ringid, keyType, description, destRingid)
1276}
1277
1278//sys keyctlIOV(cmd int, arg2 int, payload []Iovec, arg5 int) (err error) = SYS_KEYCTL
1279
1280// KeyctlInstantiateIOV implements the KEYCTL_INSTANTIATE_IOV command. This
1281// command is similar to KEYCTL_INSTANTIATE, except that the payload is a slice
1282// of Iovec (each of which represents a buffer) instead of a single buffer.
1283// See the full documentation at:
1284// http://man7.org/linux/man-pages/man3/keyctl_instantiate_iov.3.html
1285func KeyctlInstantiateIOV(id int, payload []Iovec, ringid int) error {
1286 return keyctlIOV(KEYCTL_INSTANTIATE_IOV, id, payload, ringid)
1287}
1288
1289//sys keyctlDH(cmd int, arg2 *KeyctlDHParams, buf []byte) (ret int, err error) = SYS_KEYCTL
1290
1291// KeyctlDHCompute implements the KEYCTL_DH_COMPUTE command. This command
1292// computes a Diffie-Hellman shared secret based on the provide params. The
1293// secret is written to the provided buffer and the returned size is the number
1294// of bytes written (returning an error if there is insufficient space in the
1295// buffer). If a nil buffer is passed in, this function returns the minimum
1296// buffer length needed to store the appropriate data. Note that this differs
1297// from KEYCTL_READ's behavior which always returns the requested payload size.
1298// See the full documentation at:
1299// http://man7.org/linux/man-pages/man3/keyctl_dh_compute.3.html
1300func KeyctlDHCompute(params *KeyctlDHParams, buffer []byte) (size int, err error) {
1301 return keyctlDH(KEYCTL_DH_COMPUTE, params, buffer)
1302}
1303
1304// KeyctlRestrictKeyring implements the KEYCTL_RESTRICT_KEYRING command. This
1305// command limits the set of keys that can be linked to the keyring, regardless
1306// of keyring permissions. The command requires the "setattr" permission.
1307//
1308// When called with an empty keyType the command locks the keyring, preventing
1309// any further keys from being linked to the keyring.
1310//
1311// The "asymmetric" keyType defines restrictions requiring key payloads to be
1312// DER encoded X.509 certificates signed by keys in another keyring. Restrictions
1313// for "asymmetric" include "builtin_trusted", "builtin_and_secondary_trusted",
1314// "key_or_keyring:<key>", and "key_or_keyring:<key>:chain".
1315//
1316// As of Linux 4.12, only the "asymmetric" keyType defines type-specific
1317// restrictions.
1318//
1319// See the full documentation at:
1320// http://man7.org/linux/man-pages/man3/keyctl_restrict_keyring.3.html
1321// http://man7.org/linux/man-pages/man2/keyctl.2.html
1322func KeyctlRestrictKeyring(ringid int, keyType string, restriction string) error {
1323 if keyType == "" {
1324 return keyctlRestrictKeyring(KEYCTL_RESTRICT_KEYRING, ringid)
1325 }
1326 return keyctlRestrictKeyringByType(KEYCTL_RESTRICT_KEYRING, ringid, keyType, restriction)
1327}
1328
1329//sys keyctlRestrictKeyringByType(cmd int, arg2 int, keyType string, restriction string) (err error) = SYS_KEYCTL
1330//sys keyctlRestrictKeyring(cmd int, arg2 int) (err error) = SYS_KEYCTL
1331
// Recvmsg receives a message from fd through the recvmsg wrapper. Ordinary
// data is read into p and control data into oob.
//
// It returns the value reported by recvmsg as n, the control length left in
// the message header as oobn, and the header flags as recvflags. from is set
// only when the address family written into the name buffer is not
// AF_UNSPEC, in which case it is decoded by anyToSockaddr.
func Recvmsg(fd int, p, oob []byte, flags int) (n, oobn int, recvflags int, from Sockaddr, err error) {
	var msg Msghdr
	var rsa RawSockaddrAny
	// Let the kernel write the sender address into rsa.
	msg.Name = (*byte)(unsafe.Pointer(&rsa))
	msg.Namelen = uint32(SizeofSockaddrAny)
	var iov Iovec
	if len(p) > 0 {
		iov.Base = &p[0]
		iov.SetLen(len(p))
	}
	// dummy backs the one-byte read used when only control data was
	// requested; it must stay in scope until recvmsg has returned.
	var dummy byte
	if len(oob) > 0 {
		if len(p) == 0 {
			var sockType int
			sockType, err = GetsockoptInt(fd, SOL_SOCKET, SO_TYPE)
			if err != nil {
				return
			}
			// receive at least one normal byte
			if sockType != SOCK_DGRAM {
				iov.Base = &dummy
				iov.SetLen(1)
			}
		}
		msg.Control = &oob[0]
		msg.SetControllen(len(oob))
	}
	msg.Iov = &iov
	msg.Iovlen = 1
	if n, err = recvmsg(fd, &msg, flags); err != nil {
		return
	}
	oobn = int(msg.Controllen)
	recvflags = int(msg.Flags)
	// source address is only specified if the socket is unconnected
	if rsa.Addr.Family != AF_UNSPEC {
		from, err = anyToSockaddr(fd, &rsa)
	}
	return
}
1372
1373func Sendmsg(fd int, p, oob []byte, to Sockaddr, flags int) (err error) {
1374 _, err = SendmsgN(fd, p, oob, to, flags)
1375 return
1376}
1377
// SendmsgN sends a message on fd through the sendmsg wrapper. p carries the
// ordinary data, oob the control data, and to (when non-nil) the destination
// address. It returns the byte count reported by sendmsg, except that the
// count is forced to 0 when control data was supplied together with an
// empty p.
func SendmsgN(fd int, p, oob []byte, to Sockaddr, flags int) (n int, err error) {
	var ptr unsafe.Pointer
	var salen _Socklen
	if to != nil {
		var err error
		ptr, salen, err = to.sockaddr()
		if err != nil {
			return 0, err
		}
	}
	var msg Msghdr
	msg.Name = (*byte)(ptr)
	msg.Namelen = uint32(salen)
	var iov Iovec
	if len(p) > 0 {
		iov.Base = &p[0]
		iov.SetLen(len(p))
	}
	// dummy backs the one-byte write used when only control data was
	// supplied; it must stay in scope until sendmsg has returned.
	var dummy byte
	if len(oob) > 0 {
		if len(p) == 0 {
			var sockType int
			sockType, err = GetsockoptInt(fd, SOL_SOCKET, SO_TYPE)
			if err != nil {
				return 0, err
			}
			// send at least one normal byte
			if sockType != SOCK_DGRAM {
				iov.Base = &dummy
				iov.SetLen(1)
			}
		}
		msg.Control = &oob[0]
		msg.SetControllen(len(oob))
	}
	msg.Iov = &iov
	msg.Iovlen = 1
	if n, err = sendmsg(fd, &msg, flags); err != nil {
		return 0, err
	}
	// Do not report the dummy byte (if any) as caller data.
	if len(oob) > 0 && len(p) == 0 {
		n = 0
	}
	return n, nil
}
1423
1424// BindToDevice binds the socket associated with fd to device.
1425func BindToDevice(fd int, device string) (err error) {
1426 return SetsockoptString(fd, SOL_SOCKET, SO_BINDTODEVICE, device)
1427}
1428
1429//sys ptrace(request int, pid int, addr uintptr, data uintptr) (err error)
1430
// ptracePeek reads len(out) bytes from the address space of pid, starting at
// addr, using the word-sized peek request req. It returns the number of
// bytes copied into out; on a failure after the first word, the count of
// bytes copied so far is returned together with the error.
func ptracePeek(req int, pid int, addr uintptr, out []byte) (count int, err error) {
	// The peek requests are machine-size oriented, so we wrap it
	// to retrieve arbitrary-length data.

	// The ptrace syscall differs from glibc's ptrace.
	// Peeks returns the word in *data, not as the return value.

	var buf [SizeofPtr]byte

	// Leading edge. PEEKTEXT/PEEKDATA don't require aligned
	// access (PEEKUSER warns that it might), but if we don't
	// align our reads, we might straddle an unmapped page
	// boundary and not get the bytes leading up to the page
	// boundary.
	n := 0
	if addr%SizeofPtr != 0 {
		// Read the aligned word containing addr and keep only its tail.
		err = ptrace(req, pid, addr-addr%SizeofPtr, uintptr(unsafe.Pointer(&buf[0])))
		if err != nil {
			return 0, err
		}
		n += copy(out, buf[addr%SizeofPtr:])
		out = out[n:]
	}

	// Remainder.
	for len(out) > 0 {
		// We use an internal buffer to guarantee alignment.
		// It's not documented if this is necessary, but we're paranoid.
		err = ptrace(req, pid, addr+uintptr(n), uintptr(unsafe.Pointer(&buf[0])))
		if err != nil {
			return n, err
		}
		// copy stops at len(out), so a short final word is handled here.
		copied := copy(out, buf[0:])
		n += copied
		out = out[copied:]
	}

	return n, nil
}
1470
1471func PtracePeekText(pid int, addr uintptr, out []byte) (count int, err error) {
1472 return ptracePeek(PTRACE_PEEKTEXT, pid, addr, out)
1473}
1474
1475func PtracePeekData(pid int, addr uintptr, out []byte) (count int, err error) {
1476 return ptracePeek(PTRACE_PEEKDATA, pid, addr, out)
1477}
1478
1479func PtracePeekUser(pid int, addr uintptr, out []byte) (count int, err error) {
1480 return ptracePeek(PTRACE_PEEKUSR, pid, addr, out)
1481}
1482
// ptracePoke writes data into the address space of pid, starting at addr,
// using the word-sized poke request pokeReq. Words that are only partly
// covered by data are first read with peekReq so the bytes outside data are
// written back unchanged. It returns the number of bytes written; on a
// failure after the leading edge, the count written so far is returned
// together with the error.
func ptracePoke(pokeReq int, peekReq int, pid int, addr uintptr, data []byte) (count int, err error) {
	// As for ptracePeek, we need to align our accesses to deal
	// with the possibility of straddling an invalid page.

	// Leading edge.
	n := 0
	if addr%SizeofPtr != 0 {
		var buf [SizeofPtr]byte
		// Fetch the aligned word containing addr, overlay the start of
		// data onto its tail, and write the merged word back.
		err = ptrace(peekReq, pid, addr-addr%SizeofPtr, uintptr(unsafe.Pointer(&buf[0])))
		if err != nil {
			return 0, err
		}
		n += copy(buf[addr%SizeofPtr:], data)
		word := *((*uintptr)(unsafe.Pointer(&buf[0])))
		err = ptrace(pokeReq, pid, addr-addr%SizeofPtr, word)
		if err != nil {
			return 0, err
		}
		data = data[n:]
	}

	// Interior.
	// Whole words are poked directly; the final SizeofPtr bytes or fewer
	// are left for the trailing edge below.
	for len(data) > SizeofPtr {
		word := *((*uintptr)(unsafe.Pointer(&data[0])))
		err = ptrace(pokeReq, pid, addr+uintptr(n), word)
		if err != nil {
			return n, err
		}
		n += SizeofPtr
		data = data[SizeofPtr:]
	}

	// Trailing edge.
	if len(data) > 0 {
		var buf [SizeofPtr]byte
		// Fetch the existing word so bytes past the end of data survive.
		err = ptrace(peekReq, pid, addr+uintptr(n), uintptr(unsafe.Pointer(&buf[0])))
		if err != nil {
			return n, err
		}
		copy(buf[0:], data)
		word := *((*uintptr)(unsafe.Pointer(&buf[0])))
		err = ptrace(pokeReq, pid, addr+uintptr(n), word)
		if err != nil {
			return n, err
		}
		n += len(data)
	}

	return n, nil
}
1533
1534func PtracePokeText(pid int, addr uintptr, data []byte) (count int, err error) {
1535 return ptracePoke(PTRACE_POKETEXT, PTRACE_PEEKTEXT, pid, addr, data)
1536}
1537
1538func PtracePokeData(pid int, addr uintptr, data []byte) (count int, err error) {
1539 return ptracePoke(PTRACE_POKEDATA, PTRACE_PEEKDATA, pid, addr, data)
1540}
1541
1542func PtracePokeUser(pid int, addr uintptr, data []byte) (count int, err error) {
1543 return ptracePoke(PTRACE_POKEUSR, PTRACE_PEEKUSR, pid, addr, data)
1544}
1545
1546func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) {
1547 return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout)))
1548}
1549
1550func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) {
1551 return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs)))
1552}
1553
1554func PtraceSetOptions(pid int, options int) (err error) {
1555 return ptrace(PTRACE_SETOPTIONS, pid, 0, uintptr(options))
1556}
1557
1558func PtraceGetEventMsg(pid int) (msg uint, err error) {
1559 var data _C_long
1560 err = ptrace(PTRACE_GETEVENTMSG, pid, 0, uintptr(unsafe.Pointer(&data)))
1561 msg = uint(data)
1562 return
1563}
1564
1565func PtraceCont(pid int, signal int) (err error) {
1566 return ptrace(PTRACE_CONT, pid, 0, uintptr(signal))
1567}
1568
1569func PtraceSyscall(pid int, signal int) (err error) {
1570 return ptrace(PTRACE_SYSCALL, pid, 0, uintptr(signal))
1571}
1572
1573func PtraceSingleStep(pid int) (err error) { return ptrace(PTRACE_SINGLESTEP, pid, 0, 0) }
1574
1575func PtraceInterrupt(pid int) (err error) { return ptrace(PTRACE_INTERRUPT, pid, 0, 0) }
1576
1577func PtraceAttach(pid int) (err error) { return ptrace(PTRACE_ATTACH, pid, 0, 0) }
1578
1579func PtraceSeize(pid int) (err error) { return ptrace(PTRACE_SEIZE, pid, 0, 0) }
1580
1581func PtraceDetach(pid int) (err error) { return ptrace(PTRACE_DETACH, pid, 0, 0) }
1582
1583//sys reboot(magic1 uint, magic2 uint, cmd int, arg string) (err error)
1584
1585func Reboot(cmd int) (err error) {
1586 return reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, "")
1587}
1588
1589func direntIno(buf []byte) (uint64, bool) {
1590 return readInt(buf, unsafe.Offsetof(Dirent{}.Ino), unsafe.Sizeof(Dirent{}.Ino))
1591}
1592
1593func direntReclen(buf []byte) (uint64, bool) {
1594 return readInt(buf, unsafe.Offsetof(Dirent{}.Reclen), unsafe.Sizeof(Dirent{}.Reclen))
1595}
1596
1597func direntNamlen(buf []byte) (uint64, bool) {
1598 reclen, ok := direntReclen(buf)
1599 if !ok {
1600 return 0, false
1601 }
1602 return reclen - uint64(unsafe.Offsetof(Dirent{}.Name)), true
1603}
1604
1605//sys mount(source string, target string, fstype string, flags uintptr, data *byte) (err error)
1606
1607func Mount(source string, target string, fstype string, flags uintptr, data string) (err error) {
1608 // Certain file systems get rather angry and EINVAL if you give
1609 // them an empty string of data, rather than NULL.
1610 if data == "" {
1611 return mount(source, target, fstype, flags, nil)
1612 }
1613 datap, err := BytePtrFromString(data)
1614 if err != nil {
1615 return err
1616 }
1617 return mount(source, target, fstype, flags, datap)
1618}
1619
1620func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) {
1621 if raceenabled {
1622 raceReleaseMerge(unsafe.Pointer(&ioSync))
1623 }
1624 return sendfile(outfd, infd, offset, count)
1625}
1626
1627// Sendto
1628// Recvfrom
1629// Socketpair
1630
1631/*
1632 * Direct access
1633 */
1634//sys Acct(path string) (err error)
1635//sys AddKey(keyType string, description string, payload []byte, ringid int) (id int, err error)
1636//sys Adjtimex(buf *Timex) (state int, err error)
1637//sysnb Capget(hdr *CapUserHeader, data *CapUserData) (err error)
1638//sysnb Capset(hdr *CapUserHeader, data *CapUserData) (err error)
1639//sys Chdir(path string) (err error)
1640//sys Chroot(path string) (err error)
1641//sys ClockGetres(clockid int32, res *Timespec) (err error)
1642//sys ClockGettime(clockid int32, time *Timespec) (err error)
1643//sys ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error)
1644//sys Close(fd int) (err error)
1645//sys CopyFileRange(rfd int, roff *int64, wfd int, woff *int64, len int, flags int) (n int, err error)
1646//sys DeleteModule(name string, flags int) (err error)
1647//sys Dup(oldfd int) (fd int, err error)
1648
1649func Dup2(oldfd, newfd int) error {
1650 // Android O and newer blocks dup2; riscv and arm64 don't implement dup2.
1651 if runtime.GOOS == "android" || runtime.GOARCH == "riscv64" || runtime.GOARCH == "arm64" {
1652 return Dup3(oldfd, newfd, 0)
1653 }
1654 return dup2(oldfd, newfd)
1655}
1656
1657//sys Dup3(oldfd int, newfd int, flags int) (err error)
1658//sysnb EpollCreate1(flag int) (fd int, err error)
1659//sysnb EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error)
1660//sys Eventfd(initval uint, flags int) (fd int, err error) = SYS_EVENTFD2
1661//sys Exit(code int) = SYS_EXIT_GROUP
1662//sys Fallocate(fd int, mode uint32, off int64, len int64) (err error)
1663//sys Fchdir(fd int) (err error)
1664//sys Fchmod(fd int, mode uint32) (err error)
1665//sys Fchownat(dirfd int, path string, uid int, gid int, flags int) (err error)
1666//sys Fdatasync(fd int) (err error)
1667//sys Fgetxattr(fd int, attr string, dest []byte) (sz int, err error)
1668//sys FinitModule(fd int, params string, flags int) (err error)
1669//sys Flistxattr(fd int, dest []byte) (sz int, err error)
1670//sys Flock(fd int, how int) (err error)
1671//sys Fremovexattr(fd int, attr string) (err error)
1672//sys Fsetxattr(fd int, attr string, dest []byte, flags int) (err error)
1673//sys Fsync(fd int) (err error)
1674//sys Getdents(fd int, buf []byte) (n int, err error) = SYS_GETDENTS64
1675//sysnb Getpgid(pid int) (pgid int, err error)
1676
1677func Getpgrp() (pid int) {
1678 pid, _ = Getpgid(0)
1679 return
1680}
1681
1682//sysnb Getpid() (pid int)
1683//sysnb Getppid() (ppid int)
1684//sys Getpriority(which int, who int) (prio int, err error)
1685//sys Getrandom(buf []byte, flags int) (n int, err error)
1686//sysnb Getrusage(who int, rusage *Rusage) (err error)
1687//sysnb Getsid(pid int) (sid int, err error)
1688//sysnb Gettid() (tid int)
1689//sys Getxattr(path string, attr string, dest []byte) (sz int, err error)
1690//sys InitModule(moduleImage []byte, params string) (err error)
1691//sys InotifyAddWatch(fd int, pathname string, mask uint32) (watchdesc int, err error)
1692//sysnb InotifyInit1(flags int) (fd int, err error)
1693//sysnb InotifyRmWatch(fd int, watchdesc uint32) (success int, err error)
1694//sysnb Kill(pid int, sig syscall.Signal) (err error)
1695//sys Klogctl(typ int, buf []byte) (n int, err error) = SYS_SYSLOG
1696//sys Lgetxattr(path string, attr string, dest []byte) (sz int, err error)
1697//sys Listxattr(path string, dest []byte) (sz int, err error)
1698//sys Llistxattr(path string, dest []byte) (sz int, err error)
1699//sys Lremovexattr(path string, attr string) (err error)
1700//sys Lsetxattr(path string, attr string, data []byte, flags int) (err error)
1701//sys MemfdCreate(name string, flags int) (fd int, err error)
1702//sys Mkdirat(dirfd int, path string, mode uint32) (err error)
1703//sys Mknodat(dirfd int, path string, mode uint32, dev int) (err error)
1704//sys Nanosleep(time *Timespec, leftover *Timespec) (err error)
1705//sys PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
1706//sys PivotRoot(newroot string, putold string) (err error) = SYS_PIVOT_ROOT
1707//sysnb prlimit(pid int, resource int, newlimit *Rlimit, old *Rlimit) (err error) = SYS_PRLIMIT64
1708//sys Prctl(option int, arg2 uintptr, arg3 uintptr, arg4 uintptr, arg5 uintptr) (err error)
1709//sys Pselect(nfd int, r *FdSet, w *FdSet, e *FdSet, timeout *Timespec, sigmask *Sigset_t) (n int, err error) = SYS_PSELECT6
1710//sys read(fd int, p []byte) (n int, err error)
1711//sys Removexattr(path string, attr string) (err error)
1712//sys Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) (err error)
1713//sys RequestKey(keyType string, description string, callback string, destRingid int) (id int, err error)
1714//sys Setdomainname(p []byte) (err error)
1715//sys Sethostname(p []byte) (err error)
1716//sysnb Setpgid(pid int, pgid int) (err error)
1717//sysnb Setsid() (pid int, err error)
1718//sysnb Settimeofday(tv *Timeval) (err error)
1719//sys Setns(fd int, nstype int) (err error)
1720
1721// PrctlRetInt performs a prctl operation specified by option and further
1722// optional arguments arg2 through arg5 depending on option. It returns a
1723// non-negative integer that is returned by the prctl syscall.
1724func PrctlRetInt(option int, arg2 uintptr, arg3 uintptr, arg4 uintptr, arg5 uintptr) (int, error) {
1725 ret, _, err := Syscall6(SYS_PRCTL, uintptr(option), uintptr(arg2), uintptr(arg3), uintptr(arg4), uintptr(arg5), 0)
1726 if err != 0 {
1727 return 0, err
1728 }
1729 return int(ret), nil
1730}
1731
1732// issue 1435.
1733// On linux Setuid and Setgid only affects the current thread, not the process.
1734// This does not match what most callers expect so we must return an error
1735// here rather than letting the caller think that the call succeeded.
1736
1737func Setuid(uid int) (err error) {
1738 return EOPNOTSUPP
1739}
1740
1741func Setgid(uid int) (err error) {
1742 return EOPNOTSUPP
1743}
1744
1745// SetfsgidRetGid sets fsgid for current thread and returns previous fsgid set.
1746// setfsgid(2) will return a non-nil error only if its caller lacks CAP_SETUID capability.
1747// If the call fails due to other reasons, current fsgid will be returned.
1748func SetfsgidRetGid(gid int) (int, error) {
1749 return setfsgid(gid)
1750}
1751
1752// SetfsuidRetUid sets fsuid for current thread and returns previous fsuid set.
1753// setfsgid(2) will return a non-nil error only if its caller lacks CAP_SETUID capability
1754// If the call fails due to other reasons, current fsuid will be returned.
1755func SetfsuidRetUid(uid int) (int, error) {
1756 return setfsuid(uid)
1757}
1758
1759func Setfsgid(gid int) error {
1760 _, err := setfsgid(gid)
1761 return err
1762}
1763
1764func Setfsuid(uid int) error {
1765 _, err := setfsuid(uid)
1766 return err
1767}
1768
1769func Signalfd(fd int, sigmask *Sigset_t, flags int) (newfd int, err error) {
1770 return signalfd(fd, sigmask, _C__NSIG/8, flags)
1771}
1772
1773//sys Setpriority(which int, who int, prio int) (err error)
1774//sys Setxattr(path string, attr string, data []byte, flags int) (err error)
1775//sys signalfd(fd int, sigmask *Sigset_t, maskSize uintptr, flags int) (newfd int, err error) = SYS_SIGNALFD4
1776//sys Statx(dirfd int, path string, flags int, mask int, stat *Statx_t) (err error)
1777//sys Sync()
1778//sys Syncfs(fd int) (err error)
1779//sysnb Sysinfo(info *Sysinfo_t) (err error)
1780//sys Tee(rfd int, wfd int, len int, flags int) (n int64, err error)
1781//sysnb TimerfdCreate(clockid int, flags int) (fd int, err error)
1782//sysnb TimerfdGettime(fd int, currValue *ItimerSpec) (err error)
1783//sysnb TimerfdSettime(fd int, flags int, newValue *ItimerSpec, oldValue *ItimerSpec) (err error)
1784//sysnb Tgkill(tgid int, tid int, sig syscall.Signal) (err error)
1785//sysnb Times(tms *Tms) (ticks uintptr, err error)
1786//sysnb Umask(mask int) (oldmask int)
1787//sysnb Uname(buf *Utsname) (err error)
1788//sys Unmount(target string, flags int) (err error) = SYS_UMOUNT2
1789//sys Unshare(flags int) (err error)
1790//sys write(fd int, p []byte) (n int, err error)
1791//sys exitThread(code int) (err error) = SYS_EXIT
1792//sys readlen(fd int, p *byte, np int) (n int, err error) = SYS_READ
1793//sys writelen(fd int, p *byte, np int) (n int, err error) = SYS_WRITE
1794//sys readv(fd int, iovs []Iovec) (n int, err error) = SYS_READV
1795//sys writev(fd int, iovs []Iovec) (n int, err error) = SYS_WRITEV
1796//sys preadv(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr) (n int, err error) = SYS_PREADV
1797//sys pwritev(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr) (n int, err error) = SYS_PWRITEV
1798//sys preadv2(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr, flags int) (n int, err error) = SYS_PREADV2
1799//sys pwritev2(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr, flags int) (n int, err error) = SYS_PWRITEV2
1800
1801func bytes2iovec(bs [][]byte) []Iovec {
1802 iovecs := make([]Iovec, len(bs))
1803 for i, b := range bs {
1804 iovecs[i].SetLen(len(b))
1805 if len(b) > 0 {
1806 iovecs[i].Base = &b[0]
1807 } else {
1808 iovecs[i].Base = (*byte)(unsafe.Pointer(&_zero))
1809 }
1810 }
1811 return iovecs
1812}
1813
1814// offs2lohi splits offs into its lower and upper unsigned long. On 64-bit
1815// systems, hi will always be 0. On 32-bit systems, offs will be split in half.
1816// preadv/pwritev chose this calling convention so they don't need to add a
1817// padding-register for alignment on ARM.
1818func offs2lohi(offs int64) (lo, hi uintptr) {
1819 return uintptr(offs), uintptr(uint64(offs) >> SizeofLong)
1820}
1821
1822func Readv(fd int, iovs [][]byte) (n int, err error) {
1823 iovecs := bytes2iovec(iovs)
1824 n, err = readv(fd, iovecs)
1825 readvRacedetect(iovecs, n, err)
1826 return n, err
1827}
1828
1829func Preadv(fd int, iovs [][]byte, offset int64) (n int, err error) {
1830 iovecs := bytes2iovec(iovs)
1831 lo, hi := offs2lohi(offset)
1832 n, err = preadv(fd, iovecs, lo, hi)
1833 readvRacedetect(iovecs, n, err)
1834 return n, err
1835}
1836
1837func Preadv2(fd int, iovs [][]byte, offset int64, flags int) (n int, err error) {
1838 iovecs := bytes2iovec(iovs)
1839 lo, hi := offs2lohi(offset)
1840 n, err = preadv2(fd, iovecs, lo, hi, flags)
1841 readvRacedetect(iovecs, n, err)
1842 return n, err
1843}
1844
1845func readvRacedetect(iovecs []Iovec, n int, err error) {
1846 if !raceenabled {
1847 return
1848 }
1849 for i := 0; n > 0 && i < len(iovecs); i++ {
1850 m := int(iovecs[i].Len)
1851 if m > n {
1852 m = n
1853 }
1854 n -= m
1855 if m > 0 {
1856 raceWriteRange(unsafe.Pointer(iovecs[i].Base), m)
1857 }
1858 }
1859 if err == nil {
1860 raceAcquire(unsafe.Pointer(&ioSync))
1861 }
1862}
1863
1864func Writev(fd int, iovs [][]byte) (n int, err error) {
1865 iovecs := bytes2iovec(iovs)
1866 if raceenabled {
1867 raceReleaseMerge(unsafe.Pointer(&ioSync))
1868 }
1869 n, err = writev(fd, iovecs)
1870 writevRacedetect(iovecs, n)
1871 return n, err
1872}
1873
1874func Pwritev(fd int, iovs [][]byte, offset int64) (n int, err error) {
1875 iovecs := bytes2iovec(iovs)
1876 if raceenabled {
1877 raceReleaseMerge(unsafe.Pointer(&ioSync))
1878 }
1879 lo, hi := offs2lohi(offset)
1880 n, err = pwritev(fd, iovecs, lo, hi)
1881 writevRacedetect(iovecs, n)
1882 return n, err
1883}
1884
1885func Pwritev2(fd int, iovs [][]byte, offset int64, flags int) (n int, err error) {
1886 iovecs := bytes2iovec(iovs)
1887 if raceenabled {
1888 raceReleaseMerge(unsafe.Pointer(&ioSync))
1889 }
1890 lo, hi := offs2lohi(offset)
1891 n, err = pwritev2(fd, iovecs, lo, hi, flags)
1892 writevRacedetect(iovecs, n)
1893 return n, err
1894}
1895
1896func writevRacedetect(iovecs []Iovec, n int) {
1897 if !raceenabled {
1898 return
1899 }
1900 for i := 0; n > 0 && i < len(iovecs); i++ {
1901 m := int(iovecs[i].Len)
1902 if m > n {
1903 m = n
1904 }
1905 n -= m
1906 if m > 0 {
1907 raceReadRange(unsafe.Pointer(iovecs[i].Base), m)
1908 }
1909 }
1910}
1911
1912// mmap varies by architecture; see syscall_linux_*.go.
1913//sys munmap(addr uintptr, length uintptr) (err error)
1914
// mapper is the package-level mmapper that Mmap and Munmap delegate to.
// It starts with an empty active map and is wired to this package's
// mmap and munmap functions.
// NOTE(review): active presumably records live mappings keyed by a
// pointer into each mapped slice — confirm against the mmapper definition.
var mapper = &mmapper{
	active: make(map[*byte][]byte),
	mmap:   mmap,
	munmap: munmap,
}
1920
1921func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
1922 return mapper.Mmap(fd, offset, length, prot, flags)
1923}
1924
1925func Munmap(b []byte) (err error) {
1926 return mapper.Munmap(b)
1927}
1928
1929//sys Madvise(b []byte, advice int) (err error)
1930//sys Mprotect(b []byte, prot int) (err error)
1931//sys Mlock(b []byte) (err error)
1932//sys Mlockall(flags int) (err error)
1933//sys Msync(b []byte, flags int) (err error)
1934//sys Munlock(b []byte) (err error)
1935//sys Munlockall() (err error)
1936
1937// Vmsplice splices user pages from a slice of Iovecs into a pipe specified by fd,
1938// using the specified flags.
1939func Vmsplice(fd int, iovs []Iovec, flags int) (int, error) {
1940 var p unsafe.Pointer
1941 if len(iovs) > 0 {
1942 p = unsafe.Pointer(&iovs[0])
1943 }
1944
1945 n, _, errno := Syscall6(SYS_VMSPLICE, uintptr(fd), uintptr(p), uintptr(len(iovs)), uintptr(flags), 0, 0)
1946 if errno != 0 {
1947 return 0, syscall.Errno(errno)
1948 }
1949
1950 return int(n), nil
1951}
1952
1953func isGroupMember(gid int) bool {
1954 groups, err := Getgroups()
1955 if err != nil {
1956 return false
1957 }
1958
1959 for _, g := range groups {
1960 if g == gid {
1961 return true
1962 }
1963 }
1964 return false
1965}
1966
1967//sys faccessat(dirfd int, path string, mode uint32) (err error)
1968
1969func Faccessat(dirfd int, path string, mode uint32, flags int) (err error) {
1970 if flags & ^(AT_SYMLINK_NOFOLLOW|AT_EACCESS) != 0 {
1971 return EINVAL
1972 }
1973
1974 // The Linux kernel faccessat system call does not take any flags.
1975 // The glibc faccessat implements the flags itself; see
1976 // https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/faccessat.c;hb=HEAD
1977 // Because people naturally expect syscall.Faccessat to act
1978 // like C faccessat, we do the same.
1979
1980 if flags == 0 {
1981 return faccessat(dirfd, path, mode)
1982 }
1983
1984 var st Stat_t
1985 if err := Fstatat(dirfd, path, &st, flags&AT_SYMLINK_NOFOLLOW); err != nil {
1986 return err
1987 }
1988
1989 mode &= 7
1990 if mode == 0 {
1991 return nil
1992 }
1993
1994 var uid int
1995 if flags&AT_EACCESS != 0 {
1996 uid = Geteuid()
1997 } else {
1998 uid = Getuid()
1999 }
2000
2001 if uid == 0 {
2002 if mode&1 == 0 {
2003 // Root can read and write any file.
2004 return nil
2005 }
2006 if st.Mode&0111 != 0 {
2007 // Root can execute any file that anybody can execute.
2008 return nil
2009 }
2010 return EACCES
2011 }
2012
2013 var fmode uint32
2014 if uint32(uid) == st.Uid {
2015 fmode = (st.Mode >> 6) & 7
2016 } else {
2017 var gid int
2018 if flags&AT_EACCESS != 0 {
2019 gid = Getegid()
2020 } else {
2021 gid = Getgid()
2022 }
2023
2024 if uint32(gid) == st.Gid || isGroupMember(gid) {
2025 fmode = (st.Mode >> 3) & 7
2026 } else {
2027 fmode = st.Mode & 7
2028 }
2029 }
2030
2031 if fmode&mode == mode {
2032 return nil
2033 }
2034
2035 return EACCES
2036}
2037
2038//sys nameToHandleAt(dirFD int, pathname string, fh *fileHandle, mountID *_C_int, flags int) (err error) = SYS_NAME_TO_HANDLE_AT
2039//sys openByHandleAt(mountFD int, fh *fileHandle, flags int) (fd int, err error) = SYS_OPEN_BY_HANDLE_AT
2040
// fileHandle is the header passed to nameToHandleAt and openByHandleAt.
// Generating it via unix/linux/types.go with
// "type fileHandle C.struct_file_handle" produced empty structs for
// mips64 and mips64le, so it is hard coded here (the layout is the same
// everywhere else) until the mips64 generator issue is fixed.
type fileHandle struct {
	Bytes uint32 // number of handle bytes that follow this header
	Type  int32  // handle type
}

// FileHandle represents the C struct file_handle used by
// name_to_handle_at (see NameToHandleAt) and open_by_handle_at (see
// OpenByHandleAt). The embedded pointer refers to a header that is
// immediately followed in memory by the handle bytes.
type FileHandle struct {
	*fileHandle
}

// NewFileHandle constructs a FileHandle of the given type whose payload
// is a copy of handle.
func NewFileHandle(handleType int32, handle []byte) FileHandle {
	const hdrSize = unsafe.Sizeof(fileHandle{})
	n := len(handle)
	// One allocation holds the header followed by the payload.
	storage := make([]byte, hdrSize+uintptr(n))
	hdr := (*fileHandle)(unsafe.Pointer(&storage[0]))
	hdr.Bytes = uint32(n)
	hdr.Type = handleType
	copy(storage[hdrSize:], handle)
	return FileHandle{fileHandle: hdr}
}

// Size returns the number of payload bytes recorded in the header.
func (fh *FileHandle) Size() int {
	return int(fh.fileHandle.Bytes)
}

// Type returns the handle type recorded in the header.
func (fh *FileHandle) Type() int32 {
	return fh.fileHandle.Type
}

// Bytes returns the payload that follows the header as a slice whose
// length and capacity both equal Size. It returns nil for an empty
// payload. The slice aliases the handle's memory; it is not a copy.
func (fh *FileHandle) Bytes() []byte {
	n := fh.Size()
	if n == 0 {
		return nil
	}
	// The payload begins directly after the fixed-size header. The pointer
	// arithmetic must stay within a single expression.
	payload := (*[1 << 30]byte)(unsafe.Pointer(uintptr(unsafe.Pointer(fh.fileHandle)) + unsafe.Sizeof(fileHandle{})))
	return payload[:n:n]
}
2078
2079// NameToHandleAt wraps the name_to_handle_at system call; it obtains
2080// a handle for a path name.
2081func NameToHandleAt(dirfd int, path string, flags int) (handle FileHandle, mountID int, err error) {
2082 var mid _C_int
2083 // Try first with a small buffer, assuming the handle will
2084 // only be 32 bytes.
2085 size := uint32(32 + unsafe.Sizeof(fileHandle{}))
2086 didResize := false
2087 for {
2088 buf := make([]byte, size)
2089 fh := (*fileHandle)(unsafe.Pointer(&buf[0]))
2090 fh.Bytes = size - uint32(unsafe.Sizeof(fileHandle{}))
2091 err = nameToHandleAt(dirfd, path, fh, &mid, flags)
2092 if err == EOVERFLOW {
2093 if didResize {
2094 // We shouldn't need to resize more than once
2095 return
2096 }
2097 didResize = true
2098 size = fh.Bytes + uint32(unsafe.Sizeof(fileHandle{}))
2099 continue
2100 }
2101 if err != nil {
2102 return
2103 }
2104 return FileHandle{fh}, int(mid), nil
2105 }
2106}
2107
2108// OpenByHandleAt wraps the open_by_handle_at system call; it opens a
2109// file via a handle as previously returned by NameToHandleAt.
2110func OpenByHandleAt(mountFD int, handle FileHandle, flags int) (fd int, err error) {
2111 return openByHandleAt(mountFD, handle.fileHandle, flags)
2112}
2113
2114// Klogset wraps the sys_syslog system call; it sets console_loglevel to
2115// the value specified by arg and passes a dummy pointer to bufp.
2116func Klogset(typ int, arg int) (err error) {
2117 var p unsafe.Pointer
2118 _, _, errno := Syscall(SYS_SYSLOG, uintptr(typ), uintptr(p), uintptr(arg))
2119 if errno != 0 {
2120 return errnoErr(errno)
2121 }
2122 return nil
2123}
2124
2125/*
2126 * Unimplemented
2127 */
2128// AfsSyscall
2129// Alarm
2130// ArchPrctl
2131// Brk
2132// ClockNanosleep
2133// ClockSettime
2134// Clone
2135// EpollCtlOld
2136// EpollPwait
2137// EpollWaitOld
2138// Execve
2139// Fork
2140// Futex
2141// GetKernelSyms
2142// GetMempolicy
2143// GetRobustList
2144// GetThreadArea
2145// Getitimer
2146// Getpmsg
2147// IoCancel
2148// IoDestroy
2149// IoGetevents
2150// IoSetup
2151// IoSubmit
2152// IoprioGet
2153// IoprioSet
2154// KexecLoad
2155// LookupDcookie
2156// Mbind
2157// MigratePages
2158// Mincore
2159// ModifyLdt
2160// Mount
2161// MovePages
2162// MqGetsetattr
2163// MqNotify
2164// MqOpen
2165// MqTimedreceive
2166// MqTimedsend
2167// MqUnlink
2168// Mremap
2169// Msgctl
2170// Msgget
2171// Msgrcv
2172// Msgsnd
2173// Nfsservctl
2174// Personality
2175// Pselect6
2176// Ptrace
2177// Putpmsg
2178// Quotactl
// Readahead
2181// RemapFilePages
2182// RestartSyscall
2183// RtSigaction
2184// RtSigpending
2185// RtSigprocmask
2186// RtSigqueueinfo
2187// RtSigreturn
2188// RtSigsuspend
2189// RtSigtimedwait
2190// SchedGetPriorityMax
2191// SchedGetPriorityMin
2192// SchedGetparam
2193// SchedGetscheduler
2194// SchedRrGetInterval
2195// SchedSetparam
2196// SchedYield
2197// Security
2198// Semctl
2199// Semget
2200// Semop
2201// Semtimedop
2202// SetMempolicy
2203// SetRobustList
2204// SetThreadArea
2205// SetTidAddress
2206// Shmat
2207// Shmctl
2208// Shmdt
2209// Shmget
2210// Sigaltstack
2211// Swapoff
2212// Swapon
2213// Sysfs
2214// TimerCreate
2215// TimerDelete
2216// TimerGetoverrun
2217// TimerGettime
2218// TimerSettime
2219// Tkill (obsolete)
2220// Tuxcall
2221// Umount2
2222// Uselib
2223// Utimensat
2224// Vfork
2225// Vhangup
2226// Vserver
2227// Waitid
2228// _Sysctl