1// Copyright 2020 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package os
6
7import (
8	"internal/poll"
9	"io"
10	"syscall"
11)
12
// pollCopyFileRange and pollSplice are package-level variables holding
// poll.CopyFileRange and poll.Splice. File.copyFileRange and
// File.spliceToFile call through these variables instead of calling the
// poll functions directly.
// NOTE(review): presumably variables (rather than direct calls) so that tests
// can substitute hooks — confirm against the test files.
var (
	pollCopyFileRange = poll.CopyFileRange
	pollSplice        = poll.Splice
)
17
18// wrapSyscallError takes an error and a syscall name. If the error is
19// a syscall.Errno, it wraps it in an os.SyscallError using the syscall name.
20func wrapSyscallError(name string, err error) error {
21	if _, ok := err.(syscall.Errno); ok {
22		err = NewSyscallError(name, err)
23	}
24	return err
25}
26
27func (f *File) writeTo(w io.Writer) (written int64, handled bool, err error) {
28	pfd, network := getPollFDAndNetwork(w)
29	// TODO(panjf2000): same as File.spliceToFile.
30	if pfd == nil || !pfd.IsStream || !isUnixOrTCP(string(network)) {
31		return
32	}
33
34	sc, err := f.SyscallConn()
35	if err != nil {
36		return
37	}
38
39	rerr := sc.Read(func(fd uintptr) (done bool) {
40		written, err, handled = poll.SendFile(pfd, int(fd), 1<<63-1)
41		return true
42	})
43
44	if err == nil {
45		err = rerr
46	}
47
48	return written, handled, wrapSyscallError("sendfile", err)
49}
50
51func (f *File) readFrom(r io.Reader) (written int64, handled bool, err error) {
52	// Neither copy_file_range(2) nor splice(2) supports destinations opened with
53	// O_APPEND, so don't bother to try zero-copy with these system calls.
54	//
55	// Visit https://man7.org/linux/man-pages/man2/copy_file_range.2.html#ERRORS and
56	// https://man7.org/linux/man-pages/man2/splice.2.html#ERRORS for details.
57	if f.appendMode {
58		return 0, false, nil
59	}
60
61	written, handled, err = f.copyFileRange(r)
62	if handled {
63		return
64	}
65	return f.spliceToFile(r)
66}
67
68func (f *File) spliceToFile(r io.Reader) (written int64, handled bool, err error) {
69	var (
70		remain int64
71		lr     *io.LimitedReader
72	)
73	if lr, r, remain = tryLimitedReader(r); remain <= 0 {
74		return 0, true, nil
75	}
76
77	pfd, _ := getPollFDAndNetwork(r)
78	// TODO(panjf2000): run some tests to see if we should unlock the non-streams for splice.
79	// Streams benefit the most from the splice(2), non-streams are not even supported in old kernels
80	// where splice(2) will just return EINVAL; newer kernels support non-streams like UDP, but I really
81	// doubt that splice(2) could help non-streams, cuz they usually send small frames respectively
82	// and one splice call would result in one frame.
83	// splice(2) is suitable for large data but the generation of fragments defeats its edge here.
84	// Therefore, don't bother to try splice if the r is not a streaming descriptor.
85	if pfd == nil || !pfd.IsStream {
86		return
87	}
88
89	written, handled, err = pollSplice(&f.pfd, pfd, remain)
90
91	if lr != nil {
92		lr.N = remain - written
93	}
94
95	return written, handled, wrapSyscallError("splice", err)
96}
97
98func (f *File) copyFileRange(r io.Reader) (written int64, handled bool, err error) {
99	var (
100		remain int64
101		lr     *io.LimitedReader
102	)
103	if lr, r, remain = tryLimitedReader(r); remain <= 0 {
104		return 0, true, nil
105	}
106
107	var src *File
108	switch v := r.(type) {
109	case *File:
110		src = v
111	case fileWithoutWriteTo:
112		src = v.File
113	default:
114		return 0, false, nil
115	}
116
117	if src.checkValid("ReadFrom") != nil {
118		// Avoid returning the error as we report handled as false,
119		// leave further error handling as the responsibility of the caller.
120		return 0, false, nil
121	}
122
123	written, handled, err = pollCopyFileRange(&f.pfd, &src.pfd, remain)
124	if lr != nil {
125		lr.N -= written
126	}
127	return written, handled, wrapSyscallError("copy_file_range", err)
128}
129
130// getPollFDAndNetwork tries to get the poll.FD and network type from the given interface
131// by expecting the underlying type of i to be the implementation of syscall.Conn
132// that contains a *net.rawConn.
133func getPollFDAndNetwork(i any) (*poll.FD, poll.String) {
134	sc, ok := i.(syscall.Conn)
135	if !ok {
136		return nil, ""
137	}
138	rc, err := sc.SyscallConn()
139	if err != nil {
140		return nil, ""
141	}
142	irc, ok := rc.(interface {
143		PollFD() *poll.FD
144		Network() poll.String
145	})
146	if !ok {
147		return nil, ""
148	}
149	return irc.PollFD(), irc.Network()
150}
151
// tryLimitedReader unwraps r if it is a *io.LimitedReader.
//
// On success it returns the LimitedReader itself, the reader it wraps, and
// its remaining byte count N. Otherwise it returns nil, r unchanged, and the
// largest int64 as a theoretically unlimited remaining amount.
func tryLimitedReader(r io.Reader) (*io.LimitedReader, io.Reader, int64) {
	if limited, ok := r.(*io.LimitedReader); ok {
		return limited, limited.R, limited.N
	}

	// By default, copy until EOF.
	const untilEOF int64 = 1<<63 - 1
	return nil, r, untilEOF
}
166
// isUnixOrTCP reports whether network is exactly one of "tcp", "tcp4",
// "tcp6" or "unix". The comparison is case-sensitive.
func isUnixOrTCP(network string) bool {
	return network == "tcp" ||
		network == "tcp4" ||
		network == "tcp6" ||
		network == "unix"
}
175