xref: /aosp_15_r20/external/sg3_utils/testing/sgh_dd.cpp (revision 44704f698541f6367e81f991ef8bb54ccbf3fc18)
1 /*
2  * A utility program for copying files. Specialised for "files" that
3  * represent devices that understand the SCSI command set.
4  *
5  * Copyright (C) 2018-2022 D. Gilbert
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2, or (at your option)
9  * any later version.
10  *
11  * SPDX-License-Identifier: GPL-2.0-or-later
12  *
13  * This program is a specialisation of the Unix "dd" command in which
14  * one or both of the given files is a scsi generic device.
15  * A logical block size ('bs') is assumed to be 512 if not given. This
16  * program complains if 'ibs' or 'obs' are given with some other value
17  * than 'bs'. If 'if' is not given or 'if=-' then stdin is assumed. If
18  * 'of' is not given or 'of=-' then stdout assumed.
19  *
20  * A non-standard argument "bpt" (blocks per transfer) is added to control
21  * the maximum number of blocks in each transfer. The default value is 128.
22  * For example if "bs=512" and "bpt=32" then a maximum of 32 blocks (16 KiB
23  * in this case) are transferred to or from the sg device in a single SCSI
24  * command.
25  *
26  * This version is designed for the linux kernel 2.4, 2.6, 3, 4 and 5 series.
27  *
28  * sgp_dd is a Posix threads specialization of the sg_dd utility. Both
29  * sgp_dd and sg_dd only perform special tasks when one or both of the given
30  * devices belong to the Linux sg driver.
31  *
32  * sgh_dd further extends sgp_dd to use the experimental kernel buffer
33  * sharing feature added in 3.9.02 .
34  * N.B. This utility was previously called sgs_dd but there was already an
35  * archived version of a dd variant called sgs_dd so this utility name was
36  * renamed [20181221]
37  */
38 
39 static const char * version_str = "2.22 20221020";
40 
41 #define _XOPEN_SOURCE 600
42 #ifndef _GNU_SOURCE
43 #define _GNU_SOURCE 1
44 #endif
45 
46 #include <unistd.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <stdarg.h>
51 #include <stdbool.h>
52 #include <string.h>
53 #include <ctype.h>
54 #include <errno.h>
55 #include <poll.h>
56 #include <limits.h>
57 #include <pthread.h>
58 #include <signal.h>
59 #define __STDC_FORMAT_MACROS 1
60 #include <inttypes.h>
61 #include <sys/ioctl.h>
62 #include <sys/stat.h>
63 #include <sys/sysmacros.h>
64 #ifndef major
65 #include <sys/types.h>
66 #endif
67 #include <sys/time.h>
68 #include <linux/major.h>        /* for MEM_MAJOR, SCSI_GENERIC_MAJOR, etc */
69 #include <linux/fs.h>           /* for BLKSSZGET and friends */
70 #include <sys/mman.h>           /* for mmap() system call */
71 
72 #include <vector>
73 #include <array>
74 #include <atomic>       // C++ header replacing <stdatomic.h> also link
75                         // needed '-l atomic' . Not anymore??
76 #include <random>
77 #include <thread>       // needed for std::this_thread::yield()
78 #include <mutex>
79 #include <chrono>
80 
81 #ifdef HAVE_CONFIG_H
82 #include "config.h"
83 #endif
84 
85 #ifdef HAVE_GETRANDOM
86 #include <sys/random.h>         /* for getrandom() system call */
87 #endif
88 
89 #ifndef HAVE_LINUX_SG_V4_HDR
/* Kernel uapi headers contain __user decorations on user space pointers
 * to indicate they are unsafe in the kernel space. However glibc takes
 * all those __user decorations out from headers in /usr/include/linux .
 * So to stop compile errors when directly importing include/uapi/scsi/sg.h
 * define __user as empty before doing that include. */
95 #define __user
96 
97 /* Want to block the original sg.h header from also being included. That
98  * causes lots of multiple definition errors. This will only work if this
99  * header is included _before_ the original sg.h header.  */
100 #define _SCSI_GENERIC_H         /* original kernel header guard */
101 #define _SCSI_SG_H              /* glibc header guard */
102 
103 #include "uapi_sg.h"    /* local copy of include/uapi/scsi/sg.h */
104 
105 #else
106 #define __user
107 #endif  /* end of: ifndef HAVE_LINUX_SG_V4_HDR */
108 
109 #include "sg_lib.h"
110 #include "sg_cmds_basic.h"
111 #include "sg_io_linux.h"
112 #include "sg_unaligned.h"
113 #include "sg_pr2serr.h"
114 
115 
116 using namespace std;
117 
118 #ifdef __GNUC__
119 #ifndef  __clang__
120 #pragma GCC diagnostic ignored "-Wclobbered"
121 #endif
122 #endif
123 
124 /* comment out following line to stop ioctl(SG_CTL_FLAGM_SNAP_DEV) */
125 #define SGH_DD_SNAP_DEV 1
126 
127 #ifndef SGV4_FLAG_POLLED
128 #define SGV4_FLAG_POLLED 0x800
129 #endif
130 
131 #define DEF_BLOCK_SIZE 512
132 #define DEF_BLOCKS_PER_TRANSFER 128
133 #define DEF_BLOCKS_PER_2048TRANSFER 32
134 #define DEF_SDT_ICT_MS 300
135 #define DEF_SDT_CRT_SEC 3
136 #define DEF_SCSI_CDBSZ 10
137 #define MAX_SCSI_CDBSZ 16
138 #define MAX_BPT_VALUE (1 << 24)         /* used for maximum bs as well */
139 #define MAX_COUNT_SKIP_SEEK (1LL << 48) /* coverity wants upper bound */
140 
141 #define SENSE_BUFF_LEN 64       /* Arbitrary, could be larger */
142 #define READ_CAP_REPLY_LEN 8
143 #define RCAP16_REPLY_LEN 32
144 
145 #define DEF_TIMEOUT 60000       /* 60,000 millisecs == 60 seconds */
146 
147 #define SGP_READ10 0x28
148 #define SGP_PRE_FETCH10 0x34
149 #define SGP_PRE_FETCH16 0x90
150 #define SGP_VERIFY10 0x2f
151 #define SGP_WRITE10 0x2a
152 #define DEF_NUM_THREADS 4
153 #define MAX_NUM_THREADS 1024 /* was SG_MAX_QUEUE with v3 driver */
154 #define DEF_NUM_MRQS 0
155 
156 #define FT_OTHER 1              /* filetype other than one of the following */
157 #define FT_SG 2                 /* filetype is sg char device */
158 #define FT_DEV_NULL 4           /* either /dev/null, /dev/zero or "." */
159 #define FT_ST 8                 /* filetype is st char device (tape) */
160 #define FT_CHAR 16              /* filetype is st char device (tape) */
161 #define FT_BLOCK 32             /* filetype is a block device */
162 #define FT_FIFO 64              /* fifo (named or unnamed pipe (stdout)) */
163 #define FT_RANDOM_0_FF 128      /* iflag=00, iflag=ff and iflag=random
164                                    override if=IFILE */
165 #define FT_ERROR 256            /* couldn't "stat" file */
166 
167 #define DEV_NULL_MINOR_NUM 3
168 #define DEV_ZERO_MINOR_NUM 5
169 
170 #define EBUFF_SZ 768
171 
172 #define PROC_SCSI_SG_VERSION "/proc/scsi/sg/version"
173 #define SYS_SCSI_SG_VERSION "/sys/module/sg/version"
174 
175 struct flags_t {
176     bool append;
177     bool coe;
178     bool defres;        /* without this res_sz==bs*bpt */
179     bool dio;
180     bool direct;
181     bool dpo;
182     bool dsync;
183     bool excl;
184     bool ff;
185     bool fua;
186     bool polled;	/* formerly called 'hipri' */
187     bool masync;        /* more async sg v4 driver flag */
188     bool mrq_immed;     /* mrq submit non-blocking */
189     bool mrq_svb;       /* mrq shared_variable_block, for sg->sg copy */
190     bool no_dur;
191     bool nocreat;
192     bool noshare;
193     bool no_thresh;
194     bool no_unshare;    /* leave it for driver close/release */
195     bool no_waitq;      /* dummy, no longer supported, just warn */
196     bool noxfer;
197     bool qhead;
198     bool qtail;
199     bool random;
200     bool mout_if;       /* META_OUT_IF flag at mrq level */
201     bool same_fds;
202     bool swait;         /* now ignore; kept for backward compatibility */
203     bool v3;
204     bool v4;
205     bool v4_given;
206     bool wq_excl;
207     bool zero;
208     int mmap;
209 };
210 
211 struct global_collection
212 {       /* one instance visible to all threads */
213     int infd;
214     int64_t skip;
215     int in_type;
216     int cdbsz_in;
217     int help;
218     int elem_sz;
219     struct flags_t in_flags;
220     // int64_t in_blk;                /* -\ next block address to read */
221     // int64_t in_count;              /*  | blocks remaining for next read */
222     atomic<int64_t> in_rem_count;     /*  | count of remaining in blocks */
223     atomic<int> in_partial;           /*  | */
224     atomic<bool> in_stop;             /*  | */
225     off_t in_st_size;                 /* Only for FT_OTHER (regular) file */
226     pthread_mutex_t in_mutex;         /* -/ */
227     int nmrqs;                        /* Number of multi-reqs for sg v4 */
228     int outfd;
229     int64_t seek;
230     int out_type;
231     int out2fd;
232     int out2_type;
233     int cdbsz_out;
234     int aen;                          /* abort every nth command */
235     int m_aen;                        /* abort mrq every nth command */
236     struct flags_t out_flags;
237     atomic<int64_t> out_blk;          /* -\ next block address to write */
238     atomic<int64_t> out_count;        /*  | blocks remaining for next write */
239     atomic<int64_t> out_rem_count;    /*  | count of remaining out blocks */
240     atomic<int> out_partial;          /*  | */
241     atomic<bool> out_stop;            /*  | */
242     off_t out_st_size;                /* Only for FT_OTHER (regular) file */
243     pthread_mutex_t out_mutex;        /*  | */
244     pthread_cond_t out_sync_cv;       /*  | hold writes until "in order" */
245     pthread_mutex_t out2_mutex;
246     int bs;
247     int bpt;
248     int cmd_timeout;            /* in milliseconds */
249     int outregfd;
250     int outreg_type;
251     int ofsplit;
252     atomic<int> dio_incomplete_count;
253     atomic<int> sum_of_resids;
254     uint32_t sdt_ict; /* stall detection; initial check time (milliseconds) */
255     uint32_t sdt_crt; /* check repetition time (seconds), after first stall */
256     int fail_mask;
257     int verbose;
258     int dry_run;
259     int chkaddr;
260     bool aen_given;
261     bool cdbsz_given;
262     bool is_mrq_i;
263     bool is_mrq_o;
264     bool m_aen_given;
265     bool ofile_given;
266     bool ofile2_given;
267     bool unit_nanosec;          /* default duration unit is millisecond */
268     bool mrq_cmds;              /* mrq=<NRQS>,C  given */
269     bool mrq_async;             /* mrq_immed flag given */
270     bool noshare;               /* don't use request sharing */
271     bool unbalanced_mrq;        /* so _not_ sg->sg request sharing sync mrq */
272     bool verify;                /* don't copy, verify like Unix: cmp */
273     bool prefetch;              /* for verify: do PF(b),RD(a),V(b)_a_data */
274     bool unshare;               /* let close() do file unshare operation */
275     const char * infp;
276     const char * outfp;
277     const char * out2fp;
278 };
279 
280 typedef struct mrq_abort_info
281 {
282     int from_tid;
283     int fd;
284     int mrq_id;
285     int debug;
286 } Mrq_abort_info;
287 
288 typedef struct request_element
289 {       /* one instance per worker thread */
290     struct global_collection *clp;
291     bool wr;
292     bool has_share;
293     bool both_sg;
294     bool same_sg;
295     bool only_in_sg;
296     bool only_out_sg;
297     // bool mrq_abort_thread_active;
298     int id;
299     int bs;
300     int infd;
301     int outfd;
302     int out2fd;
303     int outregfd;
304     int64_t iblk;
305     int64_t oblk;
306     int num_blks;
307     uint8_t * buffp;
308     uint8_t * alloc_bp;
309     struct sg_io_hdr io_hdr;
310     struct sg_io_v4 io_hdr4[2];
311     uint8_t cmd[MAX_SCSI_CDBSZ];
312     uint8_t sb[SENSE_BUFF_LEN];
313     int dio_incomplete_count;
314     int mmap_active;
315     int resid;
316     int rd_p_id;
317     int rep_count;
318     int rq_id;
319     int mmap_len;
320     int mrq_id;
321     int mrq_index;
322     uint32_t in_mrq_q_blks;
323     uint32_t out_mrq_q_blks;
324     long seed;
325 #ifdef HAVE_SRAND48_R   /* gcc extension. N.B. non-reentrant version slower */
326     struct drand48_data drand;/* opaque, used by srand48_r and mrand48_r */
327 #endif
328     pthread_t mrq_abort_thread_id;
329     Mrq_abort_info mai;
330 } Rq_elem;
331 
332 typedef struct thread_info
333 {
334     int id;
335     struct global_collection * gcp;
336     pthread_t a_pthr;
337 } Thread_info;
338 
339 /* Additional parameters for sg_start_io() and sg_finish_io() */
340 struct sg_io_extra {
341     bool is_wr2;
342     bool prefetch;
343     bool dout_is_split;
344     int hpv4_ind;
345     int blk_offset;
346     int blks;
347 };
348 
349 #define MONO_MRQ_ID_INIT 0x10000
350 
351 // typedef vector< pair<int, struct sg_io_v4> > mrq_arr_t;
352 typedef array<uint8_t, 32> big_cdb;     /* allow up to a 32 byte cdb */
353 typedef pair< vector<struct sg_io_v4>, vector<big_cdb> >   mrq_arr_t;
354 
355 
356 /* Use this class to wrap C++11 <random> features to produce uniform random
357  * unsigned ints in the range [lo, hi] (inclusive) given a_seed */
358 class Rand_uint {
359 public:
Rand_uint(unsigned int lo,unsigned int hi,unsigned int a_seed)360     Rand_uint(unsigned int lo, unsigned int hi, unsigned int a_seed)
361         : uid(lo, hi), dre(a_seed) { }
362     /* uid ctor takes inclusive range when integral type */
363 
get()364     unsigned int get() { return uid(dre); }
365 
366 private:
367     uniform_int_distribution<unsigned int> uid;
368     default_random_engine dre;
369 };
370 
371 static atomic<int> mono_pack_id(1);
372 static atomic<int> mono_mrq_id(MONO_MRQ_ID_INIT);
373 static atomic<long int> pos_index(0);
374 
375 static atomic<int> num_ebusy(0);
376 static atomic<int> num_start_eagain(0);
377 static atomic<int> num_fin_eagain(0);
378 static atomic<int> num_abort_req(0);
379 static atomic<int> num_abort_req_success(0);
380 static atomic<int> num_mrq_abort_req(0);
381 static atomic<int> num_mrq_abort_req_success(0);
382 static atomic<int> num_miscompare(0);
383 static atomic<long> num_waiting_calls(0);
384 static atomic<bool> vb_first_time(true);
385 static atomic<bool> shutting_down(false);
386 
387 static sigset_t signal_set;
388 static sigset_t orig_signal_set;
389 static pthread_t sig_listen_thread_id;
390 
391 static const char * sg_allow_dio = "/sys/module/sg/parameters/allow_dio";
392 
393 static void sg_in_rd_cmd(struct global_collection * clp, Rq_elem * rep,
394                          mrq_arr_t & def_arr);
395 static void sg_out_wr_cmd(Rq_elem * rep, mrq_arr_t & def_arr, bool is_wr2,
396                           bool prefetch);
397 static bool normal_in_rd(Rq_elem * rep, int blocks);
398 static void normal_out_wr(Rq_elem * rep, int blocks);
399 static int sg_start_io(Rq_elem * rep, mrq_arr_t & def_arr, int & pack_id,
400                        struct sg_io_extra *xtrp);
401 static int sg_finish_io(bool wr, Rq_elem * rep, int pack_id,
402                         struct sg_io_extra *xtrp);
403 static int sg_in_open(struct global_collection *clp, const char *inf,
404                       uint8_t **mmpp, int *mmap_len);
405 static int sg_out_open(struct global_collection *clp, const char *outf,
406                        uint8_t **mmpp, int *mmap_len);
407 static int sgh_do_deferred_mrq(Rq_elem * rep, mrq_arr_t & def_arr);
408 
409 #define STRERR_BUFF_LEN 128
410 
411 static pthread_mutex_t strerr_mut = PTHREAD_MUTEX_INITIALIZER;
412 
413 static bool have_sg_version = false;
414 static int sg_version = 0;
415 static bool sg_version_lt_4 = false;
416 static bool sg_version_ge_40045 = false;
417 static bool do_sync = false;
418 static int do_time = 1;
419 static struct global_collection gcoll;
420 static struct timeval start_tm;
421 static int64_t dd_count = -1;
422 static int num_threads = DEF_NUM_THREADS;
423 static int exit_status = 0;
424 static bool after1 = false;
425 
426 static const char * my_name = "sgh_dd: ";
427 
428 static const char * mrq_blk_s = "mrq: ordinary blocking";
429 static const char * mrq_vb_s = "mrq: variable blocking";
430 static const char * mrq_svb_s = "mrq: shared variable blocking (svb)";
431 static const char * mrq_s_nb_s = "mrq: submit of full non-blocking";
432 
433 
434 #ifdef __GNUC__
435 static int pr2serr_lk(const char * fmt, ...)
436         __attribute__ ((format (printf, 1, 2)));
437 #if 0
438 static void pr_errno_lk(int e_no, const char * fmt, ...)
439         __attribute__ ((format (printf, 2, 3)));
440 #endif
441 #else
442 static int pr2serr_lk(const char * fmt, ...);
443 #if 0
444 static void pr_errno_lk(int e_no, const char * fmt, ...);
445 #endif
446 #endif
447 
448 
449 static int
pr2serr_lk(const char * fmt,...)450 pr2serr_lk(const char * fmt, ...)
451 {
452     int n;
453     va_list args;
454 
455     pthread_mutex_lock(&strerr_mut);
456     va_start(args, fmt);
457     n = vfprintf(stderr, fmt, args);
458     va_end(args);
459     pthread_mutex_unlock(&strerr_mut);
460     return n;
461 }
462 
463 static void
usage(int pg_num)464 usage(int pg_num)
465 {
466     if (pg_num > 3)
467         goto page4;
468     else if (pg_num > 2)
469         goto page3;
470     else if (pg_num > 1)
471         goto page2;
472 
473     pr2serr("Usage: sgh_dd  [bs=BS] [conv=CONVS] [count=COUNT] [ibs=BS] "
474             "[if=IFILE]\n"
475             "               [iflag=FLAGS] [obs=BS] [of=OFILE] [oflag=FLAGS] "
476             "[seek=SEEK]\n"
477             "               [skip=SKIP] [--help] [--version]\n\n");
478     pr2serr("               [ae=AEN[,MAEN]] [bpt=BPT] [cdbsz=6|10|12|16] "
479             "[coe=0|1]\n"
480             "               [dio=0|1] [elemsz_kb=EKB] [fail_mask=FM] "
481             "[fua=0|1|2|3]\n"
482             "               [mrq=[I|O,]NRQS[,C]] [noshare=0|1] "
483             "[of2=OFILE2]\n"
484             "               [ofreg=OFREG] [ofsplit=OSP] [sdt=SDT] "
485             "[sync=0|1]\n"
486             "               [thr=THR] [time=0|1|2[,TO]] [unshare=1|0] "
487             "[verbose=VERB]\n"
488             "               [--dry-run] [--prefetch] [-v|-vv|-vvv] "
489             "[--verbose]\n"
490             "               [--verify] [--version]\n\n"
491             "  where the main options (shown in first group above) are:\n"
492             "    bs          must be device logical block size (default "
493             "512)\n"
494             "    conv        comma separated list from: [nocreat,noerror,"
495             "notrunc,\n"
496             "                null,sync]\n"
497             "    count       number of blocks to copy (def: device size)\n"
498             "    if          file or device to read from (def: stdin)\n"
499             "    iflag       comma separated list from: [00,coe,defres,dio,"
500             "direct,dpo,\n"
501             "                dsync,excl,ff,fua,masync,mmap,mout_if,"
502             "mrq_immed,mrq_svb,\n"
503             "                nocreat,nodur,noxfer,null,polled,qhead,"
504             "qtail,\n"
505             "                random,same_fds,v3,v4,wq_excl]\n"
506             "    of          file or device to write to (def: /dev/null "
507             "N.B. different\n"
508             "                from dd it defaults to stdout). If 'of=.' "
509             "uses /dev/null\n"
510             "    of2         second file or device to write to (def: "
511             "/dev/null)\n"
512             "    oflag       comma separated list from: [append,<<list from "
513             "iflag>>]\n"
514             "    seek        block position to start writing to OFILE\n"
515             "    skip        block position to start reading from IFILE\n"
516             "    --help|-h      output this usage message then exit\n"
517             "    --verify|-x    do a verify (compare) operation [def: do a "
518             "copy]\n"
519             "    --version|-V   output version string then exit\n\n"
520             "Copy IFILE to OFILE, similar to dd command. This utility is "
521             "specialized for\nSCSI devices and uses multiple POSIX threads. "
522             "It expects one or both IFILE\nand OFILE to be sg devices. With "
523             "--verify option does a verify/compare\noperation instead of a "
524             "copy. This utility is Linux specific and uses the\nv4 sg "
525             "driver 'share' capability if available. Use '-hh', '-hhh' or "
526             "'-hhhh'\nfor more information.\n"
527            );
528     return;
529 page2:
530     pr2serr("Syntax:  sgh_dd [operands] [options]\n\n"
531             "  where: operands have the form name=value and are pecular to "
532             "'dd'\n"
533             "         style commands, and options start with one or "
534             "two hyphens;\n"
535             "         the lesser used operands and option are:\n\n"
536             "    ae          AEN: abort every n commands (def: 0 --> don't "
537             "abort any)\n"
538             "                MAEN: abort every n mrq commands (def: 0 --> "
539             "don't)\n"
540             "                [requires commands with > 1 ms duration]\n"
541             "    bpt         is blocks_per_transfer (default is 128)\n"
542             "    cdbsz       size of SCSI READ, WRITE or VERIFY cdb_s "
543             "(default is 10)\n"
544             "    coe         continue on error, 0->exit (def), "
545             "1->zero + continue\n"
546             "    dio         is direct IO, 1->attempt, 0->indirect IO (def)\n"
547             "    elemsz_kb    scatter gather list element size in kilobytes "
548             "(def: 32[KB])\n"
549             "    fail_mask    1: misuse KEEP_SHARE flag; 0: nothing (def)\n"
550             "    fua         force unit access: 0->don't(def), 1->OFILE, "
551             "2->IFILE,\n"
552             "                3->OFILE+IFILE\n"
553             "    mrq         number of cmds placed in each sg call "
554             "(def: 0);\n"
555             "                may have trailing ',C', to send bulk cdb_s; "
556             "if preceded\n"
557             "                by 'I' then mrq only on IFILE, likewise 'O' "
558             "for OFILE\n"
559             "    noshare     0->use request sharing(def), 1->don't\n"
560             "    ofreg       OFREG is regular file or pipe to send what is "
561             "read from\n"
562             "                IFILE in the first half of each shared element\n"
563             "    ofsplit     split ofile write in two at block OSP (def: 0 "
564             "(no split))\n"
565             "    sdt         stall detection times: CRT[,ICT]. CRT: check "
566             "repetition\n"
567             "                time (after first) in seconds; ICT: initial "
568             "check time\n"
569             "                in milliseconds. Default: 3,300 . Use CRT=0 "
570             "to disable\n"
571             "    sync        0->no sync(def), 1->SYNCHRONIZE CACHE on OFILE "
572             "after copy\n"
573             "    thr         is number of threads, must be > 0, default 4, "
574             "max 1024\n"
575             "    time        0->no timing, 1->calc throughput(def), "
576             "2->nanosec\n"
577             "                precision; TO is command timeout in seconds "
578             "(def: 60)\n"
579             "    unshare     0->don't explicitly unshare after share; 1->let "
580             "close do\n"
581             "                file unshare (default)\n"
582             "    verbose     increase verbosity\n"
583             "    --chkaddr|-c    exits if read block does not contain "
584             "32 bit block\n"
585             "                    address, used once only checks first "
586             "address in block\n"
587             "    --dry-run|-d    prepare but bypass copy/read\n"
588             "    --prefetch|-p    with verify: do pre-fetch first\n"
589             "    --verbose|-v   increase verbosity of utility\n\n"
590             "Use '-hhh' or '-hhhh' for more information about flags.\n"
591            );
592     return;
593 page3:
594     pr2serr("Syntax:  sgh_dd [operands] [options]\n\n"
595             "  where: 'iflag=<arg>' and 'oflag=<arg>' arguments are listed "
596             "below:\n\n"
597             "    00          use all zeros instead of if=IFILE (only in "
598             "iflags)\n"
599             "    00,ff       generates blocks that contain own (32 bit be) "
600             "blk address\n"
601             "    append      append output to OFILE (assumes OFILE is "
602             "regular file)\n"
603             "    coe         continue of error (reading, fills with zeros)\n"
604             "    defres      keep default reserve buffer size (else its "
605             "bs*bpt)\n"
606             "    dio         sets the SG_FLAG_DIRECT_IO in sg requests\n"
607             "    direct      sets the O_DIRECT flag on open()\n"
608             "    dpo         sets the DPO (disable page out) in SCSI READs "
609             "and WRITEs\n"
610             "    dsync       sets the O_SYNC flag on open()\n"
611             "    excl        sets the O_EXCL flag on open()\n"
612             "    ff          use all 0xff bytes instead of if=IFILE (only in "
613             "iflags)\n"
614             "    fua         sets the FUA (force unit access) in SCSI READs "
615             "and WRITEs\n"
616             "    hipri       same as 'polled'; 'hipri' name is deprecated\n"
617             "    masync      set 'more async' flag on this sg device\n"
618             "    mmap        setup mmap IO on IFILE or OFILE; OFILE only "
619             "with noshare\n"
620             "    mmap,mmap    when used twice, doesn't call munmap()\n"
621             "    mout_if     set META_OUT_IF flag on each request\n"
622             "    mrq_immed    if mrq active, do submit non-blocking (def: "
623             "ordered\n"
624             "                 blocking)\n"
625             "    mrq_svb     if mrq and sg->sg copy, do shared_variable_"
626             "blocking\n"
627             "    nocreat     will fail rather than create OFILE\n"
628             "    nodur       turns off command duration calculations\n"
629             "    noxfer      no transfer to/from the user space\n"
630             "    no_thresh   skip checking per fd max data xfer\n"
631             "    null        does nothing, placeholder\n"
632             "    polled      set POLLED flag on command, uses blk_poll() to "
633             "complete\n"
634             "    qhead       queue new request at head of block queue\n"
635             "    qtail       queue new request at tail of block queue (def: "
636             "q at head)\n"
637             "    random      use random data instead of if=IFILE (only in "
638             "iflags)\n"
639             "    same_fds    each thread uses the same IFILE and OFILE(2) "
640             "file\n"
641             "                descriptors (def: each threads has own file "
642             "descriptors)\n"
643             "    swait       this option is now ignored\n"
644             "    v3          use v3 sg interface (def: v3 unless sg driver "
645             "is v4)\n"
646             "    v4          use v4 sg interface (def: v3 unless sg driver "
647             "is v4)\n"
648             "    wq_excl     set SG_CTL_FLAGM_EXCL_WAITQ on this sg fd\n"
649             "\n"
650             "Copies IFILE to OFILE (and to OFILE2 if given). If IFILE and "
651             "OFILE are sg\ndevices 'shared' mode is selected unless "
652             "'noshare' is given to 'iflag=' or\n'oflag='. of2=OFILE2 uses "
653             "'oflag=FLAGS'. When sharing, the data stays in a\nsingle "
654             "in-kernel buffer which is copied (or mmap-ed) to the user "
655             "space\nif the 'ofreg=OFREG' is given. Use '-hhhh' for more "
656             "information.\n"
657            );
658     return;
659 page4:
660     pr2serr("pack_id:\n"
661             "These are ascending integers, starting at 1, associated with "
662             "each issued\nSCSI command. When both IFILE and OFILE are sg "
663             "devices, then the READ in\neach read-write pair is issued an "
664             "even pack_id and its WRITE pair is\ngiven the pack_id one "
665             "higher (i.e. an odd number). This enables a\n'dmesg -w' "
666             "user to see that progress is being "
667             "made.\n\n");
668     pr2serr("Debugging:\n"
669             "Apart from using one or more '--verbose' options which gets a "
670             "bit noisy\n'dmesg -w' can give a good overview "
671             "of what is happening.\nThat does a sg driver object tree "
672             "traversal that does minimal locking\nto make sure that each "
673             "traversal is 'safe'. So it is important to note\nthe whole "
674             "tree is not locked. This means for fast devices the overall\n"
675             "tree state may change while the traversal is occurring. For "
676             "example,\nit has been observed that both the read- and write- "
677             "sides of a request\nshare show they are in 'active' state "
678             "which should not be possible.\nIt occurs because the read-"
679             "side probably jumped out of active state and\nthe write-side "
680             "request entered it while some other nodes were being "
681             "printed.\n\n");
682     pr2serr("Busy state:\n"
683             "Busy state (abbreviated to 'bsy' in the dmesg "
684             "output)\nis entered during request setup and completion. It "
685             "is intended to be\na temporary state. It should not block "
686             "but does sometimes (e.g. in\nblock_get_request()). Even so "
687             "that blockage should be short and if not\nthere is a "
688             "problem.\n\n");
689     pr2serr("--verify :\n"
690             "For comparing IFILE with OFILE. Does repeated sequences of: "
691             "READ(ifile)\nand uses data returned to send to VERIFY(ofile, "
692             "BYTCHK=1). So the OFILE\ndevice/disk is doing the actual "
693             "comparison. Stops on first miscompare.\n\n");
694     pr2serr("--prefetch :\n"
695             "Used with --verify option. Prepends a PRE-FETCH(ofile, IMMED) "
696             "to verify\nsequence. This should speed the trailing VERIFY by "
697             "making sure that\nthe data it needs for the comparison is "
698             "already in its cache.\n");
699     return;
700 }
701 
702 static void
lk_print_command_len(const char * prefix,uint8_t * cmdp,int len,bool lock)703 lk_print_command_len(const char *prefix, uint8_t * cmdp, int len, bool lock)
704 {
705     if (lock)
706         pthread_mutex_lock(&strerr_mut);
707     if (prefix && *prefix)
708         fputs(prefix, stderr);
709     sg_print_command_len(cmdp, len);
710     if (lock)
711         pthread_mutex_unlock(&strerr_mut);
712 }
713 
714 static void
lk_chk_n_print3(const char * leadin,struct sg_io_hdr * hp,bool raw_sinfo)715 lk_chk_n_print3(const char * leadin, struct sg_io_hdr * hp, bool raw_sinfo)
716 {
717     pthread_mutex_lock(&strerr_mut);
718     sg_chk_n_print3(leadin, hp, raw_sinfo);
719     pthread_mutex_unlock(&strerr_mut);
720 }
721 
722 static void
lk_chk_n_print4(const char * leadin,const struct sg_io_v4 * h4p,bool raw_sinfo)723 lk_chk_n_print4(const char * leadin, const struct sg_io_v4 * h4p,
724                 bool raw_sinfo)
725 {
726     pthread_mutex_lock(&strerr_mut);
727     sg_linux_sense_print(leadin, h4p->device_status, h4p->transport_status,
728                          h4p->driver_status, (const uint8_t *)h4p->response,
729                          h4p->response_len, raw_sinfo);
730     pthread_mutex_unlock(&strerr_mut);
731 }
732 
733 static void
hex2stderr_lk(const uint8_t * b_str,int len,int no_ascii)734 hex2stderr_lk(const uint8_t * b_str, int len, int no_ascii)
735 {
736     pthread_mutex_lock(&strerr_mut);
737     hex2stderr(b_str, len, no_ascii);
738     pthread_mutex_unlock(&strerr_mut);
739 }
740 
741 /* Flags decoded into abbreviations for those that are set, separated by
742  * '|' . */
743 static char *
sg_flags_str(int flags,int b_len,char * b)744 sg_flags_str(int flags, int b_len, char * b)
745 {
746     int n = 0;
747 
748     if ((b_len < 1) || (! b))
749         return b;
750     b[0] = '\0';
751     if (SG_FLAG_DIRECT_IO & flags) {            /* 0x1 */
752         n += sg_scnpr(b + n, b_len - n, "DIO|");
753         if (n >= b_len)
754             goto fini;
755     }
756     if (SG_FLAG_MMAP_IO & flags) {              /* 0x4 */
757         n += sg_scnpr(b + n, b_len - n, "MMAP|");
758         if (n >= b_len)
759             goto fini;
760     }
761     if (SGV4_FLAG_YIELD_TAG & flags) {          /* 0x8 */
762         n += sg_scnpr(b + n, b_len - n, "YTAG|");
763         if (n >= b_len)
764             goto fini;
765     }
766     if (SG_FLAG_Q_AT_TAIL & flags) {            /* 0x10 */
767         n += sg_scnpr(b + n, b_len - n, "QTAI|");
768         if (n >= b_len)
769             goto fini;
770     }
771     if (SG_FLAG_Q_AT_HEAD & flags) {            /* 0x20 */
772         n += sg_scnpr(b + n, b_len - n, "QHEA|");
773         if (n >= b_len)
774             goto fini;
775     }
776     if (SGV4_FLAG_DOUT_OFFSET & flags) {        /* 0x40 */
777         n += sg_scnpr(b + n, b_len - n, "DOFF|");
778         if (n >= b_len)
779             goto fini;
780     }
781     if (SGV4_FLAG_EVENTFD & flags) {           /* 0x80 */
782         n += sg_scnpr(b + n, b_len - n, "EVFD|");
783         if (n >= b_len)
784             goto fini;
785     }
786     if (SGV4_FLAG_COMPLETE_B4 & flags) {        /* 0x100 */
787         n += sg_scnpr(b + n, b_len - n, "CPL_B4|");
788         if (n >= b_len)
789             goto fini;
790     }
791     if (SGV4_FLAG_SIGNAL & flags) {       /* 0x200 */
792         n += sg_scnpr(b + n, b_len - n, "SIGNAL|");
793         if (n >= b_len)
794             goto fini;
795     }
796     if (SGV4_FLAG_IMMED & flags) {              /* 0x400 */
797         n += sg_scnpr(b + n, b_len - n, "IMM|");
798         if (n >= b_len)
799             goto fini;
800     }
801     if (SGV4_FLAG_POLLED & flags) {             /* 0x800 */
802         n += sg_scnpr(b + n, b_len - n, "POLLED|");
803         if (n >= b_len)
804             goto fini;
805     }
806     if (SGV4_FLAG_STOP_IF & flags) {            /* 0x1000 */
807         n += sg_scnpr(b + n, b_len - n, "STOPIF|");
808         if (n >= b_len)
809             goto fini;
810     }
811     if (SGV4_FLAG_DEV_SCOPE & flags) {          /* 0x2000 */
812         n += sg_scnpr(b + n, b_len - n, "DEV_SC|");
813         if (n >= b_len)
814             goto fini;
815     }
816     if (SGV4_FLAG_SHARE & flags) {              /* 0x4000 */
817         n += sg_scnpr(b + n, b_len - n, "SHARE|");
818         if (n >= b_len)
819             goto fini;
820     }
821     if (SGV4_FLAG_DO_ON_OTHER & flags) {        /* 0x8000 */
822         n += sg_scnpr(b + n, b_len - n, "DO_OTH|");
823         if (n >= b_len)
824             goto fini;
825     }
826     if (SGV4_FLAG_NO_DXFER & flags) {          /* 0x10000 */
827         n += sg_scnpr(b + n, b_len - n, "NOXFER|");
828         if (n >= b_len)
829             goto fini;
830     }
831     if (SGV4_FLAG_KEEP_SHARE & flags) {        /* 0x20000 */
832         n += sg_scnpr(b + n, b_len - n, "KEEP_SH|");
833         if (n >= b_len)
834             goto fini;
835     }
836     if (SGV4_FLAG_MULTIPLE_REQS & flags) {     /* 0x40000 */
837         n += sg_scnpr(b + n, b_len - n, "MRQS|");
838         if (n >= b_len)
839             goto fini;
840     }
841     if (SGV4_FLAG_ORDERED_WR & flags) {        /* 0x80000 */
842         n += sg_scnpr(b + n, b_len - n, "OWR|");
843         if (n >= b_len)
844             goto fini;
845     }
846     if (SGV4_FLAG_REC_ORDER & flags) {         /* 0x100000 */
847         n += sg_scnpr(b + n, b_len - n, "REC_O|");
848         if (n >= b_len)
849             goto fini;
850     }
851     if (SGV4_FLAG_META_OUT_IF & flags) {       /* 0x200000 */
852         n += sg_scnpr(b + n, b_len - n, "MOUT_IF|");
853         if (n >= b_len)
854             goto fini;
855     }
856     if (0 == n)
857         n += sg_scnpr(b + n, b_len - n, "<none>");
858 fini:
859     if (n < b_len) {    /* trim trailing '\' */
860         if ('|' == b[n - 1])
861             b[n - 1] = '\0';
862     } else if ('|' == b[b_len - 1])
863         b[b_len - 1] = '\0';
864     return b;
865 }
866 
867 /* Info field decoded into abbreviations for those bits that are set,
868  * separated by '|' . */
869 static char *
sg_info_str(int info,int b_len,char * b)870 sg_info_str(int info, int b_len, char * b)
871 {
872     int n = 0;
873 
874     if ((b_len < 1) || (! b))
875         return b;
876     b[0] = '\0';
877     if (SG_INFO_CHECK & info) {               /* 0x1 */
878         n += sg_scnpr(b + n, b_len - n, "CHK|");
879         if (n >= b_len)
880             goto fini;
881     }
882     if (SG_INFO_DIRECT_IO & info) {           /* 0x2 */
883         n += sg_scnpr(b + n, b_len - n, "DIO|");
884         if (n >= b_len)
885             goto fini;
886     }
887     if (SG_INFO_MIXED_IO & info) {            /* 0x4 */
888         n += sg_scnpr(b + n, b_len - n, "MIO|");
889         if (n >= b_len)
890             goto fini;
891     }
892     if (SG_INFO_DEVICE_DETACHING & info) {    /* 0x8 */
893         n += sg_scnpr(b + n, b_len - n, "DETA|");
894         if (n >= b_len)
895             goto fini;
896     }
897     if (SG_INFO_ABORTED & info) {             /* 0x10 */
898         n += sg_scnpr(b + n, b_len - n, "ABRT|");
899         if (n >= b_len)
900             goto fini;
901     }
902     if (SG_INFO_MRQ_FINI & info) {            /* 0x20 */
903         n += sg_scnpr(b + n, b_len - n, "MRQF|");
904         if (n >= b_len)
905             goto fini;
906     }
907 fini:
908     if (n < b_len) {    /* trim trailing '\' */
909         if ('|' == b[n - 1])
910             b[n - 1] = '\0';
911     } else if ('|' == b[b_len - 1])
912         b[b_len - 1] = '\0';
913     return b;
914 }
915 
916 static void
v4hdr_out_lk(const char * leadin,const sg_io_v4 * h4p,int id)917 v4hdr_out_lk(const char * leadin, const sg_io_v4 * h4p, int id)
918 {
919     char b[80];
920 
921     pthread_mutex_lock(&strerr_mut);
922     if (leadin)
923         pr2serr("%s [id=%d]:\n", leadin, id);
924     if (('Q' != h4p->guard) || (0 != h4p->protocol) ||
925         (0 != h4p->subprotocol))
926         pr2serr("  <<<sg_io_v4 _NOT_ properly set>>>\n");
927     pr2serr("  pointers: cdb=%s  sense=%s  din=%p  dout=%p\n",
928             (h4p->request ? "y" : "NULL"), (h4p->response ? "y" : "NULL"),
929             (void *)h4p->din_xferp, (void *)h4p->dout_xferp);
930     pr2serr("  lengths: cdb=%u  sense=%u  din=%u  dout=%u\n",
931             h4p->request_len, h4p->max_response_len, h4p->din_xfer_len,
932              h4p->dout_xfer_len);
933     pr2serr("  flags=0x%x  request_extra{pack_id}=%d\n",
934             h4p->flags, h4p->request_extra);
935     pr2serr("  flags set: %s\n", sg_flags_str(h4p->flags, sizeof(b), b));
936     pr2serr(" %s OUT fields:\n", leadin);
937     pr2serr("  response_len=%d driver/transport/device_status="
938             "0x%x/0x%x/0x%x\n", h4p->response_len, h4p->driver_status,
939             h4p->transport_status, h4p->device_status);
940     pr2serr("  info=0x%x  din_resid=%u  dout_resid=%u  spare_out=%u  "
941             "dur=%u\n",
942             h4p->info, h4p->din_resid, h4p->dout_resid, h4p->spare_out,
943             h4p->duration);
944     pthread_mutex_unlock(&strerr_mut);
945 }
946 
947 static void
fetch_sg_version(void)948 fetch_sg_version(void)
949 {
950     FILE * fp;
951     char b[96];
952 
953     have_sg_version = false;
954     sg_version = 0;
955     fp = fopen(PROC_SCSI_SG_VERSION, "r");
956     if (fp && fgets(b, sizeof(b) - 1, fp)) {
957         if (1 == sscanf(b, "%d", &sg_version))
958             have_sg_version = !!sg_version;
959     } else {
960         int j, k, l;
961 
962         if (fp)
963             fclose(fp);
964         fp = fopen(SYS_SCSI_SG_VERSION, "r");
965         if (fp && fgets(b, sizeof(b) - 1, fp)) {
966             if (3 == sscanf(b, "%d.%d.%d", &j, &k, &l)) {
967                 sg_version = (j * 10000) + (k * 100) + l;
968                 have_sg_version = !!sg_version;
969             }
970         }
971     }
972     if (fp)
973         fclose(fp);
974 }
975 
976 static void
calc_duration_throughput(int contin)977 calc_duration_throughput(int contin)
978 {
979     struct timeval end_tm, res_tm;
980     double a, b;
981 
982     gettimeofday(&end_tm, NULL);
983     res_tm.tv_sec = end_tm.tv_sec - start_tm.tv_sec;
984     res_tm.tv_usec = end_tm.tv_usec - start_tm.tv_usec;
985     if (res_tm.tv_usec < 0) {
986         --res_tm.tv_sec;
987         res_tm.tv_usec += 1000000;
988     }
989     a = res_tm.tv_sec;
990     a += (0.000001 * res_tm.tv_usec);
991     b = (double)gcoll.bs * (dd_count - gcoll.out_rem_count.load());
992     pr2serr("time to %s data %s %d.%06d secs",
993             (gcoll.verify ? "verify" : "copy"), (contin ? "so far" : "was"),
994             (int)res_tm.tv_sec, (int)res_tm.tv_usec);
995     if ((a > 0.00001) && (b > 511))
996         pr2serr(", %.2f MB/sec\n", b / (a * 1000000.0));
997     else
998         pr2serr("\n");
999 }
1000 
1001 static void
print_stats(const char * str)1002 print_stats(const char * str)
1003 {
1004     int64_t infull;
1005 
1006     if (0 != gcoll.out_rem_count.load())
1007         pr2serr("  remaining block count=%" PRId64 "\n",
1008                 gcoll.out_rem_count.load());
1009     infull = dd_count - gcoll.in_rem_count.load();
1010     pr2serr("%s%" PRId64 "+%d records in\n", str,
1011             infull - gcoll.in_partial.load(), gcoll.in_partial.load());
1012 
1013     if (gcoll.out_type == FT_DEV_NULL)
1014         pr2serr("%s0+0 records out\n", str);
1015     else {
1016         int64_t outfull = dd_count - gcoll.out_rem_count.load();
1017 
1018         pr2serr("%s%" PRId64 "+%d records %s\n", str,
1019                 outfull - gcoll.out_partial.load(), gcoll.out_partial.load(),
1020                 (gcoll.verify ? "verified" : "out"));
1021     }
1022 }
1023 
1024 static void
interrupt_handler(int sig)1025 interrupt_handler(int sig)
1026 {
1027     struct sigaction sigact;
1028 
1029     sigact.sa_handler = SIG_DFL;
1030     sigemptyset(&sigact.sa_mask);
1031     sigact.sa_flags = 0;
1032     sigaction(sig, &sigact, NULL);
1033     pr2serr("Interrupted by signal,");
1034     if (do_time > 0)
1035         calc_duration_throughput(0);
1036     print_stats("");
1037     kill(getpid (), sig);
1038 }
1039 
1040 static void
siginfo_handler(int sig)1041 siginfo_handler(int sig)
1042 {
1043     if (sig) { ; }      /* unused, dummy to suppress warning */
1044     pr2serr("Progress report, continuing ...\n");
1045     if (do_time > 0)
1046         calc_duration_throughput(1);
1047     print_stats("  ");
1048 }
1049 
1050 static void
siginfo2_handler(int sig)1051 siginfo2_handler(int sig)
1052 {
1053     struct global_collection * clp = &gcoll;
1054 
1055     if (sig) { ; }      /* unused, dummy to suppress warning */
1056     pr2serr("Progress report, continuing ...\n");
1057     if (do_time > 0)
1058         calc_duration_throughput(1);
1059     print_stats("  ");
1060     pr2serr("Send broadcast on out_sync_cv condition variable\n");
1061     pthread_cond_broadcast(&clp->out_sync_cv);
1062 }
1063 
/* Installs 'sig_handler' for signal 'sig_num' with an empty mask and no
 * flags, unless that signal is currently being ignored (SIG_IGN) in which
 * case its disposition is left alone. */
static void
install_handler(int sig_num, void (*sig_handler) (int sig))
{
    struct sigaction act;

    sigaction(sig_num, NULL, &act);     /* fetch current disposition */
    if (SIG_IGN == act.sa_handler)
        return;
    act.sa_handler = sig_handler;
    sigemptyset(&act.sa_mask);
    act.sa_flags = 0;
    sigaction(sig_num, &act, NULL);
}
1077 
#ifdef SG_LIB_ANDROID
/* Signal handler, only built when SG_LIB_ANDROID is defined, that
 * terminates the calling thread. The signal number is not used. */
static void
thread_exit_handler(int sig)
{
    (void)sig;          /* unused */
    pthread_exit(NULL);
}
#endif
1085 
1086 /* Make safe_strerror() thread safe */
1087 static char *
tsafe_strerror(int code,char * ebp)1088 tsafe_strerror(int code, char * ebp)
1089 {
1090     char * cp;
1091 
1092     pthread_mutex_lock(&strerr_mut);
1093     cp = safe_strerror(code);
1094     strncpy(ebp, cp, STRERR_BUFF_LEN);
1095     pthread_mutex_unlock(&strerr_mut);
1096 
1097     ebp[strlen(ebp)] = '\0';
1098     return ebp;
1099 }
1100 
1101 
/* Following macro is based on one from D.R. Butenhof's POSIX threads book:
 * ISBN 0-201-63392-2 . [Highly recommended book.] Here __func__ is used
 * in place of __FILE__ . It prints 'text', the enclosing function name,
 * the line number and the error string for 'code', then calls exit(1). */
#define err_exit(code,text) \
    do { \
        char ee_strerr_b_[STRERR_BUFF_LEN + 1]; \
        const char * ee_msg_ = tsafe_strerror(code, ee_strerr_b_); \
        \
        pr2serr("%s at \"%s\":%d: %s\n", \
                text, __func__, __LINE__, ee_msg_); \
        exit(1); \
    } while (0)
1111 
1112 
1113 static int
dd_filetype(const char * filename,off_t & st_size)1114 dd_filetype(const char * filename, off_t & st_size)
1115 {
1116     struct stat st;
1117     size_t len = strlen(filename);
1118 
1119     if ((1 == len) && ('.' == filename[0]))
1120         return FT_DEV_NULL;
1121     if (stat(filename, &st) < 0)
1122         return FT_ERROR;
1123     if (S_ISCHR(st.st_mode)) {
1124         if ((MEM_MAJOR == major(st.st_rdev)) &&
1125             ((DEV_NULL_MINOR_NUM == minor(st.st_rdev)) ||
1126              (DEV_ZERO_MINOR_NUM == minor(st.st_rdev))))
1127             return FT_DEV_NULL; /* treat /dev/null + /dev/zero the same */
1128         if (SCSI_GENERIC_MAJOR == major(st.st_rdev))
1129             return FT_SG;
1130         if (SCSI_TAPE_MAJOR == major(st.st_rdev))
1131             return FT_ST;
1132         return FT_CHAR;
1133     } else if (S_ISBLK(st.st_mode))
1134         return FT_BLOCK;
1135     else if (S_ISFIFO(st.st_mode))
1136         return FT_FIFO;
1137     st_size = st.st_size;
1138     return FT_OTHER;
1139 }
1140 
1141 static inline void
stop_both(struct global_collection * clp)1142 stop_both(struct global_collection * clp)
1143 {
1144     clp->in_stop = true;
1145     clp->out_stop = true;
1146 }
1147 
1148 /* Return of 0 -> success, see sg_ll_read_capacity*() otherwise */
1149 static int
scsi_read_capacity(int sg_fd,int64_t * num_sect,int * sect_sz)1150 scsi_read_capacity(int sg_fd, int64_t * num_sect, int * sect_sz)
1151 {
1152     int res;
1153     uint8_t rcBuff[RCAP16_REPLY_LEN] = {};
1154 
1155     res = sg_ll_readcap_10(sg_fd, 0, 0, rcBuff, READ_CAP_REPLY_LEN, false, 0);
1156     if (0 != res)
1157         return res;
1158 
1159     if ((0xff == rcBuff[0]) && (0xff == rcBuff[1]) && (0xff == rcBuff[2]) &&
1160         (0xff == rcBuff[3])) {
1161 
1162         res = sg_ll_readcap_16(sg_fd, 0, 0, rcBuff, RCAP16_REPLY_LEN, false,
1163                                0);
1164         if (0 != res)
1165             return res;
1166         *num_sect = sg_get_unaligned_be64(rcBuff + 0) + 1;
1167         *sect_sz = sg_get_unaligned_be32(rcBuff + 8);
1168     } else {
1169         /* take care not to sign extend values > 0x7fffffff */
1170         *num_sect = (int64_t)sg_get_unaligned_be32(rcBuff + 0) + 1;
1171         *sect_sz = sg_get_unaligned_be32(rcBuff + 4);
1172     }
1173     return 0;
1174 }
1175 
/* Fetches the logical block size (into '*sect_sz') and the number of such
 * blocks (into '*num_sect') for the block device open on 'sg_fd', using
 * the BLKSSZGET ioctl followed by BLKGETSIZE64 (or BLKGETSIZE when the
 * former is not defined). Return of 0 -> success, -1 -> failure. A
 * reported block size of zero or less is treated as a failure so that it
 * is never used as a divisor. If BLKSSZGET is not defined at build time
 * both outputs are set to 0 and -1 is returned. The BLKGETSIZE64,
 * BLKGETSIZE and BLKSSZGET macros are problematic (from <linux/fs.h> or
 * <sys/mount.h>). */
static int
read_blkdev_capacity(int sg_fd, int64_t * num_sect, int * sect_sz)
{
#ifdef BLKSSZGET
    if (ioctl(sg_fd, BLKSSZGET, sect_sz) < 0) {
        perror("BLKSSZGET ioctl error");
        return -1;
    }
    if (*sect_sz <= 0) {
        fprintf(stderr, "BLKSSZGET ioctl gave bad logical block size: %d\n",
                *sect_sz);
        return -1;
    }
    {
 #ifdef BLKGETSIZE64
        uint64_t ull;

        if (ioctl(sg_fd, BLKGETSIZE64, &ull) < 0) {

            perror("BLKGETSIZE64 ioctl error");
            return -1;
        }
        /* BLKGETSIZE64 yields the device size in bytes */
        *num_sect = ((int64_t)ull / (int64_t)*sect_sz);
 #else
        unsigned long ul;

        if (ioctl(sg_fd, BLKGETSIZE, &ul) < 0) {
            perror("BLKGETSIZE ioctl error");
            return -1;
        }
        /* BLKGETSIZE yields the device size in 512 byte units, so convert
         * to logical blocks */
        *num_sect = ((int64_t)ul * 512) / (int64_t)*sect_sz;
 #endif
    }
    return 0;
#else
    *num_sect = 0;
    *sect_sz = 0;
    return -1;
#endif
}
1212 
/* Runs 'cmd' with system(). If the command was terminated by SIGINT or
 * SIGQUIT that signal is raised in this process. Returns WEXITSTATUS()
 * applied to the status that system() yielded. */
static int
system_wrapper(const char * cmd)
{
    int status = system(cmd);

    if (WIFSIGNALED(status)) {
        int term_sig = WTERMSIG(status);

        if ((SIGINT == term_sig) || (SIGQUIT == term_sig))
            raise(term_sig);
    }
    return WEXITSTATUS(status);
}
1224 
1225 /* Has an infinite loop doing a timed wait for any signals in sig_set. After
1226  * each timeout (300 ms) checks if the most_recent_pack_id atomic integer
1227  * has changed. If not after another two timeouts announces a stall has
1228  * been detected. If shutting down atomic is true breaks out of loop and
1229  * shuts down this thread. Other than that, this thread is normally cancelled
1230  * by the main thread, after other threads have exited. */
1231 static void *
sig_listen_thread(void * v_clp)1232 sig_listen_thread(void * v_clp)
1233 {
1234     bool stall_reported = false;
1235     int prev_pack_id = 0;
1236     struct timespec ts;
1237     struct timespec * tsp = &ts;
1238     struct global_collection * clp = (struct global_collection *)v_clp;
1239     uint32_t ict_ms = (clp->sdt_ict ? clp->sdt_ict : DEF_SDT_ICT_MS);
1240 
1241     tsp->tv_sec = ict_ms / 1000;
1242     tsp->tv_nsec = (ict_ms % 1000) * 1000 * 1000;   /* DEF_SDT_ICT_MS */
1243     while (1) {
1244         int sig_number = sigtimedwait(&signal_set, NULL, tsp);
1245 
1246         if (sig_number < 0) {
1247             int err = errno;
1248 
1249             /* EAGAIN implies a timeout */
1250             if ((EAGAIN == err) && (clp->sdt_crt > 0)) {
1251                 int pack_id = mono_pack_id.load();
1252 
1253                 if ((pack_id > 0) && (pack_id == prev_pack_id)) {
1254                     if (! stall_reported) {
1255                         stall_reported = true;
1256                         tsp->tv_sec = clp->sdt_crt;
1257                         tsp->tv_nsec = 0;
1258                         pr2serr_lk("%s: first stall at pack_id=%d detected\n",
1259                                    __func__, pack_id);
1260                     } else
1261                         pr2serr_lk("%s: subsequent stall at pack_id=%d\n",
1262                                __func__, pack_id);
1263                     // following command assumes linux bash or similar shell
1264                     system_wrapper("cat /proc/scsi/sg/debug >> /dev/stderr\n");
1265                     // system_wrapper("/usr/bin/dmesg\n");
1266                 } else
1267                     prev_pack_id = pack_id;
1268             } else if (EAGAIN != err)
1269                 pr2serr_lk("%s: sigtimedwait() errno=%d\n", __func__, err);
1270         }
1271         if (SIGINT == sig_number) {
1272             pr2serr_lk("%sinterrupted by SIGINT\n", my_name);
1273             stop_both(clp);
1274             pthread_cond_broadcast(&clp->out_sync_cv);
1275             sigprocmask(SIG_SETMASK, &orig_signal_set, NULL);
1276             raise(SIGINT);
1277             break;
1278         }
1279         if (SIGUSR2 == sig_number) {
1280             if (clp->verbose > 2)
1281                 pr2serr_lk("%s: interrupted by SIGUSR2\n", __func__);
1282             break;
1283         }
1284         if (shutting_down)
1285             break;
1286     }           /* end of while loop */
1287     if (clp->verbose > 3)
1288         pr2serr_lk("%s: exiting\n", __func__);
1289 
1290     return NULL;
1291 }
1292 
1293 static void *
mrq_abort_thread(void * v_maip)1294 mrq_abort_thread(void * v_maip)
1295 {
1296     int res, err;
1297     int n = 0;
1298     int seed;
1299     unsigned int rn;
1300     Mrq_abort_info l_mai = *(Mrq_abort_info *)v_maip;
1301     struct sg_io_v4 ctl_v4 {};
1302 
1303 #ifdef HAVE_GETRANDOM
1304     {
1305         ssize_t ssz = getrandom(&seed, sizeof(seed), GRND_NONBLOCK);
1306 
1307         if (ssz < (ssize_t)sizeof(seed)) {
1308             pr2serr("getrandom() failed, ret=%d\n", (int)ssz);
1309             seed = (int)time(NULL);
1310         }
1311     }
1312 #else
1313     seed = (int)time(NULL);    /* use seconds since epoch as proxy */
1314 #endif
1315     if (l_mai.debug)
1316         pr2serr_lk("%s: from_id=%d: to abort mrq_pack_id=%d\n", __func__,
1317                    l_mai.from_tid, l_mai.mrq_id);
1318     res = ioctl(l_mai.fd, SG_GET_NUM_WAITING, &n);
1319     ++num_waiting_calls;
1320     if (res < 0) {
1321         err = errno;
1322         pr2serr_lk("%s: ioctl(SG_GET_NUM_WAITING) failed: %s [%d]\n",
1323                    __func__, safe_strerror(err), err);
1324     } else if (l_mai.debug)
1325         pr2serr_lk("%s: num_waiting=%d\n", __func__, n);
1326 
1327     Rand_uint * ruip = new Rand_uint(5, 500, seed);
1328     struct timespec tspec = {0, 4000 /* 4 usecs */};
1329     rn = ruip->get();
1330     tspec.tv_nsec = rn * 1000;
1331     if (l_mai.debug > 1)
1332         pr2serr_lk("%s: /dev/urandom seed=0x%x delay=%u microsecs\n",
1333                    __func__, seed, rn);
1334     if (rn >= 20)
1335         nanosleep(&tspec, NULL);
1336     else if (l_mai.debug > 1)
1337         pr2serr_lk("%s: skipping nanosleep cause delay < 20 usecs\n",
1338                    __func__);
1339 
1340     ctl_v4.guard = 'Q';
1341     ctl_v4.flags = SGV4_FLAG_MULTIPLE_REQS;
1342     ctl_v4.request_extra = l_mai.mrq_id;
1343     ++num_mrq_abort_req;
1344     res = ioctl(l_mai.fd, SG_IOABORT, &ctl_v4);
1345     if (res < 0) {
1346         err = errno;
1347         if (ENODATA == err)
1348             pr2serr_lk("%s: ioctl(SG_IOABORT) no match on "
1349                        "MRQ pack_id=%d\n", __func__, l_mai.mrq_id);
1350         else
1351             pr2serr_lk("%s: MRQ ioctl(SG_IOABORT) failed: %s [%d]\n",
1352                        __func__, safe_strerror(err), err);
1353     } else {
1354         ++num_mrq_abort_req_success;
1355         if (l_mai.debug > 1)
1356             pr2serr_lk("%s: from_id=%d sent ioctl(SG_IOABORT) on MRQ rq_id="
1357                        "%d, success\n", __func__, l_mai.from_tid,
1358                        l_mai.mrq_id);
1359     }
1360     delete ruip;
1361     return NULL;
1362 }
1363 
1364 static bool
sg_share_prepare(int write_side_fd,int read_side_fd,int id,bool vb_b)1365 sg_share_prepare(int write_side_fd, int read_side_fd, int id, bool vb_b)
1366 {
1367     struct sg_extended_info sei {};
1368     struct sg_extended_info * seip = &sei;
1369 
1370     seip->sei_wr_mask |= SG_SEIM_SHARE_FD;
1371     seip->sei_rd_mask |= SG_SEIM_SHARE_FD;
1372     seip->share_fd = read_side_fd;
1373     if (ioctl(write_side_fd, SG_SET_GET_EXTENDED, seip) < 0) {
1374         pr2serr_lk("tid=%d: ioctl(EXTENDED(shared_fd=%d), failed "
1375                    "errno=%d %s\n", id, read_side_fd, errno,
1376                    strerror(errno));
1377         return false;
1378     }
1379     if (vb_b)
1380         pr2serr_lk("%s: tid=%d: ioctl(EXTENDED(shared_fd)) ok, "
1381                    "read_side_fd=%d, write_side_fd=%d\n", __func__,
1382                    id, read_side_fd, write_side_fd);
1383     return true;
1384 }
1385 
1386 static void
sg_unshare(int sg_fd,int id,bool vb_b)1387 sg_unshare(int sg_fd, int id, bool vb_b)
1388 {
1389     struct sg_extended_info sei {};
1390     struct sg_extended_info * seip = &sei;
1391 
1392     seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
1393     seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
1394     seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_UNSHARE;
1395     seip->ctl_flags |= SG_CTL_FLAGM_UNSHARE; /* needs to be set to unshare */
1396     if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0) {
1397         pr2serr_lk("tid=%d: ioctl(EXTENDED(UNSHARE), failed errno=%d %s\n",
1398                    id,  errno, strerror(errno));
1399         return;
1400     }
1401     if (vb_b)
1402         pr2serr_lk("tid=%d: ioctl(UNSHARE) ok\n", id);
1403 }
1404 
1405 static void
sg_noshare_enlarge(int sg_fd,bool vb_b)1406 sg_noshare_enlarge(int sg_fd, bool vb_b)
1407 {
1408     if (sg_version_ge_40045) {
1409         struct sg_extended_info sei {};
1410         struct sg_extended_info * seip = &sei;
1411 
1412         sei.sei_wr_mask |= SG_SEIM_TOT_FD_THRESH;
1413         seip->tot_fd_thresh = 96 * 1024 * 1024;
1414         if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0) {
1415             pr2serr_lk("%s: ioctl(EXTENDED(TOT_FD_THRESH), failed errno=%d "
1416                        "%s\n", __func__, errno, strerror(errno));
1417             return;
1418         }
1419         if (vb_b)
1420             pr2serr_lk("ioctl(TOT_FD_THRESH) ok\n");
1421     }
1422 }
1423 
1424 static void
sg_take_snap(int sg_fd,int id,bool vb_b)1425 sg_take_snap(int sg_fd, int id, bool vb_b)
1426 {
1427     struct sg_extended_info sei {};
1428     struct sg_extended_info * seip = &sei;
1429 
1430     seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
1431     seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
1432     seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_SNAP_DEV;
1433     seip->ctl_flags &= ~SG_CTL_FLAGM_SNAP_DEV;   /* 0 --> append */
1434     if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0) {
1435         pr2serr_lk("tid=%d: ioctl(EXTENDED(SNAP_DEV), failed errno=%d %s\n",
1436                    id,  errno, strerror(errno));
1437         return;
1438     }
1439     if (vb_b)
1440         pr2serr_lk("tid=%d: ioctl(SNAP_DEV) ok\n", id);
1441 }
1442 
1443 static void
cleanup_in(void * v_clp)1444 cleanup_in(void * v_clp)
1445 {
1446     struct global_collection * clp = (struct global_collection *)v_clp;
1447 
1448     pr2serr("thread cancelled while in mutex held\n");
1449     stop_both(clp);
1450     pthread_mutex_unlock(&clp->in_mutex);
1451     pthread_cond_broadcast(&clp->out_sync_cv);
1452 }
1453 
1454 static void
cleanup_out(void * v_clp)1455 cleanup_out(void * v_clp)
1456 {
1457     struct global_collection * clp = (struct global_collection *)v_clp;
1458 
1459     pr2serr("thread cancelled while out_mutex held\n");
1460     stop_both(clp);
1461     pthread_mutex_unlock(&clp->out_mutex);
1462     pthread_cond_broadcast(&clp->out_sync_cv);
1463 }
1464 
buffp_onto_next(Rq_elem * rep)1465 static void inline buffp_onto_next(Rq_elem * rep)
1466 {
1467     struct global_collection * clp = rep->clp;
1468 
1469     if ((clp->nmrqs > 0) && clp->unbalanced_mrq) {
1470         ++rep->mrq_index;
1471         if (rep->mrq_index >= clp->nmrqs)
1472             rep->mrq_index = 0;         /* wrap */
1473     }
1474 }
1475 
1476 static inline uint8_t *
get_buffp(Rq_elem * rep)1477 get_buffp(Rq_elem * rep)
1478 {
1479     struct global_collection * clp = rep->clp;
1480 
1481     if ((clp->nmrqs > 0) && clp->unbalanced_mrq && (rep->mrq_index > 0))
1482         return rep->buffp + (rep->mrq_index * clp->bs * clp->bpt);
1483     else
1484         return rep->buffp;
1485 }
1486 
1487 static void *
read_write_thread(void * v_tip)1488 read_write_thread(void * v_tip)
1489 {
1490     Thread_info * tip;
1491     struct global_collection * clp;
1492     Rq_elem rel {};
1493     Rq_elem * rep = &rel;
1494     int n, sz, blocks, status, vb, err, res, wr_blks, c_addr;
1495     int num_sg = 0;
1496     int64_t my_index;
1497     volatile bool stop_after_write = false;
1498     bool own_infd = false;
1499     bool in_is_sg, in_mmap, out_is_sg, out_mmap;
1500     bool own_outfd = false;
1501     bool own_out2fd = false;
1502     bool share_and_ofreg;
1503     mrq_arr_t deferred_arr;  /* MRQ deferred array (vector) */
1504 
1505     tip = (Thread_info *)v_tip;
1506     clp = tip->gcp;
1507     vb = clp->verbose;
1508     rep->bs = clp->bs;
1509     sz = clp->bpt * rep->bs;
1510     c_addr = clp->chkaddr;
1511     in_is_sg = (FT_SG == clp->in_type);
1512     in_mmap = (in_is_sg && (clp->in_flags.mmap > 0));
1513     out_is_sg = (FT_SG == clp->out_type);
1514     out_mmap = (out_is_sg && (clp->out_flags.mmap > 0));
1515     /* Following clp members are constant during lifetime of thread */
1516     rep->clp = clp;
1517     rep->id = tip->id;
1518     if (vb > 2)
1519         pr2serr_lk("%d <-- Starting worker thread\n", rep->id);
1520     if (! (in_mmap || out_mmap)) {
1521         n = sz;
1522         if (clp->unbalanced_mrq)
1523             n *= clp->nmrqs;
1524         rep->buffp = sg_memalign(n, 0 /* page align */, &rep->alloc_bp,
1525                                  false);
1526         if (NULL == rep->buffp)
1527             err_exit(ENOMEM, "out of memory creating user buffers\n");
1528     }
1529     rep->infd = clp->infd;
1530     rep->outfd = clp->outfd;
1531     rep->out2fd = clp->out2fd;
1532     rep->outregfd = clp->outregfd;
1533     rep->rep_count = 0;
1534     if (clp->unbalanced_mrq && (clp->nmrqs > 0))
1535         rep->mrq_index = clp->nmrqs - 1;
1536 
1537     if (rep->infd == rep->outfd) {
1538         if (in_is_sg)
1539             rep->same_sg = true;
1540     } else if (in_is_sg && out_is_sg)
1541         rep->both_sg = true;
1542     else if (in_is_sg)
1543         rep->only_in_sg = true;
1544     else if (out_is_sg)
1545         rep->only_out_sg = true;
1546 
1547     if (clp->in_flags.random) {
1548 #ifdef HAVE_GETRANDOM
1549         ssize_t ssz = getrandom(&rep->seed, sizeof(rep->seed), GRND_NONBLOCK);
1550 
1551         if (ssz < (ssize_t)sizeof(rep->seed)) {
1552             pr2serr_lk("thread=%d: getrandom() failed, ret=%d\n",
1553                        rep->id, (int)ssz);
1554             rep->seed = (long)time(NULL);
1555         }
1556 #else
1557         rep->seed = (long)time(NULL);    /* use seconds since epoch as proxy */
1558 #endif
1559         if (vb > 1)
1560             pr2serr_lk("thread=%d: seed=%ld\n", rep->id, rep->seed);
1561 #ifdef HAVE_SRAND48_R
1562         srand48_r(rep->seed, &rep->drand);
1563 #else
1564         srand48(rep->seed);
1565 #endif
1566     }
1567     if (clp->in_flags.same_fds || clp->out_flags.same_fds)
1568         ;
1569     else {
1570         int fd;
1571 
1572         if (in_is_sg && clp->infp) {
1573             fd = sg_in_open(clp, clp->infp, (in_mmap ? &rep->buffp : NULL),
1574                             (in_mmap ? &rep->mmap_len : NULL));
1575             if (fd < 0)
1576                 goto fini;
1577             rep->infd = fd;
1578             rep->mmap_active = in_mmap ? clp->in_flags.mmap : 0;
1579             if (in_mmap && (vb > 4))
1580                 pr2serr_lk("thread=%d: mmap buffp=%p\n", rep->id, rep->buffp);
1581             own_infd = true;
1582             ++num_sg;
1583             if (vb > 2)
1584                 pr2serr_lk("thread=%d: opened local sg IFILE\n", rep->id);
1585         }
1586         if (out_is_sg && clp->outfp) {
1587             fd = sg_out_open(clp, clp->outfp, (out_mmap ? &rep->buffp : NULL),
1588                              (out_mmap ? &rep->mmap_len : NULL));
1589             if (fd < 0)
1590                 goto fini;
1591             rep->outfd = fd;
1592             if (! rep->mmap_active)
1593                 rep->mmap_active = out_mmap ? clp->out_flags.mmap : 0;
1594             if (out_mmap && (vb > 4))
1595                 pr2serr_lk("thread=%d: mmap buffp=%p\n", rep->id, rep->buffp);
1596             own_outfd = true;
1597             ++num_sg;
1598             if (vb > 2)
1599                 pr2serr_lk("thread=%d: opened local sg OFILE\n", rep->id);
1600         }
1601         if ((FT_SG == clp->out2_type) && clp->out2fp) {
1602             fd = sg_out_open(clp, clp->out2fp,
1603                              (out_mmap ? &rep->buffp : NULL),
1604                              (out_mmap ? &rep->mmap_len : NULL));
1605             if (fd < 0)
1606                 goto fini;
1607             rep->out2fd = fd;
1608             own_out2fd = true;
1609             if (vb > 2)
1610                 pr2serr_lk("thread=%d: opened local sg OFILE2\n", rep->id);
1611         }
1612     }
1613     if (vb > 2) {
1614         if (in_is_sg && (! own_infd))
1615             pr2serr_lk("thread=%d: using global sg IFILE, fd=%d\n", rep->id,
1616                        rep->infd);
1617         if (out_is_sg && (! own_outfd))
1618             pr2serr_lk("thread=%d: using global sg OFILE, fd=%d\n", rep->id,
1619                        rep->outfd);
1620         if ((FT_SG == clp->out2_type) && (! own_out2fd))
1621             pr2serr_lk("thread=%d: using global sg OFILE2, fd=%d\n", rep->id,
1622                        rep->out2fd);
1623     }
1624     if (!sg_version_ge_40045) {
1625         if (vb > 4)
1626             pr2serr_lk("thread=%d: Skipping share because driver too old\n",
1627                        rep->id);
1628     } else if (clp->noshare) {
1629         if (vb > 4)
1630             pr2serr_lk("thread=%d: Skipping IFILE share with OFILE due to "
1631                        "noshare=1\n", rep->id);
1632     } else if (sg_version_ge_40045 && in_is_sg && out_is_sg)
1633         rep->has_share = sg_share_prepare(rep->outfd, rep->infd, rep->id,
1634                                           vb > 9);
1635     if (vb > 9)
1636         pr2serr_lk("tid=%d, has_share=%s\n", rep->id,
1637                    (rep->has_share ? "true" : "false"));
1638     share_and_ofreg = (rep->has_share && (rep->outregfd >= 0));
1639 
1640     /* vvvvvvvvvvvvvv  Main segment copy loop  vvvvvvvvvvvvvvvvvvvvvvv */
1641     while (1) {
1642         rep->wr = false;
1643         my_index = atomic_fetch_add(&pos_index, (long int)clp->bpt);
1644         /* Start of READ half of a segment */
1645         buffp_onto_next(rep);
1646         status = pthread_mutex_lock(&clp->in_mutex);
1647         if (0 != status) err_exit(status, "lock in_mutex");
1648 
1649         if (dd_count >= 0) {
1650             if (my_index >= dd_count) {
1651                 status = pthread_mutex_unlock(&clp->in_mutex);
1652                 if (0 != status) err_exit(status, "unlock in_mutex");
1653                 if ((clp->nmrqs > 0) && (deferred_arr.first.size() > 0)) {
1654                     if (vb > 2)
1655                         pr2serr_lk("thread=%d: tail-end my_index>=dd_count, "
1656                                    "to_do=%u\n", rep->id,
1657                                    (uint32_t)deferred_arr.first.size());
1658                     res = sgh_do_deferred_mrq(rep, deferred_arr);
1659                     if (res)
1660                         pr2serr_lk("%s tid=%d: sgh_do_deferred_mrq failed\n",
1661                                    __func__, rep->id);
1662                 }
1663                 break;  /* at or beyond end, so leave loop >>>>>>>>>>  */
1664             } else if ((my_index + clp->bpt) > dd_count)
1665                 blocks = dd_count - my_index;
1666             else
1667                 blocks = clp->bpt;
1668         } else
1669             blocks = clp->bpt;
1670 
1671         rep->iblk = clp->skip + my_index;
1672         rep->oblk = clp->seek + my_index;
1673         rep->num_blks = blocks;
1674 
1675         // clp->in_blk += blocks;
1676         // clp->in_count -= blocks;
1677 
1678         pthread_cleanup_push(cleanup_in, (void *)clp);
1679         if (in_is_sg)
1680             sg_in_rd_cmd(clp, rep, deferred_arr);
1681         else {
1682             stop_after_write = normal_in_rd(rep, blocks);
1683             status = pthread_mutex_unlock(&clp->in_mutex);
1684             if (0 != status) err_exit(status, "unlock in_mutex");
1685         }
1686         if (c_addr && (rep->bs > 3)) {
1687             int k, j, off, num;
1688             uint32_t addr = (uint32_t)rep->iblk;
1689 
1690             num = (1 == c_addr) ? 4 : (rep->bs - 3);
1691             for (k = 0, off = 0; k < blocks; ++k, ++addr, off += rep->bs) {
1692                 for (j = 0; j < num; j += 4) {
1693                     if (addr != sg_get_unaligned_be32(rep->buffp + off + j))
1694                         break;
1695                 }
1696                 if (j < num)
1697                     break;
1698             }
1699             if (k < blocks) {
1700                 pr2serr("%s: chkaddr failure at addr=0x%x\n", __func__, addr);
1701                 exit_status = SG_LIB_CAT_MISCOMPARE;
1702                 ++num_miscompare;
1703                 stop_both(clp);
1704             }
1705         }
1706         pthread_cleanup_pop(0);
1707         ++rep->rep_count;
1708 
1709         /* Start of WRITE part of a segment */
1710         rep->wr = true;
1711         status = pthread_mutex_lock(&clp->out_mutex);
1712         if (0 != status) err_exit(status, "lock out_mutex");
1713 
1714         /* Make sure the OFILE (+ OFREG) are in same sequence as IFILE */
1715         if (clp->in_flags.random)
1716             goto skip_force_out_sequence;
1717         if ((rep->outregfd < 0) && in_is_sg && out_is_sg)
1718             goto skip_force_out_sequence;
1719         if (share_and_ofreg || (FT_DEV_NULL != clp->out_type)) {
1720             while ((! clp->out_stop.load()) &&
1721                    (rep->oblk != clp->out_blk.load())) {
1722                 /* if write would be out of sequence then wait */
1723                 pthread_cleanup_push(cleanup_out, (void *)clp);
1724                 status = pthread_cond_wait(&clp->out_sync_cv, &clp->out_mutex);
1725                 if (0 != status) err_exit(status, "cond out_sync_cv");
1726                 pthread_cleanup_pop(0);
1727             }
1728         }
1729 
1730 skip_force_out_sequence:
1731         if (clp->out_stop.load() || (clp->out_count.load() <= 0)) {
1732             if (! clp->out_stop.load())
1733                 clp->out_stop = true;
1734             status = pthread_mutex_unlock(&clp->out_mutex);
1735             if (0 != status) err_exit(status, "unlock out_mutex");
1736             break;      /* stop requested so leave loop >>>>>>>>>>  */
1737         }
1738         if (stop_after_write)
1739             clp->out_stop = true;
1740 
1741         clp->out_count -= blocks;
1742         clp->out_blk += blocks;
1743 
1744         pthread_cleanup_push(cleanup_out, (void *)clp);
1745         if (rep->outregfd >= 0) {
1746             res = write(rep->outregfd, get_buffp(rep),
1747                         rep->bs * rep->num_blks);
1748             err = errno;
1749             if (res < 0)
1750                 pr2serr_lk("%s: tid=%d: write(outregfd) failed: %s\n",
1751                            __func__, rep->id, strerror(err));
1752             else if (vb > 9)
1753                 pr2serr_lk("%s: tid=%d: write(outregfd), fd=%d, num_blks=%d"
1754                            "\n", __func__, rep->id, rep->outregfd,
1755                            rep->num_blks);
1756         }
1757         /* Output to OFILE */
1758         wr_blks = rep->num_blks;
1759         if (out_is_sg) {
1760             sg_out_wr_cmd(rep, deferred_arr, false, clp->prefetch);
1761             ++rep->rep_count;
1762         } else if (FT_DEV_NULL == clp->out_type) {
1763             /* skip actual write operation */
1764             wr_blks = 0;
1765             clp->out_rem_count -= blocks;
1766             status = pthread_mutex_unlock(&clp->out_mutex);
1767             if (0 != status) err_exit(status, "unlock out_mutex");
1768         } else {
1769             normal_out_wr(rep, blocks);
1770             status = pthread_mutex_unlock(&clp->out_mutex);
1771             if (0 != status) err_exit(status, "unlock out_mutex");
1772             ++rep->rep_count;
1773         }
1774         pthread_cleanup_pop(0);
1775 
1776         /* Output to OFILE2 if sg device */
1777         if ((clp->out2fd >= 0) && (FT_SG == clp->out2_type)) {
1778             pthread_cleanup_push(cleanup_out, (void *)clp);
1779             status = pthread_mutex_lock(&clp->out2_mutex);
1780             if (0 != status) err_exit(status, "lock out2_mutex");
1781             /* releases out2_mutex mid operation */
1782             sg_out_wr_cmd(rep, deferred_arr, true, false);
1783 
1784             pthread_cleanup_pop(0);
1785         }
1786         if (0 == rep->num_blks) {
1787             if ((clp->nmrqs > 0) && (deferred_arr.first.size() > 0)) {
1788                 if (wr_blks > 0)
1789                     rep->out_mrq_q_blks += wr_blks;
1790                 if (vb > 2)
1791                     pr2serr_lk("thread=%d: tail-end, to_do=%u\n", rep->id,
1792                                (uint32_t)deferred_arr.first.size());
1793                 res = sgh_do_deferred_mrq(rep, deferred_arr);
1794                 if (res)
1795                     pr2serr_lk("%s tid=%d: sgh_do_deferred_mrq failed\n",
1796                                __func__, rep->id);
1797             }
1798             clp->out_stop = true;
1799             stop_after_write = true;
1800             break;      /* read nothing so leave loop >>>>>>>>>>  */
1801         }
1802         // if ((! rep->has_share) && (FT_DEV_NULL != clp->out_type))
1803         pthread_cond_broadcast(&clp->out_sync_cv);
1804         if (stop_after_write)
1805             break;      /* leaving main loop >>>>>>>>> */
1806     }   /* ^^^^^^^^^^ end of main while loop which copies segments ^^^^^^ */
1807 
1808     status = pthread_mutex_lock(&clp->in_mutex);
1809     if (0 != status) err_exit(status, "lock in_mutex");
1810     if (! clp->in_stop.load())
1811         clp->in_stop = true;  /* flag other workers to stop */
1812     status = pthread_mutex_unlock(&clp->in_mutex);
1813     if (0 != status) err_exit(status, "unlock in_mutex");
1814 
1815 fini:
1816     if ((1 == rep->mmap_active) && (rep->mmap_len > 0)) {
1817         if (munmap(rep->buffp, rep->mmap_len) < 0) {
1818             err = errno;
1819             char bb[STRERR_BUFF_LEN + 1];
1820 
1821             pr2serr_lk("thread=%d: munmap() failed: %s\n", rep->id,
1822                        tsafe_strerror(err, bb));
1823         }
1824         if (vb > 4)
1825             pr2serr_lk("thread=%d: munmap(%p, %d)\n", rep->id, rep->buffp,
1826                        rep->mmap_len);
1827         rep->mmap_active = 0;
1828     }
1829     if (rep->alloc_bp) {
1830         free(rep->alloc_bp);
1831         rep->alloc_bp = NULL;
1832         rep->buffp = NULL;
1833     }
1834 
1835     if (sg_version_ge_40045) {
1836         if (clp->noshare) {
1837             if ((clp->nmrqs > 0) && clp->unshare)
1838                 sg_unshare(rep->infd, rep->id, vb > 9);
1839         } else if (in_is_sg && out_is_sg)
1840             if (clp->unshare)
1841             sg_unshare(rep->infd, rep->id, vb > 9);
1842     }
1843     if (own_infd && (rep->infd >= 0)) {
1844         if (vb && in_is_sg) {
1845             ++num_waiting_calls;
1846             if (ioctl(rep->infd, SG_GET_NUM_WAITING, &n) >= 0) {
1847                 if (n > 0)
1848                     pr2serr_lk("%s: tid=%d: num_waiting=%d prior close(in)\n",
1849                                __func__, rep->id, n);
1850             } else {
1851                 err = errno;
1852                 pr2serr_lk("%s: [%d] ioctl(SG_GET_NUM_WAITING) errno=%d: "
1853                            "%s\n", __func__, rep->id, err, strerror(err));
1854             }
1855         }
1856         close(rep->infd);
1857     }
1858     if (own_outfd && (rep->outfd >= 0)) {
1859         if (vb && out_is_sg) {
1860             ++num_waiting_calls;
1861             if (ioctl(rep->outfd, SG_GET_NUM_WAITING, &n) >= 0) {
1862                 if (n > 0)
1863                     pr2serr_lk("%s: tid=%d: num_waiting=%d prior "
1864                                "close(out)\n", __func__, rep->id, n);
1865             } else {
1866                 err = errno;
1867                 pr2serr_lk("%s: [%d] ioctl(SG_GET_NUM_WAITING) errno=%d: "
1868                            "%s\n", __func__, rep->id, err, strerror(err));
1869             }
1870         }
1871         close(rep->outfd);
1872     }
1873     if (own_out2fd && (rep->out2fd >= 0))
1874         close(rep->out2fd);
1875     pthread_cond_broadcast(&clp->out_sync_cv);
1876     return stop_after_write ? NULL : clp;
1877 }
1878 
1879 /* N.B. A return of true means it wants to stop the copy. So false is the
1880  * 'good' reply (i.e. keep going). */
1881 static bool
normal_in_rd(Rq_elem * rep,int blocks)1882 normal_in_rd(Rq_elem * rep, int blocks)
1883 {
1884     struct global_collection * clp = rep->clp;
1885     bool stop_after_write = false;
1886     bool same_fds = clp->in_flags.same_fds || clp->out_flags.same_fds;
1887     int res;
1888 
1889     if (clp->verbose > 4)
1890         pr2serr_lk("%s: tid=%d: iblk=%" PRIu64 ", blocks=%d\n", __func__,
1891                    rep->id, rep->iblk, blocks);
1892     if (FT_RANDOM_0_FF == clp->in_type) {
1893         int k, j;
1894         const int jbump = sizeof(uint32_t);
1895         long rn;
1896         uint8_t * bp;
1897 
1898         if (clp->in_flags.zero && clp->in_flags.ff && (rep->bs >= 4)) {
1899             uint32_t pos = (uint32_t)rep->iblk;
1900             uint32_t off;
1901 
1902             for (k = 0, off = 0; k < blocks; ++k, off += rep->bs, ++pos) {
1903                 for (j = 0; j < (rep->bs - 3); j += 4)
1904                     sg_put_unaligned_be32(pos, rep->buffp + off + j);
1905             }
1906         } else if (clp->in_flags.zero)
1907             memset(rep->buffp, 0, blocks * rep->bs);
1908         else if (clp->in_flags.ff)
1909             memset(rep->buffp, 0xff, blocks * rep->bs);
1910         else {
1911             for (k = 0, bp = rep->buffp; k < blocks; ++k, bp += rep->bs) {
1912                 for (j = 0; j < rep->bs; j += jbump) {
1913                     /* mrand48 takes uniformly from [-2^31, 2^31) */
1914 #ifdef HAVE_SRAND48_R
1915                     mrand48_r(&rep->drand, &rn);
1916 #else
1917                     rn = mrand48();
1918 #endif
1919                     *((uint32_t *)(bp + j)) = (uint32_t)rn;
1920                 }
1921             }
1922         }
1923         clp->in_rem_count -= blocks;
1924         return stop_after_write;
1925     }
1926     if (! same_fds) {   /* each has own file pointer, so we need to move it */
1927         int64_t pos = rep->iblk * rep->bs;
1928 
1929         if (lseek64(rep->infd, pos, SEEK_SET) < 0) {    /* problem if pipe! */
1930             pr2serr_lk("%s: tid=%d: >> lseek64(%" PRId64 "): %s\n", __func__,
1931                        rep->id, pos, safe_strerror(errno));
1932             stop_both(clp);
1933             return true;
1934         }
1935     }
1936     /* enters holding in_mutex */
1937     while (((res = read(clp->infd, rep->buffp, blocks * rep->bs)) < 0) &&
1938            ((EINTR == errno) || (EAGAIN == errno)))
1939         std::this_thread::yield();/* another thread may be able to progress */
1940     if (res < 0) {
1941         char strerr_buff[STRERR_BUFF_LEN + 1];
1942 
1943         if (clp->in_flags.coe) {
1944             memset(rep->buffp, 0, rep->num_blks * rep->bs);
1945             pr2serr_lk("tid=%d: >> substituted zeros for in blk=%" PRId64
1946                       " for %d bytes, %s\n", rep->id, rep->iblk,
1947                        rep->num_blks * rep->bs,
1948                        tsafe_strerror(errno, strerr_buff));
1949             res = rep->num_blks * rep->bs;
1950         }
1951         else {
1952             pr2serr_lk("tid=%d: error in normal read, %s\n", rep->id,
1953                        tsafe_strerror(errno, strerr_buff));
1954             stop_both(clp);
1955             return true;
1956         }
1957     }
1958     if (res < blocks * rep->bs) {
1959         // int o_blocks = blocks;
1960 
1961         stop_after_write = true;
1962         blocks = res / rep->bs;
1963         if ((res % rep->bs) > 0) {
1964             blocks++;
1965             clp->in_partial++;
1966         }
1967         /* Reverse out + re-apply blocks on clp */
1968         // clp->in_blk -= o_blocks;
1969         // clp->in_count += o_blocks;
1970         rep->num_blks = blocks;
1971         // clp->in_blk += blocks;
1972         // clp->in_count -= blocks;
1973     }
1974     clp->in_rem_count -= blocks;
1975     return stop_after_write;
1976 }
1977 
1978 static void
normal_out_wr(Rq_elem * rep,int blocks)1979 normal_out_wr(Rq_elem * rep, int blocks)
1980 {
1981     int res;
1982     struct global_collection * clp = rep->clp;
1983 
1984     /* enters holding out_mutex */
1985     if (clp->verbose > 4)
1986         pr2serr_lk("%s: tid=%d: oblk=%" PRIu64 ", blocks=%d\n", __func__,
1987                    rep->id, rep->oblk, blocks);
1988     while (((res = write(clp->outfd, rep->buffp, blocks * rep->bs))
1989             < 0) && ((EINTR == errno) || (EAGAIN == errno)))
1990         std::this_thread::yield();/* another thread may be able to progress */
1991     if (res < 0) {
1992         char strerr_buff[STRERR_BUFF_LEN + 1];
1993 
1994         if (clp->out_flags.coe) {
1995             pr2serr_lk("tid=%d: >> ignored error for out blk=%" PRId64
1996                        " for %d bytes, %s\n", rep->id, rep->oblk,
1997                        rep->num_blks * rep->bs,
1998                        tsafe_strerror(errno, strerr_buff));
1999             res = rep->num_blks * rep->bs;
2000         }
2001         else {
2002             pr2serr_lk("tid=%d: error normal write, %s\n", rep->id,
2003                        tsafe_strerror(errno, strerr_buff));
2004             stop_both(clp);
2005             return;
2006         }
2007     }
2008     if (res < blocks * rep->bs) {
2009         blocks = res / rep->bs;
2010         if ((res % rep->bs) > 0) {
2011             blocks++;
2012             clp->out_partial++;
2013         }
2014         rep->num_blks = blocks;
2015     }
2016     clp->out_rem_count -= blocks;
2017 }
2018 
2019 static int
sg_build_scsi_cdb(uint8_t * cdbp,int cdb_sz,unsigned int blocks,int64_t start_block,bool ver_true,bool write_true,bool fua,bool dpo)2020 sg_build_scsi_cdb(uint8_t * cdbp, int cdb_sz, unsigned int blocks,
2021                   int64_t start_block, bool ver_true, bool write_true,
2022                   bool fua, bool dpo)
2023 {
2024     int rd_opcode[] = {0x8, 0x28, 0xa8, 0x88};
2025     int ve_opcode[] = {0xff /* no VER(6) */, 0x2f, 0xaf, 0x8f};
2026     int wr_opcode[] = {0xa, 0x2a, 0xaa, 0x8a};
2027     int sz_ind;
2028 
2029     memset(cdbp, 0, cdb_sz);
2030     if (ver_true) {     /* only support VERIFY(10) */
2031         if (cdb_sz < 10) {
2032             pr2serr_lk("%sonly support VERIFY(10)\n", my_name);
2033             return 1;
2034         }
2035         cdb_sz = 10;
2036         fua = false;
2037         cdbp[1] |= 0x2; /* BYTCHK=1 --> sending dout for comparison */
2038         cdbp[0] = ve_opcode[1];
2039     }
2040     if (dpo)
2041         cdbp[1] |= 0x10;
2042     if (fua)
2043         cdbp[1] |= 0x8;
2044     switch (cdb_sz) {
2045     case 6:
2046         sz_ind = 0;
2047         cdbp[0] = (uint8_t)(write_true ? wr_opcode[sz_ind] :
2048                                          rd_opcode[sz_ind]);
2049         sg_put_unaligned_be24(0x1fffff & start_block, cdbp + 1);
2050         cdbp[4] = (256 == blocks) ? 0 : (uint8_t)blocks;
2051         if (blocks > 256) {
2052             pr2serr_lk("%sfor 6 byte commands, maximum number of blocks is "
2053                        "256\n", my_name);
2054             return 1;
2055         }
2056         if ((start_block + blocks - 1) & (~0x1fffff)) {
2057             pr2serr_lk("%sfor 6 byte commands, can't address blocks beyond "
2058                        "%d\n", my_name, 0x1fffff);
2059             return 1;
2060         }
2061         if (dpo || fua) {
2062             pr2serr_lk("%sfor 6 byte commands, neither dpo nor fua bits "
2063                        "supported\n", my_name);
2064             return 1;
2065         }
2066         break;
2067     case 10:
2068         if (! ver_true) {
2069             sz_ind = 1;
2070             cdbp[0] = (uint8_t)(write_true ? wr_opcode[sz_ind] :
2071                                              rd_opcode[sz_ind]);
2072         }
2073         sg_put_unaligned_be32((uint32_t)start_block, cdbp + 2);
2074         sg_put_unaligned_be16((uint16_t)blocks, cdbp + 7);
2075         if (blocks & (~0xffff)) {
2076             pr2serr_lk("%sfor 10 byte commands, maximum number of blocks is "
2077                        "%d\n", my_name, 0xffff);
2078             return 1;
2079         }
2080         break;
2081     case 12:
2082         sz_ind = 2;
2083         cdbp[0] = (uint8_t)(write_true ? wr_opcode[sz_ind] :
2084                                          rd_opcode[sz_ind]);
2085         sg_put_unaligned_be32((uint32_t)start_block, cdbp + 2);
2086         sg_put_unaligned_be32((uint32_t)blocks, cdbp + 6);
2087         break;
2088     case 16:
2089         sz_ind = 3;
2090         cdbp[0] = (uint8_t)(write_true ? wr_opcode[sz_ind] :
2091                                          rd_opcode[sz_ind]);
2092         sg_put_unaligned_be64((uint64_t)start_block, cdbp + 2);
2093         sg_put_unaligned_be32((uint32_t)blocks, cdbp + 10);
2094         break;
2095     default:
2096         pr2serr_lk("%sexpected cdb size of 6, 10, 12, or 16 but got %d\n",
2097                    my_name, cdb_sz);
2098         return 1;
2099     }
2100     return 0;
2101 }
2102 
2103 /* Enters this function holding in_mutex */
2104 static void
sg_in_rd_cmd(struct global_collection * clp,Rq_elem * rep,mrq_arr_t & def_arr)2105 sg_in_rd_cmd(struct global_collection * clp, Rq_elem * rep,
2106              mrq_arr_t & def_arr)
2107 {
2108     int status, pack_id;
2109 
2110     while (1) {
2111         int res = sg_start_io(rep, def_arr, pack_id, NULL);
2112 
2113         if (1 == res)
2114             err_exit(ENOMEM, "sg starting in command");
2115         else if (res < 0) {
2116             pr2serr_lk("tid=%d: inputting to sg failed, blk=%" PRId64 "\n",
2117                        rep->id, rep->iblk);
2118             status = pthread_mutex_unlock(&clp->in_mutex);
2119             if (0 != status) err_exit(status, "unlock in_mutex");
2120             stop_both(clp);
2121             return;
2122         }
2123         /* Now release in mutex to let other reads run in parallel */
2124         status = pthread_mutex_unlock(&clp->in_mutex);
2125         if (0 != status) err_exit(status, "unlock in_mutex");
2126 
2127         res = sg_finish_io(rep->wr, rep, pack_id, NULL);
2128         switch (res) {
2129         case SG_LIB_CAT_ABORTED_COMMAND:
2130         case SG_LIB_CAT_UNIT_ATTENTION:
2131             /* try again with same addr, count info */
2132             /* now re-acquire in mutex for balance */
2133             /* N.B. This re-read could now be out of read sequence */
2134             status = pthread_mutex_lock(&clp->in_mutex);
2135             if (0 != status) err_exit(status, "lock in_mutex");
2136             break;      /* will loop again */
2137         case SG_LIB_CAT_MEDIUM_HARD:
2138             if (0 == clp->in_flags.coe) {
2139                 pr2serr_lk("error finishing sg in command (medium)\n");
2140                 if (exit_status <= 0)
2141                     exit_status = res;
2142                 stop_both(clp);
2143                 return;
2144             } else {
2145                 memset(get_buffp(rep), 0, rep->num_blks * rep->bs);
2146                 pr2serr_lk("tid=%d: >> substituted zeros for in blk=%" PRId64
2147                            " for %d bytes\n", rep->id, rep->iblk,
2148                            rep->num_blks * rep->bs);
2149             }
2150 #if defined(__GNUC__)
2151 #if (__GNUC__ >= 7)
2152             __attribute__((fallthrough));
2153             /* FALL THROUGH */
2154 #endif
2155 #endif
2156         case 0:
2157             status = pthread_mutex_lock(&clp->in_mutex);
2158             if (0 != status) err_exit(status, "lock in_mutex");
2159             if (rep->dio_incomplete_count || rep->resid) {
2160                 clp->dio_incomplete_count += rep->dio_incomplete_count;
2161                 clp->sum_of_resids += rep->resid;
2162             }
2163             clp->in_rem_count -= rep->num_blks;
2164             status = pthread_mutex_unlock(&clp->in_mutex);
2165             if (0 != status) err_exit(status, "unlock in_mutex");
2166             return;
2167         default:
2168             pr2serr_lk("tid=%d: error finishing sg in command (%d)\n",
2169                        rep->id, res);
2170             if (exit_status <= 0)
2171                 exit_status = res;
2172             stop_both(clp);
2173             return;
2174         }
2175     }           /* end of while (1) loop */
2176 }
2177 
2178 static bool
sg_wr_swap_share(Rq_elem * rep,int to_fd,bool before)2179 sg_wr_swap_share(Rq_elem * rep, int to_fd, bool before)
2180 {
2181     bool not_first = false;
2182     int err = 0;
2183     int k;
2184     int read_side_fd = rep->infd;
2185     struct global_collection * clp = rep->clp;
2186     struct sg_extended_info sei {};
2187     struct sg_extended_info * seip = &sei;
2188 
2189     if (rep->clp->verbose > 2)
2190         pr2serr_lk("%s: tid=%d: to_fd=%d, before=%d\n", __func__, rep->id,
2191                    to_fd, (int)before);
2192     seip->sei_wr_mask |= SG_SEIM_CHG_SHARE_FD;
2193     seip->sei_rd_mask |= SG_SEIM_CHG_SHARE_FD;
2194     seip->share_fd = to_fd;
2195     if (before) {
2196         /* clear READ_SIDE_FINI bit: puts read-side in SG_RQ_SHR_SWAP state */
2197         seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
2198         seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
2199         seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_READ_SIDE_FINI;
2200         seip->ctl_flags &= ~SG_CTL_FLAGM_READ_SIDE_FINI;/* would be 0 anyway */
2201     }
2202     for (k = 0; (ioctl(read_side_fd, SG_SET_GET_EXTENDED, seip) < 0) &&
2203                  (EBUSY == errno); ++k) {
2204         err = errno;
2205         if (k > 10000)
2206             break;
2207         if (! not_first) {
2208             if (clp->verbose > 3)
2209                 pr2serr_lk("tid=%d: ioctl(EXTENDED(change_shared_fd=%d), "
2210                            "failed errno=%d %s\n", rep->id, read_side_fd, err,
2211                            strerror(err));
2212             not_first = true;
2213         }
2214         err = 0;
2215         std::this_thread::yield();/* another thread may be able to progress */
2216     }
2217     if (err) {
2218         pr2serr_lk("tid=%d: ioctl(EXTENDED(change_shared_fd=%d), failed "
2219                    "errno=%d %s\n", rep->id, read_side_fd, err, strerror(err));
2220         return false;
2221     }
2222     if (clp->verbose > 15)
2223         pr2serr_lk("%s: tid=%d: ioctl(EXTENDED(change_shared_fd)) ok, "
2224                    "read_side_fd=%d, to_write_side_fd=%d\n", __func__, rep->id,
2225                    read_side_fd, to_fd);
2226     return true;
2227 }
2228 
2229 /* Enters this function holding out_mutex */
2230 static void
sg_out_wr_cmd(Rq_elem * rep,mrq_arr_t & def_arr,bool is_wr2,bool prefetch)2231 sg_out_wr_cmd(Rq_elem * rep, mrq_arr_t & def_arr, bool is_wr2, bool prefetch)
2232 {
2233     int res, status, pack_id, nblks;
2234     struct global_collection * clp = rep->clp;
2235     uint32_t ofsplit = clp->ofsplit;
2236     pthread_mutex_t * mutexp = is_wr2 ? &clp->out2_mutex : &clp->out_mutex;
2237     struct sg_io_extra xtr {};
2238     struct sg_io_extra * xtrp = &xtr;
2239     const char * wr_or_ver = clp->verify ? "verify" : "out";
2240 
2241     xtrp->is_wr2 = is_wr2;
2242     xtrp->prefetch = prefetch;
2243     nblks = rep->num_blks;
2244     if (rep->has_share && is_wr2)
2245         sg_wr_swap_share(rep, rep->out2fd, true);
2246 
2247     if (prefetch) {
2248 again:
2249         res = sg_start_io(rep, def_arr, pack_id, xtrp);
2250         if (1 == res)
2251             err_exit(ENOMEM, "sg starting out command");
2252         else if (res < 0) {
2253             pr2serr_lk("%ssg %s failed, blk=%" PRId64 "\n",
2254                        my_name, wr_or_ver, rep->oblk);
2255             status = pthread_mutex_unlock(mutexp);
2256             if (0 != status) err_exit(status, "unlock out_mutex");
2257             stop_both(clp);
2258             goto fini;
2259         }
2260         /* Now release in mutex to let other reads run in parallel */
2261         status = pthread_mutex_unlock(mutexp);
2262         if (0 != status) err_exit(status, "unlock out_mutex");
2263 
2264         res = sg_finish_io(rep->wr, rep, pack_id, xtrp);
2265         switch (res) {
2266         case SG_LIB_CAT_ABORTED_COMMAND:
2267         case SG_LIB_CAT_UNIT_ATTENTION:
2268             /* try again with same addr, count info */
2269             /* now re-acquire out mutex for balance */
2270             /* N.B. This re-write could now be out of write sequence */
2271             status = pthread_mutex_lock(mutexp);
2272             if (0 != status) err_exit(status, "lock out_mutex");
2273             goto again;
2274         case SG_LIB_CAT_CONDITION_MET:
2275         case 0:
2276             status = pthread_mutex_lock(mutexp);
2277             if (0 != status) err_exit(status, "unlock out_mutex");
2278             break;
2279         default:
2280             pr2serr_lk("error finishing sg prefetch command (%d)\n", res);
2281             if (exit_status <= 0)
2282                 exit_status = res;
2283             stop_both(clp);
2284             goto fini;
2285         }
2286     }
2287 
2288     /* start write (or verify) on current segment on sg device */
2289     xtrp->prefetch = false;
2290     if ((ofsplit > 0) && (rep->num_blks > (int)ofsplit)) {
2291         xtrp->dout_is_split = true;
2292         xtrp->blk_offset = 0;
2293         xtrp->blks = ofsplit;
2294         nblks = ofsplit;
2295         xtrp->hpv4_ind = 0;
2296     }
2297 split_upper:
2298     while (1) {
2299         res = sg_start_io(rep, def_arr, pack_id, xtrp);
2300         if (1 == res)
2301             err_exit(ENOMEM, "sg starting out command");
2302         else if (res < 0) {
2303             pr2serr_lk("%ssg %s failed, blk=%" PRId64 "\n", my_name,
2304                        wr_or_ver, rep->oblk);
2305             status = pthread_mutex_unlock(mutexp);
2306             if (0 != status) err_exit(status, "unlock out_mutex");
2307             stop_both(clp);
2308             goto fini;
2309         }
2310         /* Now release in mutex to let other reads run in parallel */
2311         status = pthread_mutex_unlock(mutexp);
2312         if (0 != status) err_exit(status, "unlock out_mutex");
2313 
2314         res = sg_finish_io(rep->wr, rep, pack_id, xtrp);
2315         switch (res) {
2316         case SG_LIB_CAT_ABORTED_COMMAND:
2317         case SG_LIB_CAT_UNIT_ATTENTION:
2318             /* try again with same addr, count info */
2319             /* now re-acquire out mutex for balance */
2320             /* N.B. This re-write could now be out of write sequence */
2321             status = pthread_mutex_lock(mutexp);
2322             if (0 != status) err_exit(status, "lock out_mutex");
2323             break;      /* loops around */
2324         case SG_LIB_CAT_MEDIUM_HARD:
2325             if (0 == clp->out_flags.coe) {
2326                 pr2serr_lk("error finishing sg %s command (medium)\n",
2327                            wr_or_ver);
2328                 if (exit_status <= 0)
2329                     exit_status = res;
2330                 stop_both(clp);
2331                 goto fini;
2332             } else
2333                 pr2serr_lk(">> ignored error for %s blk=%" PRId64 " for %d "
2334                            "bytes\n", wr_or_ver, rep->oblk, nblks * rep->bs);
2335 #if defined(__GNUC__)
2336 #if (__GNUC__ >= 7)
2337             __attribute__((fallthrough));
2338             /* FALL THROUGH */
2339 #endif
2340 #endif
2341         case SG_LIB_CAT_CONDITION_MET:
2342         case 0:
2343             if (! is_wr2) {
2344                 status = pthread_mutex_lock(mutexp);
2345                 if (0 != status) err_exit(status, "lock out_mutex");
2346                 if (rep->dio_incomplete_count || rep->resid) {
2347                     clp->dio_incomplete_count += rep->dio_incomplete_count;
2348                     clp->sum_of_resids += rep->resid;
2349                 }
2350                 clp->out_rem_count -= nblks;
2351                 status = pthread_mutex_unlock(mutexp);
2352                 if (0 != status) err_exit(status, "unlock out_mutex");
2353             }
2354             goto fini;
2355         case SG_LIB_CAT_MISCOMPARE:
2356             ++num_miscompare;
2357             // fall through
2358         default:
2359             pr2serr_lk("error finishing sg %s command (%d)\n", wr_or_ver,
2360                        res);
2361             if (exit_status <= 0)
2362                 exit_status = res;
2363             stop_both(clp);
2364             goto fini;
2365         }
2366     }           /* end of while (1) loop */
2367 fini:
2368     if (xtrp->dout_is_split) {  /* set up upper half of split */
2369         if ((0 == xtrp->hpv4_ind) && (rep->num_blks > (int)ofsplit)) {
2370             xtrp->hpv4_ind = 1;
2371             xtrp->blk_offset = ofsplit;
2372             xtrp->blks = rep->num_blks - ofsplit;
2373             nblks = xtrp->blks;
2374             status = pthread_mutex_lock(mutexp);
2375             if (0 != status) err_exit(status, "lock out_mutex");
2376             goto split_upper;
2377         }
2378     }
2379     if (rep->has_share && is_wr2)
2380         sg_wr_swap_share(rep, rep->outfd, false);
2381 }
2382 
2383 static int
process_mrq_response(Rq_elem * rep,const struct sg_io_v4 * ctl_v4p,const struct sg_io_v4 * a_v4p,int num_mrq,uint32_t & good_inblks,uint32_t & good_outblks)2384 process_mrq_response(Rq_elem * rep, const struct sg_io_v4 * ctl_v4p,
2385                      const struct sg_io_v4 * a_v4p, int num_mrq,
2386                      uint32_t & good_inblks, uint32_t & good_outblks)
2387 {
2388     struct global_collection * clp = rep->clp;
2389     bool ok, all_good;
2390     bool sb_in_co = !!(ctl_v4p->response);
2391     int id = rep->id;
2392     int resid = ctl_v4p->din_resid;
2393     int sres = ctl_v4p->spare_out;
2394     int n_subm = num_mrq - ctl_v4p->dout_resid;
2395     int n_cmpl = ctl_v4p->info;
2396     int n_good = 0;
2397     int hole_count = 0;
2398     int vb = clp->verbose;
2399     int k, j, f1, slen, blen;
2400     char b[160];
2401 
2402     blen = sizeof(b);
2403     good_inblks = 0;
2404     good_outblks = 0;
2405     if (vb > 2)
2406         pr2serr_lk("[thread_id=%d] %s: num_mrq=%d, n_subm=%d, n_cmpl=%d\n",
2407                    id, __func__, num_mrq, n_subm, n_cmpl);
2408     if (n_subm < 0) {
2409         pr2serr_lk("[%d] co.dout_resid(%d) > num_mrq(%d)\n", id,
2410                    ctl_v4p->dout_resid, num_mrq);
2411         return -1;
2412     }
2413     if (n_cmpl != (num_mrq - resid))
2414         pr2serr_lk("[%d] co.info(%d) != (num_mrq(%d) - co.din_resid(%d))\n"
2415                    "will use co.info\n", id, n_cmpl, num_mrq, resid);
2416     if (n_cmpl > n_subm) {
2417         pr2serr_lk("[%d] n_cmpl(%d) > n_subm(%d), use n_subm for both\n",
2418                    id, n_cmpl, n_subm);
2419         n_cmpl = n_subm;
2420     }
2421     if (sres) {
2422         pr2serr_lk("[%d] secondary error: %s [%d], info=0x%x\n", id,
2423                    strerror(sres), sres, ctl_v4p->info);
2424         if (E2BIG == sres) {
2425             sg_take_snap(rep->infd, id, true);
2426             sg_take_snap(rep->outfd, id, true);
2427         }
2428     }
2429     /* Check if those submitted have finished or not. N.B. If there has been
2430      * an error then there may be "holes" (i.e. info=0x0) in the array due
2431      * to completions being out-of-order. */
2432     for (k = 0, j = 0; ((k < num_mrq) && (j < n_subm));
2433          ++k, j += f1, ++a_v4p) {
2434         slen = a_v4p->response_len;
2435         if (! (SG_INFO_MRQ_FINI & a_v4p->info))
2436             ++hole_count;
2437         ok = true;
2438         f1 = !!(a_v4p->info);   /* want to skip n_subm count if info is 0x0 */
2439         if (SG_INFO_CHECK & a_v4p->info) {
2440             ok = false;
2441             pr2serr_lk("[%d] a_v4[%d]: SG_INFO_CHECK set [%s]\n", id, k,
2442                        sg_info_str(a_v4p->info, sizeof(b), b));
2443         }
2444         if (sg_scsi_status_is_bad(a_v4p->device_status) ||
2445             a_v4p->transport_status || a_v4p->driver_status) {
2446             ok = false;
2447             if (SAM_STAT_CHECK_CONDITION != a_v4p->device_status) {
2448                 pr2serr_lk("[%d] a_v4[%d]:\n", id, k);
2449                 if (vb)
2450                     lk_chk_n_print4("  >>", a_v4p, vb > 4);
2451             }
2452         }
2453         if (slen > 0) {
2454             struct sg_scsi_sense_hdr ssh;
2455             const uint8_t *sbp = (const uint8_t *)
2456                         (sb_in_co ? ctl_v4p->response : a_v4p->response);
2457 
2458             if (sg_scsi_normalize_sense(sbp, slen, &ssh) &&
2459                 (ssh.response_code >= 0x70)) {
2460                 if (ssh.response_code & 0x1)
2461                     ok = true;
2462                 if (vb) {
2463                     sg_get_sense_str("  ", sbp, slen, false, blen, b);
2464                     pr2serr_lk("[%d] a_v4[%d]:\n%s\n", id, k, b);
2465                 }
2466             }
2467         }
2468         if (ok && f1) {
2469             ++n_good;
2470             if (a_v4p->dout_xfer_len >= (uint32_t)rep->bs)
2471                 good_outblks += (a_v4p->dout_xfer_len - a_v4p->dout_resid) /
2472                                 rep->bs;
2473             if (a_v4p->din_xfer_len >= (uint32_t)rep->bs)
2474                 good_inblks += (a_v4p->din_xfer_len - a_v4p->din_resid) /
2475                                rep->bs;
2476         }
2477     }   /* end of request array scan loop */
2478     if ((n_subm == num_mrq) || (vb < 3))
2479         goto fini;
2480     pr2serr_lk("[%d] checking response array _beyond_ number of "
2481                "submissions [%d] to num_mrq:\n", id, k);
2482     for (all_good = true; k < num_mrq; ++k, ++a_v4p) {
2483         if (SG_INFO_MRQ_FINI & a_v4p->info) {
2484             pr2serr_lk("[%d] a_v4[%d]: unexpected SG_INFO_MRQ_FINI set [%s]\n",
2485                        id, k, sg_info_str(a_v4p->info, sizeof(b), b));
2486             all_good = false;
2487         }
2488         if (a_v4p->device_status || a_v4p->transport_status ||
2489             a_v4p->driver_status) {
2490             pr2serr_lk("[%d] a_v4[%d]:\n", id, k);
2491             lk_chk_n_print4("    ", a_v4p, vb > 4);
2492             all_good = false;
2493         }
2494     }
2495     if (all_good)
2496         pr2serr_lk("    ... all good\n");
2497 fini:
2498     return n_good;
2499 }
2500 
#if 0
/* Compiled out by the enclosing '#if 0'. Checks the responses of a multiple
 * request (mrq) invocation: ctl_v4p is the controlling object, a_v4p the
 * start of the response array that has nrq elements. When non-NULL,
 * good_inblksp and good_outblksp receive the number of blocks (in units of
 * rep->bs) moved by the responses judged good. Returns the number of good
 * responses, or -1 (without writing the block counts) if the controlling
 * object's dout_resid exceeds nrq. */
static int
chk_mrq_response(Rq_elem * rep, const struct sg_io_v4 * ctl_v4p,
                 const struct sg_io_v4 * a_v4p, int nrq,
                 uint32_t * good_inblksp, uint32_t * good_outblksp)
{
    struct global_collection * clp = rep->clp;
    const int id = rep->id;
    const int vb = clp->verbose;
    const int ctl_resid = ctl_v4p->din_resid;
    const int sec_err = ctl_v4p->spare_out;
    const int n_subm = nrq - ctl_v4p->dout_resid;
    int n_cmpl = ctl_v4p->info;
    int n_good = 0;
    int idx;
    uint32_t in_blks = 0;
    uint32_t out_blks = 0;
    char b[80];
    const int blen = sizeof(b);

    if (n_subm < 0) {
        pr2serr_lk("[%d] %s: co.dout_resid(%d) > nrq(%d)\n", id, __func__,
                   ctl_v4p->dout_resid, nrq);
        return -1;
    }
    if ((nrq - ctl_resid) != n_cmpl)
        pr2serr_lk("[%d] %s: co.info(%d) != (nrq(%d) - co.din_resid(%d))\n"
                   "will use co.info\n", id, __func__, n_cmpl, nrq,
                   ctl_resid);
    if (n_cmpl > n_subm) {
        pr2serr_lk("[%d] %s: n_cmpl(%d) > n_subm(%d), use n_subm for both\n",
                   id, __func__, n_cmpl, n_subm);
        n_cmpl = n_subm;
    }
    if (sec_err) {
        pr2serr_lk("[%d] %s: secondary error: %s [%d], info=0x%x\n", id,
                   __func__, strerror(sec_err), sec_err, ctl_v4p->info);
        if (E2BIG == sec_err) {
            sg_take_snap(rep->infd, id, true);
            sg_take_snap(rep->outfd, id, true);
        }
    }

    /* Check if those submitted have finished or not */
    for (idx = 0; idx < n_subm; ++idx) {
        const struct sg_io_v4 * rqp = a_v4p + idx;
        const int slen = rqp->response_len;
        bool ok = true;

        if (0 == (SG_INFO_MRQ_FINI & rqp->info)) {
            pr2serr_lk("[%d] %s, a_n[%d]: missing SG_INFO_MRQ_FINI [%s]\n",
                       id, __func__, idx, sg_info_str(rqp->info, blen, b));
            v4hdr_out_lk("a_np", rqp, id);
            v4hdr_out_lk("cop", ctl_v4p, id);
        }
        if (SG_INFO_CHECK & rqp->info) {
            ok = false;
            pr2serr_lk("[%d] a_n[%d]: SG_INFO_CHECK set [%s]\n", id, idx,
                       sg_info_str(rqp->info, blen, b));
        }
        if (sg_scsi_status_is_bad(rqp->device_status) ||
            rqp->transport_status || rqp->driver_status) {
            ok = false;
            /* check conditions are reported via the sense data below */
            if (SAM_STAT_CHECK_CONDITION != rqp->device_status) {
                pr2serr_lk("[%d] %s, a_n[%d]:\n", id, __func__, idx);
                if (vb)
                    lk_chk_n_print4("  >>", rqp, false);
            }
        }
        if (slen > 0) {
            struct sg_scsi_sense_hdr ssh;
            const uint8_t * sbp = (const uint8_t *)rqp->response;

            if (sg_scsi_normalize_sense(sbp, slen, &ssh) &&
                (ssh.response_code >= 0x70)) {
                char sense_b[256];

                /* an odd response code (presumably a deferred error) is
                 * not held against this request */
                if (ssh.response_code & 0x1)
                    ok = true;
                if (vb) {
                    sg_get_sense_str("  ", sbp, slen, false,
                                     sizeof(sense_b), sense_b);
                    pr2serr_lk("[%d] %s, a_n[%d]:\n%s\n", id, __func__, idx,
                               sense_b);
                }
            }
        }
        if (ok) {
            const uint32_t bs = static_cast<uint32_t>(rep->bs);

            ++n_good;
            if (rqp->dout_xfer_len >= bs)
                out_blks += (rqp->dout_xfer_len - rqp->dout_resid) / bs;
            if (rqp->din_xfer_len >= bs)
                in_blks += (rqp->din_xfer_len - rqp->din_resid) / bs;
        }
    }

    /* Only when fewer than nrq were submitted and verbosity is high enough:
     * report anything unexpected in the rest of the array. */
    if ((n_subm != nrq) && (vb >= 3)) {
        pr2serr_lk("[%d] %s: checking response array beyond number of "
                   "submissions:\n", id, __func__);
        for (idx = n_subm; idx < nrq; ++idx) {
            const struct sg_io_v4 * rqp = a_v4p + idx;

            if (SG_INFO_MRQ_FINI & rqp->info)
                pr2serr_lk("[%d] %s, a_n[%d]: unexpected SG_INFO_MRQ_FINI "
                           "set\n", id, __func__, idx);
            if (rqp->device_status || rqp->transport_status ||
                rqp->driver_status) {
                pr2serr_lk("[%d] %s, a_n[%d]:\n", id, __func__, idx);
                lk_chk_n_print4("    ", rqp, vb > 4);
            }
        }
    }
    if (good_inblksp)
        *good_inblksp = in_blks;
    if (good_outblksp)
        *good_outblksp = out_blks;
    return n_good;
}
#endif
2616 
2617 /* do mrq 'full non-blocking' invocation so both submission and completion
2618  * is async (i.e. uses SGV4_FLAG_IMMED flag). This type of mrq is
2619  * restricted to a single file descriptor (i.e. the 'fd' argument). */
2620 static int
sgh_do_async_mrq(Rq_elem * rep,mrq_arr_t & def_arr,int fd,struct sg_io_v4 * ctlop,int nrq)2621 sgh_do_async_mrq(Rq_elem * rep, mrq_arr_t & def_arr, int fd,
2622                  struct sg_io_v4 * ctlop, int nrq)
2623 {
2624     int half = nrq / 2;
2625     int k, res, nwait, half_num, rest, err, num_good, b_len;
2626     const int64_t wait_us = 10;
2627     uint32_t in_fin_blks, out_fin_blks;
2628     struct sg_io_v4 * a_v4p;
2629     struct sg_io_v4 hold_ctlo;
2630     struct global_collection * clp = rep->clp;
2631     char b[80];
2632 
2633     hold_ctlo = *ctlop;
2634     b_len = sizeof(b);
2635     a_v4p = def_arr.first.data();
2636     ctlop->flags = SGV4_FLAG_MULTIPLE_REQS;
2637     if (clp->in_flags.polled || clp->out_flags.polled) {
2638         /* submit of full non-blocking with POLLED */
2639         ctlop->flags |= (SGV4_FLAG_IMMED | SGV4_FLAG_POLLED);
2640         if (!after1 && (clp->verbose > 1)) {
2641             after1 = true;
2642             pr2serr_lk("%s: %s\n", __func__, mrq_s_nb_s);
2643         }
2644     } else {
2645         ctlop->flags |= SGV4_FLAG_IMMED;        /* submit non-blocking */
2646         if (!after1 && (clp->verbose > 1)) {
2647             after1 = true;
2648             pr2serr_lk("%s: %s\n", __func__, mrq_s_nb_s);
2649         }
2650     }
2651     if (clp->verbose > 4) {
2652         pr2serr_lk("%s: Controlling object _before_ ioctl(SG_IOSUBMIT):\n",
2653                    __func__);
2654         if (clp->verbose > 5)
2655             hex2stderr_lk((const uint8_t *)ctlop, sizeof(*ctlop), 1);
2656         v4hdr_out_lk("Controlling object before", ctlop, rep->id);
2657     }
2658     res = ioctl(fd, SG_IOSUBMIT, ctlop);
2659     if (res < 0) {
2660         err = errno;
2661         if (E2BIG == err)
2662             sg_take_snap(fd, rep->id, true);
2663         pr2serr_lk("%s: ioctl(SG_IOSUBMIT, %s)-->%d, errno=%d: %s\n", __func__,
2664                    sg_flags_str(ctlop->flags, b_len, b), res, err,
2665                    strerror(err));
2666         return -1;
2667     }
2668     /* fetch first half */
2669     for (k = 0; k < 100000; ++k) {
2670         ++num_waiting_calls;
2671         res = ioctl(fd, SG_GET_NUM_WAITING, &nwait);
2672         if (res < 0) {
2673             err = errno;
2674             pr2serr_lk("%s: ioctl(SG_GET_NUM_WAITING)-->%d, errno=%d: %s\n",
2675                        __func__, res, err, strerror(err));
2676             return -1;
2677         }
2678         if (nwait >= half)
2679             break;
2680         this_thread::sleep_for(chrono::microseconds{wait_us});
2681     }
2682     ctlop->flags = (SGV4_FLAG_MULTIPLE_REQS | SGV4_FLAG_IMMED);
2683     res = ioctl(fd, SG_IORECEIVE, ctlop);
2684     if (res < 0) {
2685         err = errno;
2686         if (ENODATA != err) {
2687             pr2serr_lk("%s: ioctl(SG_IORECEIVE, %s),1-->%d, errno=%d: %s\n",
2688                        __func__, sg_flags_str(ctlop->flags, b_len, b), res,
2689                        err, strerror(err));
2690             return -1;
2691         }
2692         half_num = 0;
2693     } else
2694         half_num = ctlop->info;
2695     if (clp->verbose > 4) {
2696         pr2serr_lk("%s: Controlling object output by ioctl(SG_IORECEIVE),1: "
2697                    "num_received=%d\n", __func__, half_num);
2698         if (clp->verbose > 5)
2699             hex2stderr_lk((const uint8_t *)ctlop, sizeof(*ctlop), 1);
2700         v4hdr_out_lk("Controlling object after", ctlop, rep->id);
2701         if (clp->verbose > 5) {
2702             for (k = 0; k < half_num; ++k) {
2703                 pr2serr_lk("AFTER: def_arr[%d]:\n", k);
2704                 v4hdr_out_lk("normal v4 object", (a_v4p + k), rep->id);
2705                 // hex2stderr_lk((const uint8_t *)(a_v4p + k), sizeof(*a_v4p),
2706                                   // 1);
2707             }
2708         }
2709     }
2710     in_fin_blks = 0;
2711     out_fin_blks = 0;
2712     num_good = process_mrq_response(rep, ctlop, a_v4p, half_num, in_fin_blks,
2713                                     out_fin_blks);
2714     if (clp->verbose > 2)
2715         pr2serr_lk("%s: >>>1 num_good=%d, in_q/fin blks=%u/%u;  out_q/fin "
2716                    "blks=%u/%u\n", __func__, num_good, rep->in_mrq_q_blks,
2717                    in_fin_blks, rep->out_mrq_q_blks, out_fin_blks);
2718 
2719     if (num_good < 0)
2720         res = -1;
2721     else if (num_good < half_num) {
2722         int resid_blks = rep->in_mrq_q_blks - in_fin_blks;
2723 
2724         if (resid_blks > 0)
2725             gcoll.in_rem_count += resid_blks;
2726         resid_blks = rep->out_mrq_q_blks - out_fin_blks;
2727         if (resid_blks > 0)
2728             gcoll.out_rem_count += resid_blks;
2729 
2730         return -1;
2731     }
2732 
2733     rest = nrq - half_num;
2734     if (rest < 1)
2735         goto fini;
2736     /* fetch remaining */
2737     for (k = 0; k < 100000; ++k) {
2738         ++num_waiting_calls;
2739         res = ioctl(fd, SG_GET_NUM_WAITING, &nwait);
2740         if (res < 0) {
2741             pr2serr_lk("%s: ioctl(SG_GET_NUM_WAITING)-->%d, errno=%d: %s\n",
2742                        __func__, res, errno, strerror(errno));
2743             return -1;
2744         }
2745         if (nwait >= rest)
2746             break;
2747         this_thread::sleep_for(chrono::microseconds{wait_us});
2748     }
2749     ctlop = &hold_ctlo;
2750     ctlop->din_xferp += (half_num * sizeof(struct sg_io_v4));
2751     ctlop->din_xfer_len -= (half_num * sizeof(struct sg_io_v4));
2752     ctlop->dout_xferp = ctlop->din_xferp;
2753     ctlop->dout_xfer_len = ctlop->din_xfer_len;
2754     ctlop->flags = (SGV4_FLAG_MULTIPLE_REQS | SGV4_FLAG_IMMED);
2755     res = ioctl(fd, SG_IORECEIVE, ctlop);
2756     if (res < 0) {
2757         err = errno;
2758         if (ENODATA != err) {
2759             pr2serr_lk("%s: ioctl(SG_IORECEIVE, %s),2-->%d, errno=%d: %s\n",
2760                        __func__, sg_flags_str(ctlop->flags, b_len, b), res,
2761                        err, strerror(err));
2762             return -1;
2763         }
2764         half_num = 0;
2765     } else
2766         half_num = ctlop->info;
2767     if (clp->verbose > 4) {
2768         pr2serr_lk("%s: Controlling object output by ioctl(SG_IORECEIVE),2: "
2769                    "num_received=%d\n", __func__, half_num);
2770         if (clp->verbose > 5)
2771             hex2stderr_lk((const uint8_t *)ctlop, sizeof(*ctlop), 1);
2772         v4hdr_out_lk("Controlling object after", ctlop, rep->id);
2773         if (clp->verbose > 5) {
2774             for (k = 0; k < half_num; ++k) {
2775                 pr2serr_lk("AFTER: def_arr[%d]:\n", k);
2776                 v4hdr_out_lk("normal v4 object", (a_v4p + k), rep->id);
2777                 // hex2stderr_lk((const uint8_t *)(a_v4p + k), sizeof(*a_v4p),
2778                                   // 1);
2779             }
2780         }
2781     }
2782     in_fin_blks = 0;
2783     out_fin_blks = 0;
2784     num_good = process_mrq_response(rep, ctlop, a_v4p, half_num, in_fin_blks,
2785                                     out_fin_blks);
2786     if (clp->verbose > 2)
2787         pr2serr_lk("%s: >>>2 num_good=%d, in_q/fin blks=%u/%u;  out_q/fin "
2788                    "blks=%u/%u\n", __func__, num_good, rep->in_mrq_q_blks,
2789                    in_fin_blks, rep->out_mrq_q_blks, out_fin_blks);
2790 
2791     if (num_good < 0)
2792         res = -1;
2793     else if (num_good < half_num) {
2794         int resid_blks = rep->in_mrq_q_blks - in_fin_blks;
2795 
2796         if (resid_blks > 0)
2797             gcoll.in_rem_count += resid_blks;
2798         resid_blks = rep->out_mrq_q_blks - out_fin_blks;
2799         if (resid_blks > 0)
2800             gcoll.out_rem_count += resid_blks;
2801 
2802         res = -1;
2803     }
2804 
2805 fini:
2806     return res;
2807 }
2808 
2809 /* Split def_arr into fd_def_arr and o_fd_arr based on whether each element's
2810  * flags field has SGV4_FLAG_DO_ON_OTHER set. If it is set place in
2811  * o_fd_def_arr and mask out SGV4_DO_ON_OTHER. Returns number of elements
2812  * in o_fd_def_arr. */
2813 static int
split_def_arr(const mrq_arr_t & def_arr,mrq_arr_t & fd_def_arr,mrq_arr_t & o_fd_def_arr)2814 split_def_arr(const mrq_arr_t & def_arr, mrq_arr_t & fd_def_arr,
2815               mrq_arr_t & o_fd_def_arr)
2816 {
2817     int nrq, k;
2818     int res = 0;
2819     const struct sg_io_v4 * a_v4p;
2820 
2821     a_v4p = def_arr.first.data();
2822     nrq = def_arr.first.size();
2823 
2824     for (k = 0; k < nrq; ++k) {
2825         int flags;
2826         const struct sg_io_v4 * h4p = a_v4p + k;
2827 
2828         flags = h4p->flags;
2829         if (flags & SGV4_FLAG_DO_ON_OTHER) {
2830             o_fd_def_arr.first.push_back(def_arr.first[k]);
2831             o_fd_def_arr.second.push_back(def_arr.second[k]);
2832             flags &= ~SGV4_FLAG_DO_ON_OTHER;    /* mask out DO_ON_OTHER */
2833             o_fd_def_arr.first[res].flags = flags;
2834             ++res;
2835         } else {
2836             fd_def_arr.first.push_back(def_arr.first[k]);
2837             fd_def_arr.second.push_back(def_arr.second[k]);
2838         }
2839     }
2840     return res;
2841 }
2842 
2843 /* This function sets up a multiple request (mrq) transaction and sends it
2844  * to the pass-through. Returns 0 on success, 1 if ENOMEM error else -1 for
2845  * other errors. */
2846 static int
sgh_do_deferred_mrq(Rq_elem * rep,mrq_arr_t & def_arr)2847 sgh_do_deferred_mrq(Rq_elem * rep, mrq_arr_t & def_arr)
2848 {
2849     bool launch_mrq_abort = false;
2850     int nrq, k, res, fd, mrq_pack_id, status, id, num_good, b_len;
2851     uint32_t in_fin_blks, out_fin_blks;
2852     const int max_cdb_sz = 16;
2853     struct sg_io_v4 * a_v4p;
2854     struct sg_io_v4 ctl_v4 {};
2855     uint8_t * cmd_ap = NULL;
2856     struct global_collection * clp = rep->clp;
2857     const char * iosub_str = "iosub_str";
2858     char b[80];
2859 
2860     id = rep->id;
2861     b_len = sizeof(b);
2862     ctl_v4.guard = 'Q';
2863     a_v4p = def_arr.first.data();
2864     nrq = def_arr.first.size();
2865     if (nrq < 1) {
2866         pr2serr_lk("[%d] %s: strange nrq=0, nothing to do\n", id, __func__);
2867         return 0;
2868     }
2869     if (clp->mrq_cmds) {
2870         cmd_ap = (uint8_t *)calloc(nrq, max_cdb_sz);
2871         if (NULL == cmd_ap) {
2872             pr2serr_lk("[%d] %s: no memory for calloc(%d * 16)\n", id,
2873                        __func__, nrq);
2874             return 1;
2875         }
2876     }
2877     for (k = 0; k < nrq; ++k) {
2878         struct sg_io_v4 * h4p = a_v4p + k;
2879         uint8_t *cmdp = &def_arr.second[k].front();
2880 
2881         if (clp->mrq_cmds) {
2882             memcpy(cmd_ap + (k * max_cdb_sz), cmdp, h4p->request_len);
2883             h4p->request = 0;
2884         } else
2885             h4p->request = (uint64_t)cmdp;
2886         if (clp->verbose > 5) {
2887             pr2serr_lk("%s%s[%d] def_arr[%d]", ((0 == k) ? __func__ : ""),
2888                        ((0 == k) ? ": " : ""), id, k);
2889             if (h4p->din_xferp)
2890                 pr2serr_lk(" [din=0x%p]:\n", (void *)h4p->din_xferp);
2891             else if (h4p->dout_xferp)
2892                 pr2serr_lk(" [dout=0x%p]:\n", (void *)h4p->dout_xferp);
2893             else
2894                 pr2serr_lk(":\n");
2895             hex2stderr_lk((const uint8_t *)h4p, sizeof(*h4p), 1);
2896         }
2897     }
2898     if (rep->both_sg || rep->same_sg)
2899         fd = rep->infd;         /* assume share to rep->outfd */
2900     else if (rep->only_in_sg)
2901         fd = rep->infd;
2902     else if (rep->only_out_sg)
2903         fd = rep->outfd;
2904     else {
2905         pr2serr_lk("[%d] %s: why am I here? No sg devices\n", id, __func__);
2906         res = -1;
2907         goto fini;
2908     }
2909     res = 0;
2910     if (clp->mrq_cmds) {
2911         ctl_v4.request_len = nrq * max_cdb_sz;
2912         ctl_v4.request = (uint64_t)cmd_ap;
2913     }
2914     ctl_v4.flags = SGV4_FLAG_MULTIPLE_REQS;
2915     if (! clp->mrq_async) {
2916         ctl_v4.flags |= SGV4_FLAG_STOP_IF;
2917         if (clp->in_flags.mrq_svb || clp->out_flags.mrq_svb)
2918             ctl_v4.flags |= SGV4_FLAG_SHARE;
2919     }
2920     ctl_v4.dout_xferp = (uint64_t)a_v4p;        /* request array */
2921     ctl_v4.dout_xfer_len = nrq * sizeof(*a_v4p);
2922     ctl_v4.din_xferp = (uint64_t)a_v4p;         /* response array */
2923     ctl_v4.din_xfer_len = nrq * sizeof(*a_v4p);
2924     mrq_pack_id = atomic_fetch_add(&mono_mrq_id, 1);
2925     if ((clp->m_aen > 0) && (MONO_MRQ_ID_INIT != mrq_pack_id) &&
2926         (0 == ((mrq_pack_id - MONO_MRQ_ID_INIT) % clp->m_aen))) {
2927         launch_mrq_abort = true;
2928         if (clp->verbose > 2)
2929             pr2serr_lk("[%d] %s: Decide to launch MRQ abort thread, "
2930                        "mrq_id=%d\n", id, __func__, mrq_pack_id);
2931         memset(&rep->mai, 0, sizeof(rep->mai));
2932         rep->mai.from_tid = id;
2933         rep->mai.mrq_id = mrq_pack_id;
2934         rep->mai.fd = fd;
2935         rep->mai.debug = clp->verbose;
2936 
2937         status = pthread_create(&rep->mrq_abort_thread_id, NULL,
2938                                 mrq_abort_thread, (void *)&rep->mai);
2939         if (0 != status) err_exit(status, "pthread_create, sig...");
2940     }
2941     ctl_v4.request_extra = launch_mrq_abort ? mrq_pack_id : 0;
2942     rep->mrq_id = mrq_pack_id;
2943     if (clp->verbose && rep->both_sg && clp->mrq_async)
2944         iosub_str = "SG_IOSUBMIT(variable)";
2945     if (clp->verbose > 4) {
2946         pr2serr_lk("%s: Controlling object _before_ ioctl(%s):\n",
2947                    __func__, iosub_str);
2948         if (clp->verbose > 5)
2949             hex2stderr_lk((const uint8_t *)&ctl_v4, sizeof(ctl_v4), 1);
2950         v4hdr_out_lk("Controlling object before", &ctl_v4, id);
2951     }
2952     if (clp->mrq_async && (! rep->both_sg)) {
2953         /* do 'submit non-blocking' or 'full non-blocking' mrq */
2954         mrq_arr_t fd_def_arr;
2955         mrq_arr_t o_fd_def_arr;
2956 
2957         /* need to deconstruct def_arr[] into two separate lists, one for
2958          * the source, the other for the destination. */
2959         int o_num_fd = split_def_arr(def_arr, fd_def_arr, o_fd_def_arr);
2960         int num_fd = fd_def_arr.first.size();
2961         if (num_fd > 0) {
2962             struct sg_io_v4 fd_ctl = ctl_v4;
2963             struct sg_io_v4 * aa_v4p = fd_def_arr.first.data();
2964 
2965             for (k = 0; k < num_fd; ++k) {
2966                 struct sg_io_v4 * h4p = aa_v4p + k;
2967                 uint8_t *cmdp = &fd_def_arr.second[k].front();
2968 
2969                 if (clp->mrq_cmds) {
2970                     memcpy(cmd_ap + (k * max_cdb_sz), cmdp, h4p->request_len);
2971                     h4p->request = 0;
2972                 } else
2973                     h4p->request = (uint64_t)cmdp;
2974                 if (clp->verbose > 5) {
2975                     pr2serr_lk("[%d] df_def_arr[%d]:\n", id, k);
2976                     hex2stderr_lk((const uint8_t *)(aa_v4p + k),
2977                                   sizeof(*aa_v4p), 1);
2978                 }
2979             }
2980             fd_ctl.dout_xferp = (uint64_t)aa_v4p;        /* request array */
2981             fd_ctl.dout_xfer_len = num_fd * sizeof(*aa_v4p);
2982             fd_ctl.din_xferp = (uint64_t)aa_v4p;         /* response array */
2983             fd_ctl.din_xfer_len = num_fd * sizeof(*aa_v4p);
2984             fd_ctl.request_extra = launch_mrq_abort ? mrq_pack_id : 0;
2985             /* this is the source side mrq command */
2986             res = sgh_do_async_mrq(rep, fd_def_arr, fd, &fd_ctl, num_fd);
2987             rep->in_mrq_q_blks = 0;
2988             if (res)
2989                 goto fini;
2990         }
2991         if (o_num_fd > 0) {
2992             struct sg_io_v4 o_fd_ctl = ctl_v4;
2993             struct sg_io_v4 * aa_v4p = o_fd_def_arr.first.data();
2994 
2995             for (k = 0; k < o_num_fd; ++k) {
2996                 struct sg_io_v4 * h4p = aa_v4p + k;
2997                 uint8_t *cmdp = &o_fd_def_arr.second[k].front();
2998 
2999                 if (clp->mrq_cmds) {
3000                     memcpy(cmd_ap + (k * max_cdb_sz), cmdp, h4p->request_len);
3001                     h4p->request = 0;
3002                 } else
3003                     h4p->request = (uint64_t)cmdp;
3004                 if (clp->verbose > 5) {
3005                     pr2serr_lk("[%d] o_fd_def_arr[%d]:\n", id, k);
3006                     hex2stderr_lk((const uint8_t *)(aa_v4p + k),
3007                                   sizeof(*aa_v4p), 1);
3008                 }
3009             }
3010             o_fd_ctl.dout_xferp = (uint64_t)aa_v4p;     /* request array */
3011             o_fd_ctl.dout_xfer_len = o_num_fd * sizeof(*aa_v4p);
3012             o_fd_ctl.din_xferp = (uint64_t)aa_v4p;      /* response array */
3013             o_fd_ctl.din_xfer_len = o_num_fd * sizeof(*aa_v4p);
3014             o_fd_ctl.request_extra = launch_mrq_abort ? mrq_pack_id : 0;
3015             /* this is the destination side mrq command */
3016             res = sgh_do_async_mrq(rep, o_fd_def_arr, rep->outfd, &o_fd_ctl,
3017                                    o_num_fd);
3018             rep->out_mrq_q_blks = 0;
3019         }
3020         goto fini;
3021     }
3022 
3023 try_again:
3024     if (clp->unbalanced_mrq) {
3025         iosub_str = "SG_IOSUBMIT(variable_blocking)";
3026         if (!after1 && (clp->verbose > 1)) {
3027             after1 = true;
3028             pr2serr_lk("%s: unbalanced %s\n", __func__, mrq_vb_s);
3029         }
3030         res = ioctl(fd, SG_IOSUBMIT, &ctl_v4);
3031     } else {
3032         if (clp->mrq_async) {
3033             iosub_str = "SG_IOSUBMIT(variable_blocking)";
3034             if (!after1 && (clp->verbose > 1)) {
3035                 after1 = true;
3036                 pr2serr_lk("%s: %s\n", __func__, mrq_vb_s);
3037             }
3038             res = ioctl(fd, SG_IOSUBMIT, &ctl_v4);
3039         } else if (clp->in_flags.mrq_svb || clp->out_flags.mrq_svb) {
3040             iosub_str = "SG_IOSUBMIT(shared_variable_blocking)";
3041             if (!after1 && (clp->verbose > 1)) {
3042                 after1 = true;
3043                 pr2serr_lk("%s: %s\n", __func__, mrq_svb_s);
3044             }
3045             res = ioctl(fd, SG_IOSUBMIT, &ctl_v4);
3046         } else {
3047             iosub_str = "SG_IO(ordered_blocking)";
3048             if (!after1 && (clp->verbose > 1)) {
3049                 after1 = true;
3050                 pr2serr_lk("%s: %s\n", __func__, mrq_blk_s);
3051             }
3052             res = ioctl(fd, SG_IO, &ctl_v4);
3053         }
3054     }
3055     if (res < 0) {
3056         int err = errno;
3057 
3058         if (E2BIG == err)
3059                 sg_take_snap(fd, id, true);
3060         else if (EBUSY == err) {
3061             ++num_ebusy;
3062             std::this_thread::yield();/* allow another thread to progress */
3063             goto try_again;
3064         }
3065         pr2serr_lk("%s: ioctl(%s, %s)-->%d, errno=%d: %s\n",
3066                    __func__, iosub_str, sg_flags_str(ctl_v4.flags, b_len, b),
3067                    res, err, strerror(err));
3068         res = -1;
3069         goto fini;
3070     }
3071     if (clp->verbose && vb_first_time.load()) {
3072         pr2serr_lk("First controlling object output by ioctl(%s), flags: "
3073                    "%s\n", iosub_str, sg_flags_str(ctl_v4.flags, b_len, b));
3074         vb_first_time.store(false);
3075     } else if (clp->verbose > 4)
3076         pr2serr_lk("%s: Controlling object output by ioctl(%s):\n",
3077                    __func__, iosub_str);
3078     if (clp->verbose > 4) {
3079         if (clp->verbose > 5)
3080             hex2stderr_lk((const uint8_t *)&ctl_v4, sizeof(ctl_v4), 1);
3081         v4hdr_out_lk("Controlling object after", &ctl_v4, id);
3082         if (clp->verbose > 5) {
3083             for (k = 0; k < nrq; ++k) {
3084                 pr2serr_lk("AFTER: def_arr[%d]:\n", k);
3085                 v4hdr_out_lk("normal v4 object", (a_v4p + k), id);
3086                 // hex2stderr_lk((const uint8_t *)(a_v4p + k), sizeof(*a_v4p),
3087                                   // 1);
3088             }
3089         }
3090     }
3091     in_fin_blks = 0;
3092     out_fin_blks = 0;
3093     num_good = process_mrq_response(rep, &ctl_v4, a_v4p, nrq, in_fin_blks,
3094                                     out_fin_blks);
3095     if (clp->verbose > 2)
3096         pr2serr_lk("%s: >>> num_good=%d, in_q/fin blks=%u/%u;  out_q/fin "
3097                    "blks=%u/%u\n", __func__, num_good, rep->in_mrq_q_blks,
3098                    in_fin_blks, rep->out_mrq_q_blks, out_fin_blks);
3099 
3100     if (num_good < 0)
3101         res = -1;
3102     else if (num_good < nrq) {
3103         int resid_blks = rep->in_mrq_q_blks - in_fin_blks;
3104 
3105         if (resid_blks > 0)
3106             gcoll.in_rem_count += resid_blks;
3107         resid_blks = rep->out_mrq_q_blks - out_fin_blks;
3108         if (resid_blks > 0)
3109             gcoll.out_rem_count += resid_blks;
3110 
3111         res = -1;
3112     }
3113     rep->in_mrq_q_blks = 0;
3114     rep->out_mrq_q_blks = 0;
3115 fini:
3116     def_arr.first.clear();
3117     def_arr.second.clear();
3118     if (cmd_ap)
3119         free(cmd_ap);
3120     if (launch_mrq_abort) {
3121         if (clp->verbose > 1)
3122             pr2serr_lk("[%d] %s: About to join MRQ abort thread, "
3123                        "mrq_id=%d\n", id, __func__, mrq_pack_id);
3124 
3125         void * vp;      /* not used */
3126         status = pthread_join(rep->mrq_abort_thread_id, &vp);
3127         if (0 != status) err_exit(status, "pthread_join");
3128     }
3129     return res;
3130 }
3131 
3132 /* Returns 0 on success, 1 if ENOMEM error else -1 for other errors. */
3133 static int
sg_start_io(Rq_elem * rep,mrq_arr_t & def_arr,int & pack_id,struct sg_io_extra * xtrp)3134 sg_start_io(Rq_elem * rep, mrq_arr_t & def_arr, int & pack_id,
3135             struct sg_io_extra *xtrp)
3136 {
3137     struct global_collection * clp = rep->clp;
3138     bool wr = rep->wr;
3139     bool fua = wr ? clp->out_flags.fua : clp->in_flags.fua;
3140     bool dpo = wr ? clp->out_flags.dpo : clp->in_flags.dpo;
3141     bool dio = wr ? clp->out_flags.dio : clp->in_flags.dio;
3142     bool mmap = wr ? clp->out_flags.mmap : clp->in_flags.mmap;
3143     bool noxfer = wr ? clp->out_flags.noxfer : clp->in_flags.noxfer;
3144     bool v4 = wr ? clp->out_flags.v4 : clp->in_flags.v4;
3145     bool qhead = wr ? clp->out_flags.qhead : clp->in_flags.qhead;
3146     bool qtail = wr ? clp->out_flags.qtail : clp->in_flags.qtail;
3147     bool polled = wr ? clp->out_flags.polled : clp->in_flags.polled;
3148     bool mout_if = wr ? clp->out_flags.mout_if : clp->in_flags.mout_if;
3149     bool prefetch = xtrp ? xtrp->prefetch : false;
3150     bool is_wr2 = xtrp ? xtrp->is_wr2 : false;
3151     int cdbsz = wr ? clp->cdbsz_out : clp->cdbsz_in;
3152     int flags = 0;
3153     int res, err, fd, b_len, nblks, blk_off;
3154     int64_t blk = wr ? rep->oblk : rep->iblk;
3155     struct sg_io_hdr * hp = &rep->io_hdr;
3156     struct sg_io_v4 * h4p = &rep->io_hdr4[xtrp ? xtrp->hpv4_ind : 0];
3157     const char * cp = "";
3158     const char * crwp;
3159     char b[80];
3160 
3161     b_len = sizeof(b);
3162     if (wr) {
3163         fd = is_wr2 ? rep->out2fd : rep->outfd;
3164         if (clp->verify) {
3165             crwp = is_wr2 ? "verifying2" : "verifying";
3166             if (prefetch)
3167                 crwp = is_wr2 ? "prefetch2" : "prefetch";
3168         } else
3169             crwp = is_wr2 ? "writing2" : "writing";
3170     } else {
3171         fd = rep->infd;
3172         crwp = "reading";
3173     }
3174     if (qhead)
3175         qtail = false;          /* qhead takes precedence */
3176 
3177     if (v4 && xtrp && xtrp->dout_is_split) {
3178         res = sg_build_scsi_cdb(rep->cmd, cdbsz, xtrp->blks,
3179                                 blk + (unsigned int)xtrp->blk_offset,
3180                                 clp->verify, true, fua, dpo);
3181     } else
3182         res = sg_build_scsi_cdb(rep->cmd, cdbsz, rep->num_blks, blk,
3183                                 wr ? clp->verify : false, wr, fua, dpo);
3184     if (res) {
3185         pr2serr_lk("%sbad cdb build, start_blk=%" PRId64 ", blocks=%d\n",
3186                    my_name, blk, rep->num_blks);
3187         return -1;
3188     }
3189     if (prefetch) {
3190         if (cdbsz == 10)
3191             rep->cmd[0] = SGP_PRE_FETCH10;
3192         else if (cdbsz == 16)
3193             rep->cmd[0] = SGP_PRE_FETCH16;
3194         else {
3195             pr2serr_lk("%sbad PRE-FETCH build, start_blk=%" PRId64 ", "
3196                        "blocks=%d\n", my_name, blk, rep->num_blks);
3197             return -1;
3198         }
3199         rep->cmd[1] = 0x2;      /* set IMMED (no fua or dpo) */
3200     }
3201     if (mmap && (clp->noshare || (rep->outregfd >= 0)))
3202         flags |= SG_FLAG_MMAP_IO;
3203     if (noxfer)
3204         flags |= SG_FLAG_NO_DXFER;
3205     if (dio)
3206         flags |= SG_FLAG_DIRECT_IO;
3207     if (polled)
3208         flags |= SGV4_FLAG_POLLED;
3209     if (qhead)
3210         flags |= SG_FLAG_Q_AT_HEAD;
3211     if (qtail)
3212         flags |= SG_FLAG_Q_AT_TAIL;
3213     if (mout_if)
3214         flags |= SGV4_FLAG_META_OUT_IF;
3215     if (rep->has_share) {
3216         flags |= SGV4_FLAG_SHARE;
3217         if (wr)
3218             flags |= SGV4_FLAG_NO_DXFER;
3219         else if (rep->outregfd < 0)
3220             flags |= SGV4_FLAG_NO_DXFER;
3221 
3222         cp = (wr ? " write_side active" : " read_side active");
3223     } else
3224         cp = (wr ? " write-side not sharing" : " read_side not sharing");
3225     if (rep->both_sg) {
3226         if (wr)
3227             pack_id = rep->rd_p_id + 1;
3228         else {
3229             pack_id = 2 * atomic_fetch_add(&mono_pack_id, 1);
3230             rep->rd_p_id = pack_id;
3231         }
3232     } else
3233         pack_id = atomic_fetch_add(&mono_pack_id, 1);    /* fetch before */
3234     rep->rq_id = pack_id;
3235     nblks = rep->num_blks;
3236     blk_off = 0;
3237     if (clp->verbose && 0 == clp->nmrqs && vb_first_time.load()) {
3238         vb_first_time.store(false);
3239         pr2serr("First normal IO: %s, flags: %s\n", cp,
3240                 sg_flags_str(flags, b_len, b));
3241     }
3242     if (v4) {
3243         memset(h4p, 0, sizeof(struct sg_io_v4));
3244         if (clp->nmrqs > 0) {
3245             if (rep->both_sg && (rep->outfd == fd))
3246                 flags |= SGV4_FLAG_DO_ON_OTHER;
3247         }
3248         if (xtrp && xtrp->dout_is_split && (nblks > 0)) {
3249             if (1 == xtrp->hpv4_ind) {
3250                 flags |= SGV4_FLAG_DOUT_OFFSET;
3251                 blk_off = xtrp->blk_offset;
3252                 h4p->spare_in = clp->bs * blk_off;
3253             }
3254             nblks = xtrp->blks;
3255             if ((0 == xtrp->hpv4_ind) && (nblks < rep->num_blks))
3256                 flags |= SGV4_FLAG_KEEP_SHARE;
3257         }
3258         if (clp->ofile2_given && wr && rep->has_share && ! is_wr2)
3259             flags |= SGV4_FLAG_KEEP_SHARE; /* set on first write only */
3260         else if (clp->fail_mask & 1)
3261             flags |= SGV4_FLAG_KEEP_SHARE; /* troublemaking .... */
3262     } else
3263         memset(hp, 0, sizeof(struct sg_io_hdr));
3264     if (clp->verbose > 3) {
3265         bool lock = true;
3266         char prefix[128];
3267 
3268         if (4 == clp->verbose) {
3269             snprintf(prefix, sizeof(prefix), "tid,rq_id=%d,%d: ", rep->id,
3270                      pack_id);
3271             lock = false;
3272         } else {
3273             prefix[0] = '\0';
3274             pr2serr_lk("%s tid,rq_id=%d,%d: SCSI %s%s %s, blk=%" PRId64
3275                        " num_blks=%d\n", __func__, rep->id, pack_id, crwp, cp,
3276                        sg_flags_str(flags, b_len, b), blk + blk_off, nblks);
3277         }
3278         lk_print_command_len(prefix, rep->cmd, cdbsz, lock);
3279     }
3280     if (v4)
3281         goto do_v4;     // <<<<<<<<<<<<<<< look further down
3282 
3283     hp->interface_id = 'S';
3284     hp->cmd_len = cdbsz;
3285     hp->cmdp = rep->cmd;
3286     hp->dxferp = get_buffp(rep);
3287     hp->dxfer_len = clp->bs * rep->num_blks;
3288     if (!wr)
3289         hp->dxfer_direction = SG_DXFER_FROM_DEV;
3290     else if (prefetch) {
3291         hp->dxfer_direction = SG_DXFER_NONE;
3292         hp->dxfer_len = 0;
3293         hp->dxferp = NULL;
3294     } else
3295         hp->dxfer_direction = SG_DXFER_TO_DEV;
3296     hp->mx_sb_len = sizeof(rep->sb);
3297     hp->sbp = rep->sb;
3298     hp->timeout = clp->cmd_timeout;
3299     hp->usr_ptr = rep;
3300     hp->pack_id = pack_id;
3301     hp->flags = flags;
3302 
3303     while (((res = write(fd, hp, sizeof(struct sg_io_hdr))) < 0) &&
3304            ((EINTR == errno) || (EAGAIN == errno) || (EBUSY == errno))) {
3305         if (EAGAIN == errno) {
3306             ++num_start_eagain;
3307 #ifdef SGH_DD_SNAP_DEV
3308             if (0 == (num_ebusy % 1000))
3309                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3310 #endif
3311         } else if (EBUSY == errno) {
3312             ++num_ebusy;
3313 #ifdef SGH_DD_SNAP_DEV
3314             if (0 == (num_ebusy % 1000))
3315                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3316 #endif
3317         }
3318         std::this_thread::yield();/* another thread may be able to progress */
3319     }
3320     err = errno;
3321     if (res < 0) {
3322         if (ENOMEM == err)
3323             return 1;
3324         pr2serr_lk("%s tid=%d: %s %s write(2) failed: %s\n", __func__,
3325                    rep->id, cp, sg_flags_str(hp->flags, b_len, b),
3326                    strerror(err));
3327         return -1;
3328     }
3329     return 0;
3330 
3331 do_v4:
3332     h4p->guard = 'Q';
3333     h4p->request_len = cdbsz;
3334     h4p->request = (uint64_t)rep->cmd;
3335     if (wr) {
3336         if (prefetch) {
3337             h4p->dout_xfer_len = 0;     // din_xfer_len is also 0
3338             h4p->dout_xferp = 0;
3339         } else {
3340             h4p->dout_xfer_len = clp->bs * nblks;
3341             h4p->dout_xferp = (uint64_t)get_buffp(rep);
3342         }
3343     } else if (nblks > 0) {
3344         h4p->din_xfer_len = clp->bs * nblks;
3345         h4p->din_xferp = (uint64_t)get_buffp(rep);
3346     }
3347     h4p->max_response_len = sizeof(rep->sb);
3348     h4p->response = (uint64_t)rep->sb;
3349     h4p->timeout = clp->cmd_timeout;
3350     h4p->usr_ptr = (uint64_t)rep;
3351     h4p->request_extra = pack_id;    /* this is the pack_id */
3352     h4p->flags = flags;
3353     if (clp->nmrqs > 0) {
3354         big_cdb cdb_arr;
3355         uint8_t * cmdp = &(cdb_arr[0]);
3356 
3357         if (wr)
3358             rep->out_mrq_q_blks += nblks;
3359         else
3360             rep->in_mrq_q_blks += nblks;
3361         memcpy(cmdp, rep->cmd, cdbsz);
3362         def_arr.first.push_back(*h4p);
3363         def_arr.second.push_back(cdb_arr);
3364         res = 0;
3365         if ((int)def_arr.first.size() >= clp->nmrqs) {
3366             res = sgh_do_deferred_mrq(rep, def_arr);
3367             if (res)
3368                 pr2serr_lk("%s tid=%d: sgh_do_deferred_mrq failed\n",
3369                            __func__, rep->id);
3370         }
3371         return res;
3372     }
3373     while (((res = ioctl(fd, SG_IOSUBMIT, h4p)) < 0) &&
3374            ((EINTR == errno) || (EAGAIN == errno) || (EBUSY == errno))) {
3375         if (EAGAIN == errno) {
3376             ++num_start_eagain;
3377 #ifdef SGH_DD_SNAP_DEV
3378             if (0 == (num_ebusy % 1000))
3379                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3380 #endif
3381         } else if (EBUSY == errno) {
3382             ++num_ebusy;
3383 #ifdef SGH_DD_SNAP_DEV
3384             if (0 == (num_ebusy % 1000))
3385                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3386 #endif
3387         }
3388         std::this_thread::yield();/* another thread may be able to progress */
3389     }
3390     err = errno;
3391     if (res < 0) {
3392         if (ENOMEM == err)
3393             return 1;
3394         if (E2BIG == err)
3395             sg_take_snap(fd, rep->id, true);
3396         pr2serr_lk("%s tid=%d: %s %s ioctl(2) failed: %s\n", __func__,
3397                    rep->id, cp, sg_flags_str(h4p->flags, b_len, b),
3398                    strerror(err));
3399         // v4hdr_out_lk("leadin", h4p, rep->id);
3400         return -1;
3401     }
3402     if ((clp->aen > 0) && (rep->rep_count > 0)) {
3403         if (0 == (rep->rq_id % clp->aen)) {
3404             struct timespec tspec = {0, 4000 /* 4 usecs */};
3405 
3406             nanosleep(&tspec, NULL);
3407 #if 0
3408             struct pollfd a_poll;
3409 
3410             a_poll.fd = fd;
3411             a_poll.events = POLL_IN;
3412             a_poll.revents = 0;
3413             res = poll(&a_poll, 1 /* element */, 1 /* millisecond */);
3414             if (res < 0)
3415                 pr2serr_lk("%s: poll() failed: %s [%d]\n",
3416                            __func__, safe_strerror(errno), errno);
3417             else if (0 == res) { /* timeout, cmd still inflight, so abort */
3418             }
3419 #endif
3420             ++num_abort_req;
3421             res = ioctl(fd, SG_IOABORT, h4p);
3422             if (res < 0) {
3423                 err = errno;
3424                 if (ENODATA == err) {
3425                     if (clp->verbose > 2)
3426                         pr2serr_lk("%s: ioctl(SG_IOABORT) no match on "
3427                                    "pack_id=%d\n", __func__, pack_id);
3428                 } else
3429                     pr2serr_lk("%s: ioctl(SG_IOABORT) failed: %s [%d]\n",
3430                                __func__, safe_strerror(err), err);
3431             } else {
3432                 ++num_abort_req_success;
3433                 if (clp->verbose > 2)
3434                     pr2serr_lk("%s: sent ioctl(SG_IOABORT) on rq_id=%d, "
3435                                "success\n", __func__, pack_id);
3436             }
3437         }   /* else got response, too late for timeout, so skip */
3438     }
3439     return 0;
3440 }
3441 
3442 /* 0 -> successful, SG_LIB_CAT_UNIT_ATTENTION or SG_LIB_CAT_ABORTED_COMMAND
3443    -> try again, SG_LIB_CAT_NOT_READY, SG_LIB_CAT_MEDIUM_HARD,
3444    -1 other errors */
3445 static int
sg_finish_io(bool wr,Rq_elem * rep,int pack_id,struct sg_io_extra * xtrp)3446 sg_finish_io(bool wr, Rq_elem * rep, int pack_id, struct sg_io_extra *xtrp)
3447 {
3448     struct global_collection * clp = rep->clp;
3449     bool v4 = wr ? clp->out_flags.v4 : clp->in_flags.v4;
3450     bool mout_if = wr ? clp->out_flags.mout_if : clp->in_flags.mout_if;
3451     bool is_wr2 = xtrp ? xtrp->is_wr2 : false;
3452     bool prefetch = xtrp ? xtrp->prefetch : false;
3453     int res, fd;
3454     int64_t blk = wr ? rep->oblk : rep->iblk;
3455     struct sg_io_hdr io_hdr;
3456     struct sg_io_hdr * hp;
3457     struct sg_io_v4 * h4p;
3458     const char *cp;
3459 
3460     if (wr) {
3461         fd = is_wr2 ? rep->out2fd : rep->outfd;
3462         cp = is_wr2 ? "writing2" : "writing";
3463         if (clp->verify) {
3464             cp = is_wr2 ? "verifying2" : "verifying";
3465             if (prefetch)
3466                 cp = is_wr2 ? "prefetch2" : "prefetch";
3467         }
3468     } else {
3469         fd = rep->infd;
3470         cp = "reading";
3471     }
3472     if (v4)
3473         goto do_v4;
3474     memset(&io_hdr, 0 , sizeof(struct sg_io_hdr));
3475     /* FORCE_PACK_ID active set only read packet with matching pack_id */
3476     io_hdr.interface_id = 'S';
3477     io_hdr.dxfer_direction = wr ? SG_DXFER_TO_DEV : SG_DXFER_FROM_DEV;
3478     io_hdr.pack_id = pack_id;
3479 
3480     while (((res = read(fd, &io_hdr, sizeof(struct sg_io_hdr))) < 0) &&
3481            ((EINTR == errno) || (EAGAIN == errno) || (EBUSY == errno))) {
3482         if (EAGAIN == errno) {
3483             ++num_fin_eagain;
3484 #ifdef SGH_DD_SNAP_DEV
3485             if (0 == (num_ebusy % 1000))
3486                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3487 #endif
3488         } else if (EBUSY == errno) {
3489             ++num_ebusy;
3490 #ifdef SGH_DD_SNAP_DEV
3491             if (0 == (num_ebusy % 1000))
3492                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3493 #endif
3494         }
3495         std::this_thread::yield();/* another thread may be able to progress */
3496     }
3497     if (res < 0) {
3498         perror("finishing io [read(2)] on sg device, error");
3499         return -1;
3500     }
3501     if (rep != (Rq_elem *)io_hdr.usr_ptr)
3502         err_exit(0, "sg_finish_io: bad usr_ptr, request-response mismatch\n");
3503     memcpy(&rep->io_hdr, &io_hdr, sizeof(struct sg_io_hdr));
3504     hp = &rep->io_hdr;
3505 
3506     res = sg_err_category3(hp);
3507     switch (res) {
3508     case SG_LIB_CAT_CLEAN:
3509     case SG_LIB_CAT_CONDITION_MET:
3510         break;
3511     case SG_LIB_CAT_RECOVERED:
3512         lk_chk_n_print3(cp, hp, false);
3513         break;
3514     case SG_LIB_CAT_ABORTED_COMMAND:
3515     case SG_LIB_CAT_UNIT_ATTENTION:
3516         if (clp->verbose > 3)
3517             lk_chk_n_print3(cp, hp, false);
3518         return res;
3519     case SG_LIB_CAT_MISCOMPARE:
3520         ++num_miscompare;
3521         // fall through
3522     case SG_LIB_CAT_NOT_READY:
3523     default:
3524         {
3525             char ebuff[EBUFF_SZ];
3526 
3527             snprintf(ebuff, EBUFF_SZ, "%s blk=%" PRId64, cp, blk);
3528             lk_chk_n_print3(ebuff, hp, clp->verbose > 1);
3529             return res;
3530         }
3531     }
3532     if ((wr ? clp->out_flags.dio : clp->in_flags.dio) &&
3533         (! (hp->info & SG_INFO_DIRECT_IO_MASK)))
3534         rep->dio_incomplete_count = 1; /* count dios done as indirect IO */
3535     else
3536         rep->dio_incomplete_count = 0;
3537     rep->resid = hp->resid;
3538     if (clp->verbose > 3)
3539         pr2serr_lk("%s: tid=%d: completed %s\n", __func__, rep->id, cp);
3540     return 0;
3541 
3542 do_v4:
3543     if (clp->nmrqs > 0) {
3544         rep->resid = 0;
3545         return 0;
3546     }
3547     h4p = &rep->io_hdr4[xtrp ? xtrp->hpv4_ind : 0];
3548     h4p->request_extra = pack_id;
3549     if (mout_if) {
3550         h4p->info = 0;
3551         h4p->din_resid = 0;
3552     }
3553     while (((res = ioctl(fd, SG_IORECEIVE, h4p)) < 0) &&
3554            ((EINTR == errno) || (EAGAIN == errno) || (EBUSY == errno))) {
3555         if (EAGAIN == errno) {
3556             ++num_fin_eagain;
3557 #ifdef SGH_DD_SNAP_DEV
3558             if (0 == (num_ebusy % 1000))
3559                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3560 #endif
3561         } else if (EBUSY == errno) {
3562             ++num_ebusy;
3563 #ifdef SGH_DD_SNAP_DEV
3564             if (0 == (num_ebusy % 1000))
3565                 sg_take_snap(fd, rep->id, (clp->verbose > 2));
3566 #endif
3567         }
3568         std::this_thread::yield();/* another thread may be able to progress */
3569     }
3570     if (res < 0) {
3571         perror("finishing io [SG_IORECEIVE] on sg device, error");
3572         return -1;
3573     }
3574     if (mout_if && (0 == h4p->info) && (0 == h4p->din_resid))
3575         goto all_good;
3576     if (rep != (Rq_elem *)h4p->usr_ptr)
3577         err_exit(0, "sg_finish_io: bad usr_ptr, request-response mismatch\n");
3578     res = sg_err_category_new(h4p->device_status, h4p->transport_status,
3579                               h4p->driver_status,
3580                               (const uint8_t *)h4p->response,
3581                               h4p->response_len);
3582     switch (res) {
3583     case SG_LIB_CAT_CLEAN:
3584     case SG_LIB_CAT_CONDITION_MET:
3585         break;
3586     case SG_LIB_CAT_RECOVERED:
3587         lk_chk_n_print4(cp, h4p, false);
3588         break;
3589     case SG_LIB_CAT_ABORTED_COMMAND:
3590     case SG_LIB_CAT_UNIT_ATTENTION:
3591         if (clp->verbose > 3)
3592             lk_chk_n_print4(cp, h4p, false);
3593         return res;
3594     case SG_LIB_CAT_MISCOMPARE:
3595         ++num_miscompare;
3596         // fall through
3597     case SG_LIB_CAT_NOT_READY:
3598     default:
3599         {
3600             char ebuff[EBUFF_SZ];
3601 
3602             snprintf(ebuff, EBUFF_SZ, "%s rq_id=%d, blk=%" PRId64, cp,
3603                      pack_id, blk);
3604             lk_chk_n_print4(ebuff, h4p, clp->verbose > 1);
3605             if ((clp->verbose > 4) && h4p->info)
3606                 pr2serr_lk(" info=0x%x sg_info_check=%d direct=%d "
3607                            "detaching=%d aborted=%d\n", h4p->info,
3608                            !!(h4p->info & SG_INFO_CHECK),
3609                            !!(h4p->info & SG_INFO_DIRECT_IO),
3610                            !!(h4p->info & SG_INFO_DEVICE_DETACHING),
3611                            !!(h4p->info & SG_INFO_ABORTED));
3612             return res;
3613         }
3614     }
3615     if ((wr ? clp->out_flags.dio : clp->in_flags.dio) &&
3616         ! (h4p->info & SG_INFO_DIRECT_IO))
3617         rep->dio_incomplete_count = 1; /* count dios done as indirect IO */
3618     else
3619         rep->dio_incomplete_count = 0;
3620     rep->resid = h4p->din_resid;
3621     if (clp->verbose > 4) {
3622         pr2serr_lk("%s: tid,rq_id=%d,%d: completed %s\n", __func__, rep->id,
3623                    pack_id, cp);
3624         if ((clp->verbose > 4) && h4p->info)
3625             pr2serr_lk(" info=0x%x sg_info_check=%d direct=%d "
3626                        "detaching=%d aborted=%d\n", h4p->info,
3627                        !!(h4p->info & SG_INFO_CHECK),
3628                        !!(h4p->info & SG_INFO_DIRECT_IO),
3629                        !!(h4p->info & SG_INFO_DEVICE_DETACHING),
3630                        !!(h4p->info & SG_INFO_ABORTED));
3631     }
3632 all_good:
3633     return 0;
3634 }
3635 
3636 /* Returns reserved_buffer_size/mmap_size if success, else 0 for failure */
3637 static int
sg_prepare_resbuf(int fd,bool is_in,struct global_collection * clp,uint8_t ** mmpp)3638 sg_prepare_resbuf(int fd, bool is_in, struct global_collection *clp,
3639                   uint8_t **mmpp)
3640 {
3641     static bool done = false;
3642     bool def_res = is_in ? clp->in_flags.defres : clp->out_flags.defres;
3643     bool no_dur = is_in ? clp->in_flags.no_dur : clp->out_flags.no_dur;
3644     bool masync = is_in ? clp->in_flags.masync : clp->out_flags.masync;
3645     bool wq_excl = is_in ? clp->in_flags.wq_excl : clp->out_flags.wq_excl;
3646     bool skip_thresh = is_in ? clp->in_flags.no_thresh :
3647                                clp->out_flags.no_thresh;
3648     int res, t;
3649     int num = 0;
3650     uint8_t *mmp;
3651     struct sg_extended_info sei {};
3652     struct sg_extended_info * seip = &sei;
3653 
3654     res = ioctl(fd, SG_GET_VERSION_NUM, &t);
3655     if ((res < 0) || (t < 40000)) {
3656         if (ioctl(fd, SG_GET_RESERVED_SIZE, &num) < 0) {
3657             perror("SG_GET_RESERVED_SIZE ioctl failed");
3658             return 0;
3659         }
3660         if (! done) {
3661             done = true;
3662             sg_version_lt_4 = true;
3663             pr2serr_lk("%ssg driver prior to 4.0.00, reduced functionality\n",
3664                        my_name);
3665         }
3666         goto bypass;
3667     }
3668     if (! sg_version_ge_40045)
3669         goto bypass;
3670     if (clp->elem_sz >= 4096) {
3671         seip->sei_rd_mask |= SG_SEIM_SGAT_ELEM_SZ;
3672         res = ioctl(fd, SG_SET_GET_EXTENDED, seip);
3673         if (res < 0)
3674             pr2serr_lk("%s%s: SG_SET_GET_EXTENDED(SGAT_ELEM_SZ) rd "
3675                        "error: %s\n", my_name, __func__, strerror(errno));
3676         if (clp->elem_sz != (int)seip->sgat_elem_sz) {
3677             memset(seip, 0, sizeof(*seip));
3678             seip->sei_wr_mask |= SG_SEIM_SGAT_ELEM_SZ;
3679             seip->sgat_elem_sz = clp->elem_sz;
3680             res = ioctl(fd, SG_SET_GET_EXTENDED, seip);
3681             if (res < 0)
3682                 pr2serr_lk("%s%s: SG_SET_GET_EXTENDED(SGAT_ELEM_SZ) wr "
3683                            "error: %s\n", my_name, __func__, strerror(errno));
3684         }
3685     }
3686     if (no_dur || masync || skip_thresh) {
3687         memset(seip, 0, sizeof(*seip));
3688         seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
3689         if (no_dur) {
3690             seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_NO_DURATION;
3691             seip->ctl_flags |= SG_CTL_FLAGM_NO_DURATION;
3692         }
3693         if (masync) {
3694             seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_MORE_ASYNC;
3695             seip->ctl_flags |= SG_CTL_FLAGM_MORE_ASYNC;
3696         }
3697         if (wq_excl) {
3698             seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_EXCL_WAITQ;
3699             seip->ctl_flags |= SG_CTL_FLAGM_EXCL_WAITQ;
3700         }
3701         if (skip_thresh) {
3702             seip->tot_fd_thresh = 0;
3703             sei.sei_wr_mask |= SG_SEIM_TOT_FD_THRESH;
3704         }
3705         res = ioctl(fd, SG_SET_GET_EXTENDED, seip);
3706         if (res < 0)
3707             pr2serr_lk("%s%s: SG_SET_GET_EXTENDED(NO_DURATION) error: %s\n",
3708                        my_name, __func__, strerror(errno));
3709     }
3710 bypass:
3711     if (! def_res) {
3712         num = clp->bs * clp->bpt;
3713         res = ioctl(fd, SG_SET_RESERVED_SIZE, &num);
3714         if (res < 0) {
3715             perror("sgh_dd: SG_SET_RESERVED_SIZE error");
3716             return 0;
3717         } else {
3718             int nn;
3719 
3720             res = ioctl(fd, SG_GET_RESERVED_SIZE, &nn);
3721             if (res < 0) {
3722                 perror("sgh_dd: SG_GET_RESERVED_SIZE error");
3723                 return 0;
3724             }
3725             if (nn < num) {
3726                 pr2serr_lk("%s: SG_GET_RESERVED_SIZE shows size truncated, "
3727                            "wanted %d got %d\n", __func__, num, nn);
3728                 return 0;
3729             }
3730         }
3731         if (mmpp) {
3732             mmp = (uint8_t *)mmap(NULL, num, PROT_READ | PROT_WRITE,
3733                                   MAP_SHARED, fd, 0);
3734             if (MAP_FAILED == mmp) {
3735                 int err = errno;
3736 
3737                 pr2serr_lk("%s%s: sz=%d, fd=%d, mmap() failed: %s\n",
3738                            my_name, __func__, num, fd, strerror(err));
3739                 return 0;
3740             }
3741             *mmpp = mmp;
3742         }
3743     }
3744     t = 1;
3745     res = ioctl(fd, SG_SET_FORCE_PACK_ID, &t);
3746     if (res < 0)
3747         perror("sgh_dd: SG_SET_FORCE_PACK_ID error");
3748     if (clp->unit_nanosec && sg_version_ge_40045) {
3749         memset(seip, 0, sizeof(*seip));
3750         seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
3751         seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_TIME_IN_NS;
3752         seip->ctl_flags |= SG_CTL_FLAGM_TIME_IN_NS;
3753         if (ioctl(fd, SG_SET_GET_EXTENDED, seip) < 0) {
3754             pr2serr_lk("ioctl(EXTENDED(TIME_IN_NS)) failed, errno=%d %s\n",
3755                        errno, strerror(errno));
3756         }
3757     }
3758     t = 1;
3759     res = ioctl(fd, SG_SET_DEBUG, &t);  /* more info in the kernel log */
3760     if (res < 0)
3761         perror("sgs_dd: SG_SET_DEBUG error");
3762     return (res < 0) ? 0 : num;
3763 }
3764 
3765 static bool
process_flags(const char * arg,struct flags_t * fp)3766 process_flags(const char * arg, struct flags_t * fp)
3767 {
3768     char buff[256];
3769     char * cp;
3770     char * np;
3771 
3772     strncpy(buff, arg, sizeof(buff));
3773     buff[sizeof(buff) - 1] = '\0';
3774     if ('\0' == buff[0]) {
3775         pr2serr("no flag found\n");
3776         return false;
3777     }
3778     cp = buff;
3779     do {
3780         np = strchr(cp, ',');
3781         if (np)
3782             *np++ = '\0';
3783         if (0 == strcmp(cp, "00"))
3784             fp->zero = true;
3785         else if (0 == strcmp(cp, "append"))
3786             fp->append = true;
3787         else if (0 == strcmp(cp, "coe"))
3788             fp->coe = true;
3789         else if (0 == strcmp(cp, "defres"))
3790             fp->defres = true;
3791         else if (0 == strcmp(cp, "dio"))
3792             fp->dio = true;
3793         else if (0 == strcmp(cp, "direct"))
3794             fp->direct = true;
3795         else if (0 == strcmp(cp, "dpo"))
3796             fp->dpo = true;
3797         else if (0 == strcmp(cp, "dsync"))
3798             fp->dsync = true;
3799         else if (0 == strcmp(cp, "excl"))
3800             fp->excl = true;
3801         else if (0 == strcmp(cp, "ff"))
3802             fp->ff = true;
3803         else if (0 == strcmp(cp, "fua"))
3804             fp->fua = true;
3805         else if (0 == strcmp(cp, "hipri"))
3806             fp->polled = true;
3807         else if (0 == strcmp(cp, "masync"))
3808             fp->masync = true;
3809         else if (0 == strcmp(cp, "mmap"))
3810             ++fp->mmap;         /* mmap > 1 stops munmap() being called */
3811         else if (0 == strcmp(cp, "mrq_imm"))
3812             fp->mrq_immed = true;
3813         else if (0 == strcmp(cp, "mrq_immed"))
3814             fp->mrq_immed = true;
3815         else if (0 == strcmp(cp, "mrq_svb"))
3816             fp->mrq_svb = true;
3817         else if (0 == strcmp(cp, "nodur"))
3818             fp->no_dur = true;
3819         else if (0 == strcmp(cp, "no_dur"))
3820             fp->no_dur = true;
3821         else if (0 == strcmp(cp, "nocreat"))
3822             fp->nocreat = true;
3823         else if (0 == strcmp(cp, "noshare"))
3824             fp->noshare = true;
3825         else if (0 == strcmp(cp, "no_share"))
3826             fp->noshare = true;
3827         else if (0 == strcmp(cp, "no_thresh"))
3828             fp->no_thresh = true;
3829         else if (0 == strcmp(cp, "no-thresh"))
3830             fp->no_thresh = true;
3831         else if (0 == strcmp(cp, "nothresh"))
3832             fp->no_thresh = true;
3833         else if (0 == strcmp(cp, "no_unshare"))
3834             fp->no_unshare = true;
3835         else if (0 == strcmp(cp, "no-unshare"))
3836             fp->no_unshare = true;
3837         else if (0 == strcmp(cp, "no_waitq"))
3838             fp->no_waitq = true;
3839         else if (0 == strcmp(cp, "no-waitq"))
3840             fp->no_waitq = true;
3841         else if (0 == strcmp(cp, "nowaitq"))
3842             fp->no_waitq = true;
3843         else if (0 == strcmp(cp, "noxfer"))
3844             fp->noxfer = true;
3845         else if (0 == strcmp(cp, "no_xfer"))
3846             fp->noxfer = true;
3847         else if (0 == strcmp(cp, "null"))
3848             ;
3849         else if (0 == strcmp(cp, "polled"))
3850             fp->polled = true;
3851         else if (0 == strcmp(cp, "qhead"))
3852             fp->qhead = true;
3853         else if (0 == strcmp(cp, "qtail"))
3854             fp->qtail = true;
3855         else if (0 == strcmp(cp, "random"))
3856             fp->random = true;
3857         else if ((0 == strcmp(cp, "mout_if")) || (0 == strcmp(cp, "mout-if")))
3858             fp->mout_if = true;
3859         else if (0 == strcmp(cp, "same_fds"))
3860             fp->same_fds = true;
3861         else if (0 == strcmp(cp, "swait"))
3862             fp->swait = true;
3863         else if (0 == strcmp(cp, "v3"))
3864             fp->v3 = true;
3865         else if (0 == strcmp(cp, "v4")) {
3866             fp->v4 = true;
3867             fp->v4_given = true;
3868         } else if (0 == strcmp(cp, "wq_excl"))
3869             fp->wq_excl = true;
3870         else {
3871             pr2serr("unrecognised flag: %s\n", cp);
3872             return false;
3873         }
3874         cp = np;
3875     } while (cp);
3876     return true;
3877 }
3878 
/* Counts how many of the first 'slen' characters of 's' are equal to 'ch'.
 * A 'slen' of zero or less gives a result of 0. */
static int
num_chs_in_str(const char * s, int slen, int ch)
{
    int hits = 0;

    for (int idx = 0; idx < slen; ++idx)
        hits += (ch == s[idx]) ? 1 : 0;
    return hits;
}
3892 
3893 static int
sg_in_open(struct global_collection * clp,const char * inf,uint8_t ** mmpp,int * mmap_lenp)3894 sg_in_open(struct global_collection *clp, const char *inf, uint8_t **mmpp,
3895            int * mmap_lenp)
3896 {
3897     int fd, n;
3898     int flags = O_RDWR;
3899 
3900     if (clp->in_flags.direct)
3901         flags |= O_DIRECT;
3902     if (clp->in_flags.excl)
3903         flags |= O_EXCL;
3904     if (clp->in_flags.dsync)
3905         flags |= O_SYNC;
3906 
3907     if ((fd = open(inf, flags)) < 0) {
3908         int err = errno;
3909         char ebuff[EBUFF_SZ];
3910 
3911         snprintf(ebuff, EBUFF_SZ, "%s: could not open %s for sg reading",
3912                  __func__, inf);
3913         perror(ebuff);
3914         return -sg_convert_errno(err);
3915     }
3916     n = sg_prepare_resbuf(fd, true, clp, mmpp);
3917     if (n <= 0) {
3918         close(fd);
3919         return -SG_LIB_FILE_ERROR;
3920     }
3921     if (clp->noshare)
3922         sg_noshare_enlarge(fd, clp->verbose > 3);
3923     if (mmap_lenp)
3924         *mmap_lenp = n;
3925     return fd;
3926 }
3927 
3928 static int
sg_out_open(struct global_collection * clp,const char * outf,uint8_t ** mmpp,int * mmap_lenp)3929 sg_out_open(struct global_collection *clp, const char *outf, uint8_t **mmpp,
3930             int * mmap_lenp)
3931 {
3932     int fd, n;
3933     int flags = O_RDWR;
3934 
3935     if (clp->out_flags.direct)
3936         flags |= O_DIRECT;
3937     if (clp->out_flags.excl)
3938         flags |= O_EXCL;
3939     if (clp->out_flags.dsync)
3940         flags |= O_SYNC;
3941 
3942     if ((fd = open(outf, flags)) < 0) {
3943         int err = errno;
3944         char ebuff[EBUFF_SZ];
3945 
3946         snprintf(ebuff,  EBUFF_SZ, "%s: could not open %s for sg %s",
3947                  __func__, outf, (clp->verify ? "verifying" : "writing"));
3948         perror(ebuff);
3949         return -sg_convert_errno(err);
3950     }
3951     n = sg_prepare_resbuf(fd, false, clp, mmpp);
3952     if (n <= 0) {
3953         close(fd);
3954         return -SG_LIB_FILE_ERROR;
3955     }
3956     if (clp->noshare)
3957         sg_noshare_enlarge(fd, clp->verbose > 3);
3958     if (mmap_lenp)
3959         *mmap_lenp = n;
3960     return fd;
3961 }
3962 
3963 /* Process arguments given to 'conv=" option. Returns 0 on success,
3964  * 1 on error. */
3965 static int
process_conv(const char * arg,struct flags_t * ifp,struct flags_t * ofp)3966 process_conv(const char * arg, struct flags_t * ifp, struct flags_t * ofp)
3967 {
3968     char buff[256];
3969     char * cp;
3970     char * np;
3971 
3972     strncpy(buff, arg, sizeof(buff));
3973     buff[sizeof(buff) - 1] = '\0';
3974     if ('\0' == buff[0]) {
3975         pr2serr("no conversions found\n");
3976         return 1;
3977     }
3978     cp = buff;
3979     do {
3980         np = strchr(cp, ',');
3981         if (np)
3982             *np++ = '\0';
3983         if (0 == strcmp(cp, "nocreat"))
3984             ofp->nocreat = true;
3985         else if (0 == strcmp(cp, "noerror"))
3986             ifp->coe = true;         /* will still fail on write error */
3987         else if (0 == strcmp(cp, "notrunc"))
3988             ;         /* this is the default action of sg_dd so ignore */
3989         else if (0 == strcmp(cp, "null"))
3990             ;
3991         else if (0 == strcmp(cp, "sync"))
3992             ;   /* dd(susv4): pad errored block(s) with zeros but sg_dd does
3993                  * that by default. Typical dd use: 'conv=noerror,sync' */
3994         else {
3995             pr2serr("unrecognised flag: %s\n", cp);
3996             return 1;
3997         }
3998         cp = np;
3999     } while (cp);
4000     return 0;
4001 }
4002 
/* Size in bytes of the scratch buffer that parse_cmdline_sanity() copies
 * each command line argument into before splitting it at '='. */
#define STR_SZ 1024
/* Size in bytes of the file name buffers (inf, outf, out2f, outregf)
 * declared in main(); parse_cmdline_sanity() copies exactly this many
 * bytes into them. */
#define INOUTF_SZ 512
4005 
4006 static int
parse_cmdline_sanity(int argc,char * argv[],struct global_collection * clp,char * inf,char * outf,char * out2f,char * outregf)4007 parse_cmdline_sanity(int argc, char * argv[], struct global_collection * clp,
4008                      char * inf, char * outf, char * out2f, char * outregf)
4009 {
4010     bool verbose_given = false;
4011     bool version_given = false;
4012     bool verify_given = false;
4013     bool bpt_given = false;
4014     int ibs = 0;
4015     int obs = 0;
4016     int k, keylen, n, res;
4017     char str[STR_SZ];
4018     char * key;
4019     char * buf;
4020     const char * cp;
4021 
4022     for (k = 1; k < argc; k++) {
4023         if (argv[k]) {
4024             strncpy(str, argv[k], STR_SZ);
4025             str[STR_SZ - 1] = '\0';
4026         }
4027         else
4028             continue;
4029         for (key = str, buf = key; *buf && *buf != '=';)
4030             buf++;
4031         if (*buf)
4032             *buf++ = '\0';
4033         keylen = strlen(key);
4034         if (0 == strcmp(key, "ae")) {
4035             clp->aen = sg_get_num(buf);
4036             if (clp->aen < 0) {
4037                 pr2serr("%sbad AEN argument to 'ae=', want 0 or higher\n",
4038                         my_name);
4039                 return SG_LIB_SYNTAX_ERROR;
4040             }
4041             cp = strchr(buf, ',');
4042             if (cp) {
4043                 clp->m_aen = sg_get_num(cp + 1);
4044                 if (clp->m_aen < 0) {
4045                     pr2serr("%sbad MAEN argument to 'ae=', want 0 or "
4046                             "higher\n", my_name);
4047                     return SG_LIB_SYNTAX_ERROR;
4048                 }
4049                 clp->m_aen_given = true;
4050             }
4051             clp->aen_given = true;
4052         } else if (0 == strcmp(key, "bpt")) {
4053             clp->bpt = sg_get_num(buf);
4054             if ((clp->bpt < 0) || (clp->bpt > MAX_BPT_VALUE)) {
4055                 pr2serr("%sbad argument to 'bpt='\n", my_name);
4056                 return SG_LIB_SYNTAX_ERROR;
4057             }
4058             bpt_given = true;
4059         } else if (0 == strcmp(key, "bs")) {
4060             clp->bs = sg_get_num(buf);
4061             if ((clp->bs < 0) || (clp->bs > MAX_BPT_VALUE)) {
4062                 pr2serr("%sbad argument to 'bs='\n", my_name);
4063                 return SG_LIB_SYNTAX_ERROR;
4064             }
4065         } else if (0 == strcmp(key, "cdbsz")) {
4066             clp->cdbsz_in = sg_get_num(buf);
4067             if ((clp->cdbsz_in < 6) || (clp->cdbsz_in > 32)) {
4068                 pr2serr("%s'cdbsz' expects 6, 10, 12, 16 or 32\n", my_name);
4069                 return SG_LIB_SYNTAX_ERROR;
4070             }
4071             clp->cdbsz_out = clp->cdbsz_in;
4072             clp->cdbsz_given = true;
4073         } else if (0 == strcmp(key, "coe")) {
4074             clp->in_flags.coe = !! sg_get_num(buf);
4075             clp->out_flags.coe = clp->in_flags.coe;
4076         } else if (0 == strcmp(key, "conv")) {
4077             if (process_conv(buf, &clp->in_flags, &clp->out_flags)) {
4078                 pr2serr("%s: bad argument to 'conv='\n", my_name);
4079                 return SG_LIB_SYNTAX_ERROR;
4080             }
4081         } else if (0 == strcmp(key, "count")) {
4082             if (0 != strcmp("-1", buf)) {
4083                 dd_count = sg_get_llnum(buf);
4084                 if ((dd_count < 0) || (dd_count > MAX_COUNT_SKIP_SEEK)) {
4085                     pr2serr("%sbad argument to 'count='\n", my_name);
4086                     return SG_LIB_SYNTAX_ERROR;
4087                 }
4088             }   /* treat 'count=-1' as calculate count (same as not given) */
4089         } else if (0 == strcmp(key, "dio")) {
4090             clp->in_flags.dio = !! sg_get_num(buf);
4091             clp->out_flags.dio = clp->in_flags.dio;
4092         } else if (0 == strcmp(key, "elemsz_kb")) {
4093             n = sg_get_num(buf);
4094             if (n < 1) {
4095                 pr2serr("elemsz_kb=EKB wants an integer > 0\n");
4096                 return SG_LIB_SYNTAX_ERROR;
4097             }
4098             if (n & (n - 1)) {
4099                 pr2serr("elemsz_kb=EKB wants EKB to be power of 2\n");
4100                 return SG_LIB_SYNTAX_ERROR;
4101             }
4102             clp->elem_sz = n * 1024;
4103         } else if ((0 == strcmp(key, "fail_mask")) ||
4104                    (0 == strcmp(key, "fail-mask"))) {
4105             clp->fail_mask = sg_get_num(buf);
4106             if (clp->fail_mask < 0) {
4107                 pr2serr("fail_mask: couldn't decode argument\n");
4108                 return SG_LIB_SYNTAX_ERROR;
4109             }
4110         } else if (0 == strcmp(key, "fua")) {
4111             n = sg_get_num(buf);
4112             if (n & 1)
4113                 clp->out_flags.fua = true;
4114             if (n & 2)
4115                 clp->in_flags.fua = true;
4116         } else if (0 == strcmp(key, "ibs")) {
4117             ibs = sg_get_num(buf);
4118             if ((ibs < 0) || (ibs > MAX_BPT_VALUE)) {
4119                 pr2serr("%sbad argument to 'ibs='\n", my_name);
4120                 return SG_LIB_SYNTAX_ERROR;
4121             }
4122         } else if (0 == strcmp(key, "if")) {
4123             if ('\0' != inf[0]) {
4124                 pr2serr("Second 'if=' argument??\n");
4125                 return SG_LIB_SYNTAX_ERROR;
4126             } else {
4127                 memcpy(inf, buf, INOUTF_SZ);
4128                 inf[INOUTF_SZ - 1] = '\0';      /* noisy compiler */
4129             }
4130         } else if (0 == strcmp(key, "iflag")) {
4131             if (! process_flags(buf, &clp->in_flags)) {
4132                 pr2serr("%sbad argument to 'iflag='\n", my_name);
4133                 return SG_LIB_SYNTAX_ERROR;
4134             }
4135         } else if (0 == strcmp(key, "mrq")) {
4136             if (isdigit(buf[0]))
4137                 cp = buf;
4138             else {
4139                 if ('I' == isupper(buf[0]))
4140                     clp->is_mrq_i = true;
4141                 else if ('O' == isupper(buf[0]))
4142                     clp->is_mrq_o = true;
4143                 else {
4144                     pr2serr("%sonly mrq=i,NRQS or mrq=o,NRQS allowed here\n",
4145                             my_name);
4146                     return SG_LIB_SYNTAX_ERROR;
4147                 }
4148                 cp = strchr(buf, ',');
4149                 ++cp;
4150             }
4151             clp->nmrqs = sg_get_num(cp);
4152             if (clp->nmrqs < 0) {
4153                 pr2serr("%sbad argument to 'mrq='\n", my_name);
4154                 return SG_LIB_SYNTAX_ERROR;
4155             }
4156             cp = strchr(cp, ',');
4157             if (cp && ('C' == toupper(cp[1])))
4158                 clp->mrq_cmds = true;
4159         } else if (0 == strcmp(key, "noshare")) {
4160             clp->noshare = !! sg_get_num(buf);
4161         } else if (0 == strcmp(key, "obs")) {
4162             obs = sg_get_num(buf);
4163             if ((obs < 0) || (obs > MAX_BPT_VALUE)) {
4164                 pr2serr("%sbad argument to 'obs='\n", my_name);
4165                 return SG_LIB_SYNTAX_ERROR;
4166             }
4167         } else if (strcmp(key, "of2") == 0) {
4168             if ('\0' != out2f[0]) {
4169                 pr2serr("Second OFILE2 argument??\n");
4170                 return SG_LIB_CONTRADICT;
4171             } else {
4172                 memcpy(out2f, buf, INOUTF_SZ);
4173                 out2f[INOUTF_SZ - 1] = '\0';    /* noisy compiler */
4174             }
4175         } else if (strcmp(key, "ofreg") == 0) {
4176             if ('\0' != outregf[0]) {
4177                 pr2serr("Second OFREG argument??\n");
4178                 return SG_LIB_CONTRADICT;
4179             } else {
4180                 memcpy(outregf, buf, INOUTF_SZ);
4181                 outregf[INOUTF_SZ - 1] = '\0';  /* noisy compiler */
4182             }
4183         } else if (0 == strcmp(key, "ofsplit")) {
4184             clp->ofsplit = sg_get_num(buf);
4185             if (-1 == clp->ofsplit) {
4186                 pr2serr("%sbad argument to 'ofsplit='\n", my_name);
4187                 return SG_LIB_SYNTAX_ERROR;
4188             }
4189         } else if (strcmp(key, "of") == 0) {
4190             if ('\0' != outf[0]) {
4191                 pr2serr("Second 'of=' argument??\n");
4192                 return SG_LIB_SYNTAX_ERROR;
4193             } else {
4194                 memcpy(outf, buf, INOUTF_SZ);
4195                 outf[INOUTF_SZ - 1] = '\0';     /* noisy compiler */
4196             }
4197         } else if (0 == strcmp(key, "oflag")) {
4198             if (! process_flags(buf, &clp->out_flags)) {
4199                 pr2serr("%sbad argument to 'oflag='\n", my_name);
4200                 return SG_LIB_SYNTAX_ERROR;
4201             }
4202         } else if (0 == strcmp(key, "sdt")) {
4203             cp = strchr(buf, ',');
4204             n = sg_get_num(buf);
4205             if (n < 0) {
4206                 pr2serr("%sbad argument to 'sdt=CRT[,ICT]'\n", my_name);
4207                 return SG_LIB_SYNTAX_ERROR;
4208             }
4209             clp->sdt_crt = n;
4210             if (cp) {
4211                 n = sg_get_num(cp + 1);
4212                 if (n < 0) {
4213                     pr2serr("%sbad 2nd argument to 'sdt=CRT,ICT'\n",
4214                             my_name);
4215                     return SG_LIB_SYNTAX_ERROR;
4216                 }
4217                 clp->sdt_ict = n;
4218             }
4219         } else if (0 == strcmp(key, "seek")) {
4220             clp->seek = sg_get_llnum(buf);
4221             if (clp->seek < 0) {
4222                 pr2serr("%sbad argument to 'seek='\n", my_name);
4223                 return SG_LIB_SYNTAX_ERROR;
4224             }
4225         } else if (0 == strcmp(key, "skip")) {
4226             clp->skip = sg_get_llnum(buf);
4227             if (clp->skip < 0) {
4228                 pr2serr("%sbad argument to 'skip='\n", my_name);
4229                 return SG_LIB_SYNTAX_ERROR;
4230             }
4231         } else if (0 == strcmp(key, "sync"))
4232             do_sync = !! sg_get_num(buf);
4233         else if (0 == strcmp(key, "thr"))
4234             num_threads = sg_get_num(buf);
4235         else if (0 == strcmp(key, "time")) {
4236             do_time = sg_get_num(buf);
4237             if (do_time < 0) {
4238                 pr2serr("%sbad argument to 'time=0|1|2'\n", my_name);
4239                 return SG_LIB_SYNTAX_ERROR;
4240             }
4241             cp = strchr(buf, ',');
4242             if (cp) {
4243                 n = sg_get_num(cp + 1);
4244                 if (n < 0) {
4245                     pr2serr("%sbad argument to 'time=0|1|2,TO'\n", my_name);
4246                     return SG_LIB_SYNTAX_ERROR;
4247                 }
4248                 clp->cmd_timeout = n ? (n * 1000) : DEF_TIMEOUT;
4249             }
4250         } else if (0 == strcmp(key, "unshare"))
4251             clp->unshare = !! sg_get_num(buf);  /* default: true */
4252         else if (0 == strncmp(key, "verb", 4))
4253             clp->verbose = sg_get_num(buf);
4254         else if ((keylen > 1) && ('-' == key[0]) && ('-' != key[1])) {
4255             res = 0;
4256             n = num_chs_in_str(key + 1, keylen - 1, 'c');
4257             clp->chkaddr += n;
4258             res += n;
4259             n = num_chs_in_str(key + 1, keylen - 1, 'd');
4260             clp->dry_run += n;
4261             res += n;
4262             n = num_chs_in_str(key + 1, keylen - 1, 'h');
4263             clp->help += n;
4264             res += n;
4265             n = num_chs_in_str(key + 1, keylen - 1, 'p');
4266             if (n > 0)
4267                 clp->prefetch = true;
4268             res += n;
4269             n = num_chs_in_str(key + 1, keylen - 1, 'v');
4270             if (n > 0)
4271                 verbose_given = true;
4272             clp->verbose += n;   /* -v  ---> --verbose */
4273             res += n;
4274             n = num_chs_in_str(key + 1, keylen - 1, 'V');
4275             if (n > 0)
4276                 version_given = true;
4277             res += n;
4278             n = num_chs_in_str(key + 1, keylen - 1, 'x');
4279             if (n > 0)
4280                 verify_given = true;
4281             res += n;
4282 
4283             if (res < (keylen - 1)) {
4284                 pr2serr("Unrecognised short option in '%s', try '--help'\n",
4285                         key);
4286                 return SG_LIB_SYNTAX_ERROR;
4287             }
4288         } else if (0 == strncmp(key, "--chkaddr", 9))
4289             ++clp->chkaddr;
4290         else if ((0 == strncmp(key, "--dry-run", 9)) ||
4291                  (0 == strncmp(key, "--dry_run", 9)))
4292             ++clp->dry_run;
4293         else if ((0 == strncmp(key, "--help", 6)) ||
4294                    (0 == strcmp(key, "-?")))
4295             ++clp->help;
4296         else if ((0 == strncmp(key, "--prefetch", 10)) ||
4297                  (0 == strncmp(key, "--pre-fetch", 11)))
4298             clp->prefetch = true;
4299         else if (0 == strncmp(key, "--verb", 6)) {
4300             verbose_given = true;
4301             ++clp->verbose;      /* --verbose */
4302         } else if (0 == strncmp(key, "--veri", 6))
4303             verify_given = true;
4304         else if (0 == strncmp(key, "--vers", 6))
4305             version_given = true;
4306         else {
4307             pr2serr("Unrecognized option '%s'\n", key);
4308             pr2serr("For more information use '--help' or '-h'\n");
4309             return SG_LIB_SYNTAX_ERROR;
4310         }
4311     }
4312 
4313 #ifdef DEBUG
4314     pr2serr("In DEBUG mode, ");
4315     if (verbose_given && version_given) {
4316         pr2serr("but override: '-vV' given, zero verbose and continue\n");
4317         verbose_given = false;
4318         version_given = false;
4319         clp->verbose = 0;
4320     } else if (! verbose_given) {
4321         pr2serr("set '-vv'\n");
4322         clp->verbose = 2;
4323     } else
4324         pr2serr("keep verbose=%d\n", clp->verbose);
4325 #else
4326     if (verbose_given && version_given)
4327         pr2serr("Not in DEBUG mode, so '-vV' has no special action\n");
4328 #endif
4329     if (version_given) {
4330         pr2serr("%s%s\n", my_name, version_str);
4331         return SG_LIB_OK_FALSE;
4332     }
4333     if (clp->help > 0) {
4334         usage(clp->help);
4335         return SG_LIB_OK_FALSE;
4336     }
4337     if (clp->bs <= 0) {
4338         clp->bs = DEF_BLOCK_SIZE;
4339         pr2serr("Assume default 'bs' ((logical) block size) of %d bytes\n",
4340                 clp->bs);
4341     }
4342     if (verify_given) {
4343         pr2serr("Doing verify/cmp rather than copy\n");
4344         clp->verify = true;
4345     }
4346     if ((ibs && (ibs != clp->bs)) || (obs && (obs != clp->bs))) {
4347         pr2serr("If 'ibs' or 'obs' given must be same as 'bs'\n");
4348         usage(0);
4349         return SG_LIB_SYNTAX_ERROR;
4350     }
4351     if ((clp->skip < 0) || (clp->seek < 0)) {
4352         pr2serr("skip and seek cannot be negative\n");
4353         return SG_LIB_SYNTAX_ERROR;
4354     }
4355     if (clp->out_flags.append) {
4356         if (clp->seek > 0) {
4357             pr2serr("Can't use both append and seek switches\n");
4358             return SG_LIB_SYNTAX_ERROR;
4359         }
4360         if (verify_given) {
4361             pr2serr("Can't use both append and verify switches\n");
4362             return SG_LIB_SYNTAX_ERROR;
4363         }
4364     }
4365     if (clp->bpt < 1) {
4366         pr2serr("bpt must be greater than 0\n");
4367         return SG_LIB_SYNTAX_ERROR;
4368     }
4369     if (clp->in_flags.mmap && clp->out_flags.mmap) {
4370         pr2serr("mmap flag on both IFILE and OFILE doesn't work\n");
4371         return SG_LIB_SYNTAX_ERROR;
4372     }
4373     if (! clp->noshare) {
4374         if (clp->in_flags.noshare || clp->out_flags.noshare)
4375             clp->noshare = true;
4376     }
4377     if (clp->unshare) {
4378         if (clp->in_flags.no_unshare || clp->out_flags.no_unshare)
4379             clp->unshare = false;
4380     }
4381     if (clp->out_flags.mmap && ! clp->noshare) {
4382         pr2serr("oflag=mmap needs either noshare=1\n");
4383         return SG_LIB_SYNTAX_ERROR;
4384     }
4385     if ((clp->in_flags.mmap || clp->out_flags.mmap) &&
4386         (clp->in_flags.same_fds || clp->out_flags.same_fds)) {
4387         pr2serr("can't have both 'mmap' and 'same_fds' flags\n");
4388         return SG_LIB_SYNTAX_ERROR;
4389     }
4390     if ((! clp->noshare) && (clp->in_flags.dio || clp->out_flags.dio)) {
4391         pr2serr("dio flag can only be used with noshare=1\n");
4392         return SG_LIB_SYNTAX_ERROR;
4393     }
4394     if (clp->nmrqs > 0) {
4395         if (clp->in_flags.mrq_immed || clp->out_flags.mrq_immed)
4396             clp->mrq_async = true;
4397     }
4398     /* defaulting transfer size to 128*2048 for CD/DVDs is too large
4399        for the block layer in lk 2.6 and results in an EIO on the
4400        SG_IO ioctl. So reduce it in that case. */
4401     if ((clp->bs >= 2048) && (! bpt_given))
4402         clp->bpt = DEF_BLOCKS_PER_2048TRANSFER;
4403     if (clp->ofsplit >= clp->bpt) {
4404         pr2serr("ofsplit when given must be less than BPT\n");
4405         return SG_LIB_SYNTAX_ERROR;
4406     }
4407     if ((num_threads < 1) || (num_threads > MAX_NUM_THREADS)) {
4408         pr2serr("too few or too many threads requested\n");
4409         usage(1);
4410         return SG_LIB_SYNTAX_ERROR;
4411     }
4412     if (clp->in_flags.swait || clp->out_flags.swait) {
4413         if (clp->verbose)
4414             pr2serr("the 'swait' flag is now ignored\n");
4415         /* remnants ... */
4416         if (clp->in_flags.swait && (! clp->out_flags.swait))
4417             clp->out_flags.swait = true;
4418     }
4419     clp->unit_nanosec = (do_time > 1) || !!getenv("SG3_UTILS_LINUX_NANO");
4420 #if 0
4421     if (clp->verbose) {
4422         pr2serr("%sif=%s skip=%" PRId64 " of=%s seek=%" PRId64 " count=%"
4423                 PRId64, my_name, inf, clp->skip, outf, clp->seek, dd_count);
4424         if (clp->nmrqs > 0)
4425             pr2serr(" mrq=%d%s\n", clp->nmrqs, (clp->mrq_cmds ? ",C" : ""));
4426         else
4427             pr2serr("\n");
4428     }
4429 #endif
4430     return 0;
4431 }
4432 
4433 
4434 int
main(int argc,char * argv[])4435 main(int argc, char * argv[])
4436 {
4437     char inf[INOUTF_SZ];
4438     char outf[INOUTF_SZ];
4439     char out2f[INOUTF_SZ];
4440     char outregf[INOUTF_SZ];
4441     int res, k, err;
4442     int64_t in_num_sect = 0;
4443     int64_t out_num_sect = 0;
4444     int in_sect_sz, out_sect_sz, status, flags;
4445     void * vp;
4446     const char * ccp = NULL;
4447     const char * cc2p;
4448     struct global_collection * clp = &gcoll;
4449     Thread_info thread_arr[MAX_NUM_THREADS] {};
4450     char ebuff[EBUFF_SZ];
4451 #if SG_LIB_ANDROID
4452     struct sigaction actions;
4453 
4454     memset(&actions, 0, sizeof(actions));
4455     sigemptyset(&actions.sa_mask);
4456     actions.sa_flags = 0;
4457     actions.sa_handler = thread_exit_handler;
4458     sigaction(SIGUSR1, &actions, NULL);
4459     sigaction(SIGUSR2, &actions, NULL);
4460 #endif
4461     /* memset(clp, 0, sizeof(*clp)); */
4462     clp->bpt = DEF_BLOCKS_PER_TRANSFER;
4463     clp->cmd_timeout = DEF_TIMEOUT;
4464     clp->in_type = FT_OTHER;
4465     /* change dd's default: if of=OFILE not given, assume /dev/null */
4466     clp->out_type = FT_DEV_NULL;
4467     clp->out2_type = FT_DEV_NULL;
4468     clp->cdbsz_in = DEF_SCSI_CDBSZ;
4469     clp->cdbsz_out = DEF_SCSI_CDBSZ;
4470     clp->sdt_ict = DEF_SDT_ICT_MS;
4471     clp->sdt_crt = DEF_SDT_CRT_SEC;
4472     clp->nmrqs = DEF_NUM_MRQS;
4473     clp->unshare = true;
4474     inf[0] = '\0';
4475     outf[0] = '\0';
4476     out2f[0] = '\0';
4477     outregf[0] = '\0';
4478     fetch_sg_version();
4479     if (sg_version >= 40045)
4480         sg_version_ge_40045 = true;
4481 
4482     res = parse_cmdline_sanity(argc, argv, clp, inf, outf, out2f, outregf);
4483     if (SG_LIB_OK_FALSE == res)
4484         return 0;
4485     if (res)
4486         return res;
4487     if (sg_version > 40000) {
4488         if (! clp->in_flags.v3)
4489             clp->in_flags.v4 = true;
4490         if (! clp->out_flags.v3)
4491             clp->out_flags.v4 = true;
4492     }
4493 
4494     install_handler(SIGINT, interrupt_handler);
4495     install_handler(SIGQUIT, interrupt_handler);
4496     install_handler(SIGPIPE, interrupt_handler);
4497     install_handler(SIGUSR1, siginfo_handler);
4498     install_handler(SIGUSR2, siginfo2_handler);
4499 
4500     clp->infd = STDIN_FILENO;
4501     clp->outfd = STDOUT_FILENO;
4502     if (clp->in_flags.ff && clp->in_flags.zero) {
4503         ccp = "<addr_as_data>";
4504         cc2p = "addr_as_data";
4505     } else if (clp->in_flags.ff) {
4506         ccp = "<0xff bytes>";
4507         cc2p = "ff";
4508     } else if (clp->in_flags.random) {
4509         ccp = "<random>";
4510         cc2p = "random";
4511     } else if (clp->in_flags.zero) {
4512         ccp = "<zero bytes>";
4513         cc2p = "00";
4514     }
4515     if (ccp) {
4516         if (inf[0]) {
4517             pr2serr("%siflag=%s and if=%s contradict\n", my_name, cc2p, inf);
4518             return SG_LIB_CONTRADICT;
4519         }
4520         clp->in_type = FT_RANDOM_0_FF;
4521         clp->infp = ccp;
4522         clp->infd = -1;
4523     } else if (inf[0] && ('-' != inf[0])) {
4524         clp->in_type = dd_filetype(inf, clp->in_st_size);
4525 
4526         if (FT_ERROR == clp->in_type) {
4527             pr2serr("%sunable to access %s\n", my_name, inf);
4528             return SG_LIB_FILE_ERROR;
4529         } else if (FT_ST == clp->in_type) {
4530             pr2serr("%sunable to use scsi tape device %s\n", my_name, inf);
4531             return SG_LIB_FILE_ERROR;
4532         } else if (FT_CHAR == clp->in_type) {
4533             pr2serr("%sunable to use unknown char device %s\n", my_name, inf);
4534             return SG_LIB_FILE_ERROR;
4535         } else if (FT_SG == clp->in_type) {
4536             clp->infd = sg_in_open(clp, inf, NULL, NULL);
4537             if (clp->verbose > 2)
4538                 pr2serr("using sg v%c interface on %s\n",
4539                         (clp->in_flags.v4 ? '4' : '3'), inf);
4540             if (clp->infd < 0)
4541                 return -clp->infd;
4542         } else {
4543             flags = O_RDONLY;
4544             if (clp->in_flags.direct)
4545                 flags |= O_DIRECT;
4546             if (clp->in_flags.excl)
4547                 flags |= O_EXCL;
4548             if (clp->in_flags.dsync)
4549                 flags |= O_SYNC;
4550 
4551             if ((clp->infd = open(inf, flags)) < 0) {
4552                 err = errno;
4553                 snprintf(ebuff, EBUFF_SZ, "%scould not open %s for reading",
4554                          my_name, inf);
4555                 perror(ebuff);
4556                 return sg_convert_errno(err);
4557             } else if (clp->skip > 0) {
4558                 off64_t offset = clp->skip;
4559 
4560                 offset *= clp->bs;       /* could exceed 32 here! */
4561                 if (lseek64(clp->infd, offset, SEEK_SET) < 0) {
4562                     err = errno;
4563                     snprintf(ebuff, EBUFF_SZ, "%scouldn't skip to required "
4564                              "position on %s", my_name, inf);
4565                     perror(ebuff);
4566                     return sg_convert_errno(err);
4567                 }
4568             }
4569         }
4570         clp->infp = inf;
4571         if ((clp->in_flags.v3 || clp->in_flags.v4_given) &&
4572             (FT_SG != clp->in_type)) {
4573             clp->in_flags.v3 = false;
4574             clp->in_flags.v4 = false;
4575             pr2serr("%siflag= v3 and v4 both ignored when IFILE is not sg "
4576                     "device\n", my_name);
4577         }
4578     }
4579     if (clp->verbose && (clp->in_flags.no_waitq || clp->out_flags.no_waitq))
4580         pr2serr("no_waitq: flag no longer does anything\n");
4581     if (outf[0])
4582         clp->ofile_given = true;
4583     if (outf[0] && ('-' != outf[0])) {
4584         clp->out_type = dd_filetype(outf, clp->out_st_size);
4585 
4586         if ((FT_SG != clp->out_type) && clp->verify) {
4587             pr2serr("%s --verify only supported by sg OFILEs\n", my_name);
4588             return SG_LIB_FILE_ERROR;
4589         } else if (FT_ST == clp->out_type) {
4590             pr2serr("%sunable to use scsi tape device %s\n", my_name, outf);
4591             return SG_LIB_FILE_ERROR;
4592         } else if (FT_CHAR == clp->out_type) {
4593             pr2serr("%sunable to use unknown char device %s\n", my_name, outf);
4594             return SG_LIB_FILE_ERROR;
4595         } else if (FT_SG == clp->out_type) {
4596             clp->outfd = sg_out_open(clp, outf, NULL, NULL);
4597             if (clp->verbose > 2)
4598                 pr2serr("using sg v%c interface on %s\n",
4599                         (clp->out_flags.v4 ? '4' : '3'), outf);
4600             if (clp->outfd < 0)
4601                 return -clp->outfd;
4602         } else if (FT_DEV_NULL == clp->out_type)
4603             clp->outfd = -1; /* don't bother opening */
4604         else {
4605             flags = O_WRONLY;
4606             if (! clp->out_flags.nocreat)
4607                 flags |= O_CREAT;
4608             if (clp->out_flags.direct)
4609                 flags |= O_DIRECT;
4610             if (clp->out_flags.excl)
4611                 flags |= O_EXCL;
4612             if (clp->out_flags.dsync)
4613                 flags |= O_SYNC;
4614             if (clp->out_flags.append)
4615                 flags |= O_APPEND;
4616 
4617             if ((clp->outfd = open(outf, flags, 0666)) < 0) {
4618                 err = errno;
4619                 snprintf(ebuff, EBUFF_SZ, "%scould not open %s for "
4620                          "writing", my_name, outf);
4621                 perror(ebuff);
4622                 return sg_convert_errno(err);
4623             }
4624             if (clp->seek > 0) {
4625                 off64_t offset = clp->seek;
4626 
4627                 offset *= clp->bs;       /* could exceed 32 bits here! */
4628                 if (lseek64(clp->outfd, offset, SEEK_SET) < 0) {
4629                     err = errno;
4630                     snprintf(ebuff, EBUFF_SZ, "%scouldn't seek to required "
4631                              "position on %s", my_name, outf);
4632                     perror(ebuff);
4633                     return sg_convert_errno(err);
4634                 }
4635             }
4636         }
4637         clp->outfp = outf;
4638         if ((clp->out_flags.v3 || clp->out_flags.v4_given) &&
4639             (FT_SG != clp->out_type)) {
4640             clp->out_flags.v3 = false;
4641             clp->out_flags.v4 = false;
4642             pr2serr("%soflag= v3 and v4 both ignored when OFILE is not sg "
4643                     "device\n", my_name);
4644         }
4645     }
4646 
4647     if (out2f[0])
4648         clp->ofile2_given = true;
4649     if (out2f[0] && ('-' != out2f[0])) {
4650         off_t out2_st_size;
4651 
4652         clp->out2_type = dd_filetype(out2f, out2_st_size);
4653         if (FT_ST == clp->out2_type) {
4654             pr2serr("%sunable to use scsi tape device %s\n", my_name, out2f);
4655             return SG_LIB_FILE_ERROR;
4656         }
4657         else if (FT_SG == clp->out2_type) {
4658             clp->out2fd = sg_out_open(clp, out2f, NULL, NULL);
4659             if (clp->out2fd < 0)
4660                 return -clp->out2fd;
4661         }
4662         else if (FT_DEV_NULL == clp->out2_type)
4663             clp->out2fd = -1; /* don't bother opening */
4664         else {
4665             flags = O_WRONLY;
4666             if (! clp->out_flags.nocreat)
4667                 flags |= O_CREAT;
4668             if (clp->out_flags.direct)
4669                 flags |= O_DIRECT;
4670             if (clp->out_flags.excl)
4671                 flags |= O_EXCL;
4672             if (clp->out_flags.dsync)
4673                 flags |= O_SYNC;
4674             if (clp->out_flags.append)
4675                 flags |= O_APPEND;
4676 
4677             if ((clp->out2fd = open(out2f, flags, 0666)) < 0) {
4678                 err = errno;
4679                 snprintf(ebuff, EBUFF_SZ, "%scould not open %s for "
4680                          "writing", my_name, out2f);
4681                 perror(ebuff);
4682                 return sg_convert_errno(err);
4683             }
4684             if (clp->seek > 0) {
4685                 off64_t offset = clp->seek;
4686 
4687                 offset *= clp->bs;       /* could exceed 32 bits here! */
4688                 if (lseek64(clp->out2fd, offset, SEEK_SET) < 0) {
4689                     err = errno;
4690                     snprintf(ebuff, EBUFF_SZ, "%scouldn't seek to required "
4691                              "position on %s", my_name, out2f);
4692                     perror(ebuff);
4693                     return sg_convert_errno(err);
4694                 }
4695             }
4696         }
4697         clp->out2fp = out2f;
4698     }
4699     if ((FT_SG == clp->in_type ) && (FT_SG == clp->out_type)) {
4700         if (clp->nmrqs > 0) {
4701             if (clp->is_mrq_i == clp->is_mrq_o) {
4702                 if (clp->ofsplit > 0) {
4703                     if (0 != (clp->nmrqs % 3)) {
4704                         pr2serr("When both IFILE+OFILE sg devices and OSP>0, "
4705                                 "mrq=NRQS must be divisible by 3\n");
4706                         pr2serr("    triple NRQS to avoid error\n");
4707                         clp->nmrqs *= 3;
4708                     }
4709                 } else if (0 != (clp->nmrqs % 2)) {
4710                     pr2serr("When both IFILE+OFILE sg devices (and OSP=0), "
4711                             "mrq=NRQS must be even\n");
4712                     pr2serr("    double NRQS to avoid error\n");
4713                     clp->nmrqs *= 2;
4714                 }
4715             }
4716             if (clp->is_mrq_i && clp->is_mrq_o)
4717                 ;
4718             else if (clp->is_mrq_i || clp->is_mrq_o)
4719                 clp->unbalanced_mrq = true;
4720         }
4721         if (clp->in_flags.v4_given && (! clp->out_flags.v3)) {
4722             if (! clp->out_flags.v4_given) {
4723                 clp->out_flags.v4 = true;
4724                 if (clp->verbose)
4725                     pr2serr("Changing OFILE from v3 to v4, use oflag=v3 to "
4726                             "force v3\n");
4727             }
4728         }
4729         if (clp->out_flags.v4_given && (! clp->in_flags.v3)) {
4730             if (! clp->in_flags.v4_given) {
4731                 clp->in_flags.v4 = true;
4732                 if (clp->verbose)
4733                     pr2serr("Changing IFILE from v3 to v4, use iflag=v3 to "
4734                             "force v3\n");
4735             }
4736         }
4737 #if 0
4738         if (clp->mrq_async && !(clp->noshare)) {
4739             pr2serr("With mrq_immed also need noshare on sg-->sg copy\n");
4740             return SG_LIB_SYNTAX_ERROR;
4741         }
4742 #endif
4743     } else if ((FT_SG == clp->in_type ) || (FT_SG == clp->out_type)) {
4744         if (clp->nmrqs > 0)
4745             clp->unbalanced_mrq = true;
4746     }
4747     if (outregf[0]) {
4748         off_t outrf_st_size;
4749         int ftyp = dd_filetype(outregf, outrf_st_size);
4750 
4751         clp->outreg_type = ftyp;
4752         if (! ((FT_OTHER == ftyp) || (FT_ERROR == ftyp) ||
4753                (FT_DEV_NULL == ftyp))) {
4754             pr2serr("File: %s can only be regular file or pipe (or "
4755                     "/dev/null)\n", outregf);
4756             return SG_LIB_SYNTAX_ERROR;
4757         }
4758         if ((clp->outregfd = open(outregf, O_WRONLY | O_CREAT, 0666)) < 0) {
4759             err = errno;
4760             snprintf(ebuff, EBUFF_SZ, "could not open %s for writing",
4761                      outregf);
4762             perror(ebuff);
4763             return sg_convert_errno(err);
4764         }
4765         if (clp->verbose > 1)
4766             pr2serr("ofreg=%s opened okay, fd=%d\n", outregf, clp->outregfd);
4767         if (FT_ERROR == ftyp)
4768             clp->outreg_type = FT_OTHER;        /* regular file created */
4769     } else
4770         clp->outregfd = -1;
4771 
4772     if ((STDIN_FILENO == clp->infd) && (STDOUT_FILENO == clp->outfd)) {
4773         pr2serr("Won't default both IFILE to stdin _and_ OFILE to "
4774                 "/dev/null\n");
4775         pr2serr("For more information use '--help' or '-h'\n");
4776         return SG_LIB_SYNTAX_ERROR;
4777     }
4778     if (dd_count < 0) {
4779         in_num_sect = -1;
4780         if (FT_SG == clp->in_type) {
4781             res = scsi_read_capacity(clp->infd, &in_num_sect, &in_sect_sz);
4782             if (2 == res) {
4783                 pr2serr("Unit attention, media changed(in), continuing\n");
4784                 res = scsi_read_capacity(clp->infd, &in_num_sect,
4785                                          &in_sect_sz);
4786             }
4787             if (0 != res) {
4788                 if (res == SG_LIB_CAT_INVALID_OP)
4789                     pr2serr("read capacity not supported on %s\n", inf);
4790                 else if (res == SG_LIB_CAT_NOT_READY)
4791                     pr2serr("read capacity failed, %s not ready\n", inf);
4792                 else
4793                     pr2serr("Unable to read capacity on %s\n", inf);
4794                 return SG_LIB_FILE_ERROR;
4795             } else if (clp->bs != in_sect_sz) {
4796                 pr2serr(">> warning: logical block size on %s confusion: "
4797                         "bs=%d, device claims=%d\n", clp->infp, clp->bs,
4798                         in_sect_sz);
4799                 return SG_LIB_FILE_ERROR;
4800             }
4801         } else if (FT_BLOCK == clp->in_type) {
4802             if (0 != read_blkdev_capacity(clp->infd, &in_num_sect,
4803                                           &in_sect_sz)) {
4804                 pr2serr("Unable to read block capacity on %s\n", inf);
4805                 in_num_sect = -1;
4806             }
4807             if (clp->bs != in_sect_sz) {
4808                 pr2serr("logical block size on %s confusion; bs=%d, from "
4809                         "device=%d\n", inf, clp->bs, in_sect_sz);
4810                 in_num_sect = -1;
4811             }
4812         } else if (FT_OTHER == clp->in_type) {
4813             in_num_sect = clp->in_st_size / clp->bs;
4814             if (clp->in_st_size % clp->bs) {
4815                 ++in_num_sect;
4816                 pr2serr("Warning: the file size of %s is not a multiple of BS "
4817                         "[%d]\n", inf, clp->bs);
4818             }
4819         }
4820         if (in_num_sect > clp->skip)
4821             in_num_sect -= clp->skip;
4822 
4823         out_num_sect = -1;
4824         if (FT_SG == clp->out_type) {
4825             res = scsi_read_capacity(clp->outfd, &out_num_sect, &out_sect_sz);
4826             if (2 == res) {
4827                 pr2serr("Unit attention, media changed(out), continuing\n");
4828                 res = scsi_read_capacity(clp->outfd, &out_num_sect,
4829                                          &out_sect_sz);
4830             }
4831             if (0 != res) {
4832                 if (res == SG_LIB_CAT_INVALID_OP)
4833                     pr2serr("read capacity not supported on %s\n", outf);
4834                 else if (res == SG_LIB_CAT_NOT_READY)
4835                     pr2serr("read capacity failed, %s not ready\n", outf);
4836                 else
4837                     pr2serr("Unable to read capacity on %s\n", outf);
4838                 out_num_sect = -1;
4839                 return SG_LIB_FILE_ERROR;
4840             } else if (clp->bs != out_sect_sz) {
4841                 pr2serr(">> warning: logical block size on %s confusion: "
4842                         "bs=%d, device claims=%d\n", clp->outfp, clp->bs,
4843                         out_sect_sz);
4844                 return SG_LIB_FILE_ERROR;
4845             }
4846         } else if (FT_BLOCK == clp->out_type) {
4847             if (0 != read_blkdev_capacity(clp->outfd, &out_num_sect,
4848                                           &out_sect_sz)) {
4849                 pr2serr("Unable to read block capacity on %s\n", outf);
4850                 out_num_sect = -1;
4851             }
4852             if (clp->bs != out_sect_sz) {
4853                 pr2serr("logical block size on %s confusion: bs=%d, from "
4854                         "device=%d\n", outf, clp->bs, out_sect_sz);
4855                 out_num_sect = -1;
4856             }
4857         } else if (FT_OTHER == clp->out_type) {
4858             out_num_sect = clp->out_st_size / clp->bs;
4859             if (clp->out_st_size % clp->bs) {
4860                 ++out_num_sect;
4861                 pr2serr("Warning: the file size of %s is not a multiple of BS "
4862                         "[%d]\n", outf, clp->bs);
4863             }
4864         }
4865         if (out_num_sect > clp->seek)
4866             out_num_sect -= clp->seek;
4867 
4868         if (in_num_sect > 0) {
4869             if (out_num_sect > 0)
4870                 dd_count = (in_num_sect > out_num_sect) ? out_num_sect :
4871                                                           in_num_sect;
4872             else
4873                 dd_count = in_num_sect;
4874         }
4875         else
4876             dd_count = out_num_sect;
4877     }
4878     if (clp->verbose > 2)
4879         pr2serr("Start of loop, count=%" PRId64 ", in_num_sect=%" PRId64
4880                 ", out_num_sect=%" PRId64 "\n", dd_count, in_num_sect,
4881                 out_num_sect);
4882     if (dd_count < 0) {
4883         pr2serr("Couldn't calculate count, please give one\n");
4884         return SG_LIB_CAT_OTHER;
4885     }
4886     if (! clp->cdbsz_given) {
4887         if ((FT_SG == clp->in_type) && (MAX_SCSI_CDBSZ != clp->cdbsz_in) &&
4888             (((dd_count + clp->skip) > UINT_MAX) || (clp->bpt > USHRT_MAX))) {
4889             pr2serr("Note: SCSI command size increased to 16 bytes (for "
4890                     "'if')\n");
4891             clp->cdbsz_in = MAX_SCSI_CDBSZ;
4892         }
4893         if ((FT_SG == clp->out_type) && (MAX_SCSI_CDBSZ != clp->cdbsz_out) &&
4894             (((dd_count + clp->seek) > UINT_MAX) || (clp->bpt > USHRT_MAX))) {
4895             pr2serr("Note: SCSI command size increased to 16 bytes (for "
4896                     "'of')\n");
4897             clp->cdbsz_out = MAX_SCSI_CDBSZ;
4898         }
4899     }
4900 
4901     // clp->in_count = dd_count;
4902     clp->in_rem_count = dd_count;
4903     clp->out_count = dd_count;
4904     clp->out_rem_count = dd_count;
4905     clp->out_blk = clp->seek;
4906     status = pthread_mutex_init(&clp->in_mutex, NULL);
4907     if (0 != status) err_exit(status, "init in_mutex");
4908     status = pthread_mutex_init(&clp->out_mutex, NULL);
4909     if (0 != status) err_exit(status, "init out_mutex");
4910     status = pthread_mutex_init(&clp->out2_mutex, NULL);
4911     if (0 != status) err_exit(status, "init out2_mutex");
4912     status = pthread_cond_init(&clp->out_sync_cv, NULL);
4913     if (0 != status) err_exit(status, "init out_sync_cv");
4914 
4915     if (clp->dry_run > 0) {
4916         pr2serr("Due to --dry-run option, bypass copy/read\n");
4917         goto fini;
4918     }
4919     if (! clp->ofile_given)
4920         pr2serr("of=OFILE not given so only read from IFILE, to output to "
4921                 "stdout use 'of=-'\n");
4922 
4923     sigemptyset(&signal_set);
4924     sigaddset(&signal_set, SIGINT);
4925     sigaddset(&signal_set, SIGUSR2);
4926     status = pthread_sigmask(SIG_BLOCK, &signal_set, &orig_signal_set);
4927     if (0 != status) err_exit(status, "pthread_sigmask");
4928     status = pthread_create(&sig_listen_thread_id, NULL,
4929                             sig_listen_thread, (void *)clp);
4930     if (0 != status) err_exit(status, "pthread_create, sig...");
4931 
4932     if (do_time) {
4933         start_tm.tv_sec = 0;
4934         start_tm.tv_usec = 0;
4935         gettimeofday(&start_tm, NULL);
4936     }
4937 
4938 /* vvvvvvvvvvv  Start worker threads  vvvvvvvvvvvvvvvvvvvvvvvv */
4939     if ((clp->out_rem_count.load() > 0) && (num_threads > 0)) {
4940         Thread_info *tip = thread_arr + 0;
4941 
4942         tip->gcp = clp;
4943         tip->id = 0;
4944         /* Run 1 work thread to shake down infant retryable stuff */
4945         status = pthread_mutex_lock(&clp->out_mutex);
4946         if (0 != status) err_exit(status, "lock out_mutex");
4947         status = pthread_create(&tip->a_pthr, NULL, read_write_thread,
4948                                 (void *)tip);
4949         if (0 != status) err_exit(status, "pthread_create");
4950 
4951         /* wait for any broadcast */
4952         pthread_cleanup_push(cleanup_out, (void *)clp);
4953         status = pthread_cond_wait(&clp->out_sync_cv, &clp->out_mutex);
4954         if (0 != status) err_exit(status, "cond out_sync_cv");
4955         pthread_cleanup_pop(0);
4956         status = pthread_mutex_unlock(&clp->out_mutex);
4957         if (0 != status) err_exit(status, "unlock out_mutex");
4958 
4959         /* now start the rest of the threads */
4960         for (k = 1; k < num_threads; ++k) {
4961             tip = thread_arr + k;
4962             tip->gcp = clp;
4963             tip->id = k;
4964             status = pthread_create(&tip->a_pthr, NULL, read_write_thread,
4965                                     (void *)tip);
4966             if (0 != status) err_exit(status, "pthread_create");
4967         }
4968 
4969         /* now wait for worker threads to finish */
4970         for (k = 0; k < num_threads; ++k) {
4971             tip = thread_arr + k;
4972             status = pthread_join(tip->a_pthr, &vp);
4973             if (0 != status) err_exit(status, "pthread_join");
4974             if (clp->verbose > 2)
4975                 pr2serr_lk("%d <-- Worker thread terminated, vp=%s\n", k,
4976                            ((vp == clp) ? "clp" : "NULL (or !clp)"));
4977         }
4978     }   /* started worker threads and here after they have all exited */
4979 
4980     if (do_time && (start_tm.tv_sec || start_tm.tv_usec))
4981         calc_duration_throughput(0);
4982 
4983     shutting_down = true;
4984     status = pthread_join(sig_listen_thread_id, &vp);
4985     if (0 != status) err_exit(status, "pthread_join");
4986 #if 0
4987     /* pthread_cancel() has issues and is not supported in Android */
4988     status = pthread_kill(sig_listen_thread_id, SIGUSR2);
4989     if (0 != status) err_exit(status, "pthread_kill");
4990     std::this_thread::yield();      // not enough it seems
4991     {   /* allow time for SIGUSR2 signal to get through */
4992         struct timespec tspec = {0, 400000}; /* 400 usecs */
4993 
4994         nanosleep(&tspec, NULL);
4995     }
4996 #endif
4997 
4998     if (do_sync) {
4999         if (FT_SG == clp->out_type) {
5000             pr2serr_lk(">> Synchronizing cache on %s\n", outf);
5001             res = sg_ll_sync_cache_10(clp->outfd, 0, 0, 0, 0, 0, false, 0);
5002             if (SG_LIB_CAT_UNIT_ATTENTION == res) {
5003                 pr2serr_lk("Unit attention(out), continuing\n");
5004                 res = sg_ll_sync_cache_10(clp->outfd, 0, 0, 0, 0, 0, false,
5005                                           0);
5006             }
5007             if (0 != res)
5008                 pr2serr_lk("Unable to synchronize cache\n");
5009         }
5010         if (FT_SG == clp->out2_type) {
5011             pr2serr_lk(">> Synchronizing cache on %s\n", out2f);
5012             res = sg_ll_sync_cache_10(clp->out2fd, 0, 0, 0, 0, 0, false, 0);
5013             if (SG_LIB_CAT_UNIT_ATTENTION == res) {
5014                 pr2serr_lk("Unit attention(out2), continuing\n");
5015                 res = sg_ll_sync_cache_10(clp->out2fd, 0, 0, 0, 0, 0, false,
5016                                           0);
5017             }
5018             if (0 != res)
5019                 pr2serr_lk("Unable to synchronize cache (of2)\n");
5020         }
5021     }
5022 
5023 fini:
5024     if ((STDIN_FILENO != clp->infd) && (clp->infd >= 0))
5025         close(clp->infd);
5026     if ((STDOUT_FILENO != clp->outfd) && (FT_DEV_NULL != clp->out_type) &&
5027         (clp->outfd >= 0))
5028         close(clp->outfd);
5029     if ((clp->out2fd >= 0) && (STDOUT_FILENO != clp->out2fd) &&
5030         (FT_DEV_NULL != clp->out2_type))
5031         close(clp->out2fd);
5032     if ((clp->outregfd >= 0) && (STDOUT_FILENO != clp->outregfd) &&
5033         (FT_DEV_NULL != clp->outreg_type))
5034         close(clp->outregfd);
5035     res = exit_status;
5036     if ((0 != clp->out_count.load()) && (0 == clp->dry_run)) {
5037         pr2serr(">>>> Some error occurred, remaining blocks=%" PRId64 "\n",
5038                 clp->out_count.load());
5039         if (0 == res)
5040             res = SG_LIB_CAT_OTHER;
5041     }
5042     print_stats("");
5043     if (clp->dio_incomplete_count.load()) {
5044         int fd;
5045         char c;
5046 
5047         pr2serr(">> Direct IO requested but incomplete %d times\n",
5048                 clp->dio_incomplete_count.load());
5049         if ((fd = open(sg_allow_dio, O_RDONLY)) >= 0) {
5050             if (1 == read(fd, &c, 1)) {
5051                 if ('0' == c)
5052                     pr2serr(">>> %s set to '0' but should be set to '1' for "
5053                             "direct IO\n", sg_allow_dio);
5054             }
5055             close(fd);
5056         }
5057     }
5058     if (clp->sum_of_resids.load())
5059         pr2serr(">> Non-zero sum of residual counts=%d\n",
5060                clp->sum_of_resids.load());
5061     if (clp->verbose && (num_start_eagain > 0))
5062         pr2serr("Number of start EAGAINs: %d\n", num_start_eagain.load());
5063     if (clp->verbose && (num_fin_eagain > 0))
5064         pr2serr("Number of finish EAGAINs: %d\n", num_fin_eagain.load());
5065     if (clp->verbose && (num_ebusy > 0))
5066         pr2serr("Number of EBUSYs: %d\n", num_ebusy.load());
5067     if (clp->verbose && clp->aen_given && (num_abort_req > 0)) {
5068         pr2serr("Number of Aborts: %d\n", num_abort_req.load());
5069         pr2serr("Number of successful Aborts: %d\n",
5070                 num_abort_req_success.load());
5071     }
5072     if (clp->verbose && clp->m_aen_given && (num_mrq_abort_req > 0)) {
5073         pr2serr("Number of MRQ Aborts: %d\n", num_mrq_abort_req.load());
5074         pr2serr("Number of successful MRQ Aborts: %d\n",
5075                 num_mrq_abort_req_success.load());
5076     }
5077     if (clp->verbose && (num_miscompare > 0))
5078         pr2serr("Number of miscompare%s: %d\n",
5079                 (num_miscompare > 1) ? "s" : "", num_miscompare.load());
5080     if (clp->verbose > 1) {
5081         if (clp->verbose > 3)
5082             pr2serr("Final pack_id=%d, mrq_id=%d\n", mono_pack_id.load(),
5083                     mono_mrq_id.load());
5084         pr2serr("Number of SG_GET_NUM_WAITING calls=%ld\n",
5085                 num_waiting_calls.load());
5086     }
5087     if (clp->verify && (SG_LIB_CAT_MISCOMPARE == res))
5088         pr2serr("Verify/compare failed due to miscompare\n");
5089     return (res >= 0) ? res : SG_LIB_CAT_OTHER;
5090 }
5091