/*
 * unbwt.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
26*30b9430bSXin Li
27*30b9430bSXin Li #if HAVE_CONFIG_H
28*30b9430bSXin Li # include "config.h"
29*30b9430bSXin Li #endif
30*30b9430bSXin Li #include <stdio.h>
31*30b9430bSXin Li #if HAVE_STRING_H
32*30b9430bSXin Li # include <string.h>
33*30b9430bSXin Li #endif
34*30b9430bSXin Li #if HAVE_STDLIB_H
35*30b9430bSXin Li # include <stdlib.h>
36*30b9430bSXin Li #endif
37*30b9430bSXin Li #if HAVE_MEMORY_H
38*30b9430bSXin Li # include <memory.h>
39*30b9430bSXin Li #endif
40*30b9430bSXin Li #if HAVE_STDDEF_H
41*30b9430bSXin Li # include <stddef.h>
42*30b9430bSXin Li #endif
43*30b9430bSXin Li #if HAVE_STRINGS_H
44*30b9430bSXin Li # include <strings.h>
45*30b9430bSXin Li #endif
46*30b9430bSXin Li #if HAVE_SYS_TYPES_H
47*30b9430bSXin Li # include <sys/types.h>
48*30b9430bSXin Li #endif
49*30b9430bSXin Li #if HAVE_IO_H && HAVE_FCNTL_H
50*30b9430bSXin Li # include <io.h>
51*30b9430bSXin Li # include <fcntl.h>
52*30b9430bSXin Li #endif
53*30b9430bSXin Li #include <time.h>
54*30b9430bSXin Li #include <divsufsort.h>
55*30b9430bSXin Li #include "lfs.h"
56*30b9430bSXin Li
57*30b9430bSXin Li
58*30b9430bSXin Li static
59*30b9430bSXin Li size_t
read_int(FILE * fp,saidx_t * n)60*30b9430bSXin Li read_int(FILE *fp, saidx_t *n) {
61*30b9430bSXin Li unsigned char c[4];
62*30b9430bSXin Li size_t m = fread(c, sizeof(unsigned char), 4, fp);
63*30b9430bSXin Li if(m == 4) {
64*30b9430bSXin Li *n = (c[0] << 0) | (c[1] << 8) |
65*30b9430bSXin Li (c[2] << 16) | (c[3] << 24);
66*30b9430bSXin Li }
67*30b9430bSXin Li return m;
68*30b9430bSXin Li }
69*30b9430bSXin Li
/*
 * Writes the program banner (including the library version string) and
 * the usage line to stderr, then terminates the process.
 *
 * progname: name shown in the usage line.
 * status:   exit status handed to exit(); this function does not return.
 */
static
void
print_help(const char *progname, int status) {
  FILE *out = stderr;

  fprintf(out,
          "unbwt, an inverse burrows-wheeler transform program, version %s.\n",
          divsufsort_version());
  fprintf(out, "usage: %s INFILE OUTFILE\n\n", progname);

  exit(status);
}
79*30b9430bSXin Li
80*30b9430bSXin Li int
main(int argc,const char * argv[])81*30b9430bSXin Li main(int argc, const char *argv[]) {
82*30b9430bSXin Li FILE *fp, *ofp;
83*30b9430bSXin Li const char *fname, *ofname;
84*30b9430bSXin Li sauchar_t *T;
85*30b9430bSXin Li saidx_t *A;
86*30b9430bSXin Li LFS_OFF_T n;
87*30b9430bSXin Li size_t m;
88*30b9430bSXin Li saidx_t pidx;
89*30b9430bSXin Li clock_t start, finish;
90*30b9430bSXin Li saint_t err, blocksize, needclose = 3;
91*30b9430bSXin Li
92*30b9430bSXin Li /* Check arguments. */
93*30b9430bSXin Li if((argc == 1) ||
94*30b9430bSXin Li (strcmp(argv[1], "-h") == 0) ||
95*30b9430bSXin Li (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
96*30b9430bSXin Li if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
97*30b9430bSXin Li
98*30b9430bSXin Li /* Open a file for reading. */
99*30b9430bSXin Li if(strcmp(argv[1], "-") != 0) {
100*30b9430bSXin Li #if HAVE_FOPEN_S
101*30b9430bSXin Li if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
102*30b9430bSXin Li #else
103*30b9430bSXin Li if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
104*30b9430bSXin Li #endif
105*30b9430bSXin Li fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
106*30b9430bSXin Li perror(NULL);
107*30b9430bSXin Li exit(EXIT_FAILURE);
108*30b9430bSXin Li }
109*30b9430bSXin Li } else {
110*30b9430bSXin Li #if HAVE__SETMODE && HAVE__FILENO
111*30b9430bSXin Li if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
112*30b9430bSXin Li fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
113*30b9430bSXin Li perror(NULL);
114*30b9430bSXin Li exit(EXIT_FAILURE);
115*30b9430bSXin Li }
116*30b9430bSXin Li #endif
117*30b9430bSXin Li fp = stdin;
118*30b9430bSXin Li fname = "stdin";
119*30b9430bSXin Li needclose ^= 1;
120*30b9430bSXin Li }
121*30b9430bSXin Li
122*30b9430bSXin Li /* Open a file for writing. */
123*30b9430bSXin Li if(strcmp(argv[2], "-") != 0) {
124*30b9430bSXin Li #if HAVE_FOPEN_S
125*30b9430bSXin Li if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
126*30b9430bSXin Li #else
127*30b9430bSXin Li if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
128*30b9430bSXin Li #endif
129*30b9430bSXin Li fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
130*30b9430bSXin Li perror(NULL);
131*30b9430bSXin Li exit(EXIT_FAILURE);
132*30b9430bSXin Li }
133*30b9430bSXin Li } else {
134*30b9430bSXin Li #if HAVE__SETMODE && HAVE__FILENO
135*30b9430bSXin Li if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
136*30b9430bSXin Li fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
137*30b9430bSXin Li perror(NULL);
138*30b9430bSXin Li exit(EXIT_FAILURE);
139*30b9430bSXin Li }
140*30b9430bSXin Li #endif
141*30b9430bSXin Li ofp = stdout;
142*30b9430bSXin Li ofname = "stdout";
143*30b9430bSXin Li needclose ^= 2;
144*30b9430bSXin Li }
145*30b9430bSXin Li
146*30b9430bSXin Li /* Read the blocksize. */
147*30b9430bSXin Li if(read_int(fp, &blocksize) != 4) {
148*30b9430bSXin Li fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
149*30b9430bSXin Li perror(NULL);
150*30b9430bSXin Li exit(EXIT_FAILURE);
151*30b9430bSXin Li }
152*30b9430bSXin Li
153*30b9430bSXin Li /* Allocate 5blocksize bytes of memory. */
154*30b9430bSXin Li T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
155*30b9430bSXin Li A = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
156*30b9430bSXin Li if((T == NULL) || (A == NULL)) {
157*30b9430bSXin Li fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
158*30b9430bSXin Li exit(EXIT_FAILURE);
159*30b9430bSXin Li }
160*30b9430bSXin Li
161*30b9430bSXin Li fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
162*30b9430bSXin Li start = clock();
163*30b9430bSXin Li for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) {
164*30b9430bSXin Li /* Read blocksize bytes of data. */
165*30b9430bSXin Li if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) {
166*30b9430bSXin Li fprintf(stderr, "%s: %s `%s': ",
167*30b9430bSXin Li argv[0],
168*30b9430bSXin Li (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
169*30b9430bSXin Li fname);
170*30b9430bSXin Li perror(NULL);
171*30b9430bSXin Li exit(EXIT_FAILURE);
172*30b9430bSXin Li }
173*30b9430bSXin Li
174*30b9430bSXin Li /* Inverse Burrows-Wheeler Transform. */
175*30b9430bSXin Li if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) {
176*30b9430bSXin Li fprintf(stderr, "%s (reverseBWT): %s.\n",
177*30b9430bSXin Li argv[0],
178*30b9430bSXin Li (err == -1) ? "Invalid data" : "Cannot allocate memory");
179*30b9430bSXin Li exit(EXIT_FAILURE);
180*30b9430bSXin Li }
181*30b9430bSXin Li
182*30b9430bSXin Li /* Write m bytes of data. */
183*30b9430bSXin Li if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) {
184*30b9430bSXin Li fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
185*30b9430bSXin Li perror(NULL);
186*30b9430bSXin Li exit(EXIT_FAILURE);
187*30b9430bSXin Li }
188*30b9430bSXin Li }
189*30b9430bSXin Li if(ferror(fp)) {
190*30b9430bSXin Li fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
191*30b9430bSXin Li perror(NULL);
192*30b9430bSXin Li exit(EXIT_FAILURE);
193*30b9430bSXin Li }
194*30b9430bSXin Li finish = clock();
195*30b9430bSXin Li fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
196*30b9430bSXin Li n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
197*30b9430bSXin Li
198*30b9430bSXin Li /* Close files */
199*30b9430bSXin Li if(needclose & 1) { fclose(fp); }
200*30b9430bSXin Li if(needclose & 2) { fclose(ofp); }
201*30b9430bSXin Li
202*30b9430bSXin Li /* Deallocate memory. */
203*30b9430bSXin Li free(A);
204*30b9430bSXin Li free(T);
205*30b9430bSXin Li
206*30b9430bSXin Li return 0;
207*30b9430bSXin Li }
208