1 /* csv module */
2 
3 /*
4 
5 This module provides the low-level underpinnings of a CSV reading/writing
6 module.  Users should not use this module directly, but import the csv.py
7 module instead.
8 
9 */
10 
11 #define MODULE_VERSION "1.0"
12 
13 #include "Python.h"
14 #include "structmember.h"         // PyMemberDef
15 #include <stdbool.h>
16 
17 /*[clinic input]
18 module _csv
19 [clinic start generated code]*/
20 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
21 
22 #include "clinic/_csv.c.h"
/* Sentinel code points that never denote a real character.
 * NOT_SET: "no character configured"; get_char_or_None() maps it to None.
 * EOL:     pseudo-character fed to parse_process_char() after the last
 *          character of every input line. */
#define NOT_SET ((Py_UCS4)-1)
#define EOL ((Py_UCS4)-2)
25 
26 
/* Per-module state, retrieved with PyModule_GetState().  Every PyObject and
 * PyTypeObject pointer below is released by _csv_clear(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;   /* Dialect registry */
    PyTypeObject *dialect_type;
    PyTypeObject *reader_type;
    PyTypeObject *writer_type;
    long field_limit;   /* max parsed field size */
    PyObject *str_write;
} _csvstate;
36 
/* Forward declaration: _csv_state_from_type() passes the address of this
 * definition to PyType_GetModuleByDef(). */
static struct PyModuleDef _csvmodule;
38 
39 static inline _csvstate*
get_csv_state(PyObject * module)40 get_csv_state(PyObject *module)
41 {
42     void *state = PyModule_GetState(module);
43     assert(state != NULL);
44     return (_csvstate *)state;
45 }
46 
47 static int
_csv_clear(PyObject * module)48 _csv_clear(PyObject *module)
49 {
50     _csvstate *module_state = PyModule_GetState(module);
51     Py_CLEAR(module_state->error_obj);
52     Py_CLEAR(module_state->dialects);
53     Py_CLEAR(module_state->dialect_type);
54     Py_CLEAR(module_state->reader_type);
55     Py_CLEAR(module_state->writer_type);
56     Py_CLEAR(module_state->str_write);
57     return 0;
58 }
59 
60 static int
_csv_traverse(PyObject * module,visitproc visit,void * arg)61 _csv_traverse(PyObject *module, visitproc visit, void *arg)
62 {
63     _csvstate *module_state = PyModule_GetState(module);
64     Py_VISIT(module_state->error_obj);
65     Py_VISIT(module_state->dialects);
66     Py_VISIT(module_state->dialect_type);
67     Py_VISIT(module_state->reader_type);
68     Py_VISIT(module_state->writer_type);
69     return 0;
70 }
71 
72 static void
_csv_free(void * module)73 _csv_free(void *module)
74 {
75    _csv_clear((PyObject *)module);
76 }
77 
/* States of the reader's character-driven parser (see parse_process_char):
 *   START_RECORD            at the start of a record
 *   START_FIELD             expecting the start of a field
 *   ESCAPED_CHAR            escapechar seen outside a quoted field
 *   IN_FIELD                inside an unquoted field
 *   IN_QUOTED_FIELD         inside a quoted field
 *   ESCAPE_IN_QUOTED_FIELD  escapechar seen inside a quoted field
 *   QUOTE_IN_QUOTED_FIELD   quotechar seen inside a quoted field (doublequote)
 *   EAT_CRNL                line break seen; only '\n', '\r' or EOL may follow
 *   AFTER_ESCAPED_CRNL      an escaped '\n' or '\r' was just stored
 */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
83 
/* Accepted values of a dialect's `quoting` option; dialect_check_quoting()
 * rejects anything that is not listed in quote_styles[]. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
87 
/* One row of the quote_styles[] table: a QuoteStyle value and its name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;
92 
93 static const StyleDesc quote_styles[] = {
94     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
95     { QUOTE_ALL,        "QUOTE_ALL" },
96     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
97     { QUOTE_NONE,       "QUOTE_NONE" },
98     { 0 }
99 };
100 
/* Instance layout of the Dialect type.  All fields are filled in and
 * validated by dialect_new(); the character fields use NOT_SET to mean
 * "no character". */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;
114 
/* Instance layout of the reader type.  `field` is a growable Py_UCS4 buffer
 * (see parse_grow_buff) that is freed in Reader_dealloc(); `fields` collects
 * the parsed values of the record currently being read. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;           /* field list for current record */
    ParserState state;          /* current CSV parse state */
    Py_UCS4 *field;             /* temporary buffer */
    Py_ssize_t field_size;      /* size of allocated buffer */
    Py_ssize_t field_len;       /* length of current field */
    int numeric_field;          /* treat field as numeric */
    unsigned long line_num;     /* Source-file line number */
} ReaderObj;
130 
/* Instance layout of the writer type.
 * NOTE(review): the code using these fields is outside this part of the
 * file; the per-field comments are the original author's. */
typedef struct {
    PyObject_HEAD

    PyObject *write;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;            /* buffer for parser.join */
    Py_ssize_t rec_size;        /* size of allocated record */
    Py_ssize_t rec_len;         /* length of record */
    int num_fields;             /* number of fields in record */

    PyObject *error_obj;       /* cached error object */
} WriterObj;
145 
146 /*
147  * DIALECT class
148  */
149 
150 static PyObject *
get_dialect_from_registry(PyObject * name_obj,_csvstate * module_state)151 get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
152 {
153     PyObject *dialect_obj;
154 
155     dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
156     if (dialect_obj == NULL) {
157         if (!PyErr_Occurred())
158             PyErr_Format(module_state->error_obj, "unknown dialect");
159     }
160     else
161         Py_INCREF(dialect_obj);
162 
163     return dialect_obj;
164 }
165 
166 static PyObject *
get_char_or_None(Py_UCS4 c)167 get_char_or_None(Py_UCS4 c)
168 {
169     if (c == NOT_SET) {
170         Py_RETURN_NONE;
171     }
172     else
173         return PyUnicode_FromOrdinal(c);
174 }
175 
176 static PyObject *
Dialect_get_lineterminator(DialectObj * self,void * Py_UNUSED (ignored))177 Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
178 {
179     Py_XINCREF(self->lineterminator);
180     return self->lineterminator;
181 }
182 
183 static PyObject *
Dialect_get_delimiter(DialectObj * self,void * Py_UNUSED (ignored))184 Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
185 {
186     return get_char_or_None(self->delimiter);
187 }
188 
189 static PyObject *
Dialect_get_escapechar(DialectObj * self,void * Py_UNUSED (ignored))190 Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
191 {
192     return get_char_or_None(self->escapechar);
193 }
194 
195 static PyObject *
Dialect_get_quotechar(DialectObj * self,void * Py_UNUSED (ignored))196 Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
197 {
198     return get_char_or_None(self->quotechar);
199 }
200 
201 static PyObject *
Dialect_get_quoting(DialectObj * self,void * Py_UNUSED (ignored))202 Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
203 {
204     return PyLong_FromLong(self->quoting);
205 }
206 
207 static int
_set_bool(const char * name,char * target,PyObject * src,bool dflt)208 _set_bool(const char *name, char *target, PyObject *src, bool dflt)
209 {
210     if (src == NULL)
211         *target = dflt;
212     else {
213         int b = PyObject_IsTrue(src);
214         if (b < 0)
215             return -1;
216         *target = (char)b;
217     }
218     return 0;
219 }
220 
221 static int
_set_int(const char * name,int * target,PyObject * src,int dflt)222 _set_int(const char *name, int *target, PyObject *src, int dflt)
223 {
224     if (src == NULL)
225         *target = dflt;
226     else {
227         int value;
228         if (!PyLong_CheckExact(src)) {
229             PyErr_Format(PyExc_TypeError,
230                          "\"%s\" must be an integer", name);
231             return -1;
232         }
233         value = _PyLong_AsInt(src);
234         if (value == -1 && PyErr_Occurred()) {
235             return -1;
236         }
237         *target = value;
238     }
239     return 0;
240 }
241 
242 static int
_set_char_or_none(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)243 _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
244 {
245     if (src == NULL) {
246         *target = dflt;
247     }
248     else {
249         *target = NOT_SET;
250         if (src != Py_None) {
251             if (!PyUnicode_Check(src)) {
252                 PyErr_Format(PyExc_TypeError,
253                     "\"%s\" must be string or None, not %.200s", name,
254                     Py_TYPE(src)->tp_name);
255                 return -1;
256             }
257             Py_ssize_t len = PyUnicode_GetLength(src);
258             if (len < 0) {
259                 return -1;
260             }
261             if (len != 1) {
262                 PyErr_Format(PyExc_TypeError,
263                     "\"%s\" must be a 1-character string",
264                     name);
265                 return -1;
266             }
267             /* PyUnicode_READY() is called in PyUnicode_GetLength() */
268             *target = PyUnicode_READ_CHAR(src, 0);
269         }
270     }
271     return 0;
272 }
273 
274 static int
_set_char(const char * name,Py_UCS4 * target,PyObject * src,Py_UCS4 dflt)275 _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
276 {
277     if (src == NULL) {
278         *target = dflt;
279     }
280     else {
281         if (!PyUnicode_Check(src)) {
282             PyErr_Format(PyExc_TypeError,
283                          "\"%s\" must be string, not %.200s", name,
284                          Py_TYPE(src)->tp_name);
285                 return -1;
286         }
287         Py_ssize_t len = PyUnicode_GetLength(src);
288         if (len < 0) {
289             return -1;
290         }
291         if (len != 1) {
292             PyErr_Format(PyExc_TypeError,
293                          "\"%s\" must be a 1-character string",
294                          name);
295             return -1;
296         }
297         /* PyUnicode_READY() is called in PyUnicode_GetLength() */
298         *target = PyUnicode_READ_CHAR(src, 0);
299     }
300     return 0;
301 }
302 
303 static int
_set_str(const char * name,PyObject ** target,PyObject * src,const char * dflt)304 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
305 {
306     if (src == NULL)
307         *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
308     else {
309         if (src == Py_None)
310             *target = NULL;
311         else if (!PyUnicode_Check(src)) {
312             PyErr_Format(PyExc_TypeError,
313                          "\"%s\" must be a string", name);
314             return -1;
315         }
316         else {
317             if (PyUnicode_READY(src) == -1)
318                 return -1;
319             Py_INCREF(src);
320             Py_XSETREF(*target, src);
321         }
322     }
323     return 0;
324 }
325 
326 static int
dialect_check_quoting(int quoting)327 dialect_check_quoting(int quoting)
328 {
329     const StyleDesc *qs;
330 
331     for (qs = quote_styles; qs->name; qs++) {
332         if ((int)qs->style == quoting)
333             return 0;
334     }
335     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
336     return -1;
337 }
338 
/* Byte offset of field `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Read-only boolean attributes served straight from DialectObj fields. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
    { "strict",             T_BOOL, D_OFF(strict), READONLY },
    { NULL }
};
347 
/* Computed attributes of Dialect.  Only a getter is supplied for each
 * entry; the remaining PyGetSetDef fields are left zero-initialized. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
356 
357 static void
Dialect_dealloc(DialectObj * self)358 Dialect_dealloc(DialectObj *self)
359 {
360     PyTypeObject *tp = Py_TYPE(self);
361     PyObject_GC_UnTrack(self);
362     tp->tp_clear((PyObject *)self);
363     PyObject_GC_Del(self);
364     Py_DECREF(tp);
365 }
366 
/* Keyword names accepted by dialect_new().  The order must match the order
 * of the output pointers passed to PyArg_ParseTupleAndKeywords() there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
379 
380 static _csvstate *
_csv_state_from_type(PyTypeObject * type,const char * name)381 _csv_state_from_type(PyTypeObject *type, const char *name)
382 {
383     PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
384     if (module == NULL) {
385         return NULL;
386     }
387     _csvstate *module_state = PyModule_GetState(module);
388     if (module_state == NULL) {
389         PyErr_Format(PyExc_SystemError,
390                      "%s: No _csv module state found", name);
391         return NULL;
392     }
393     return module_state;
394 }
395 
/* tp_new of the Dialect type.
 *
 * Accepts an optional `dialect` plus the eight option keywords listed in
 * dialect_kws[].  A string `dialect` is resolved through the module's
 * dialect registry.  If the resulting object is already an instance of the
 * module's dialect type and no option overrides it, that object is returned
 * as is.  Otherwise a new instance is allocated; options that were not
 * passed are read from attributes of `dialect` (when given), and options
 * still missing fall back to built-in defaults.
 *
 * Returns a new reference, or NULL with an exception set. */
static PyObject *
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    DialectObj *self;
    PyObject *ret = NULL;
    PyObject *dialect = NULL;
    PyObject *delimiter = NULL;
    PyObject *doublequote = NULL;
    PyObject *escapechar = NULL;
    PyObject *lineterminator = NULL;
    PyObject *quotechar = NULL;
    PyObject *quoting = NULL;
    PyObject *skipinitialspace = NULL;
    PyObject *strict = NULL;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|OOOOOOOOO", dialect_kws,
                                     &dialect,
                                     &delimiter,
                                     &doublequote,
                                     &escapechar,
                                     &lineterminator,
                                     &quotechar,
                                     &quoting,
                                     &skipinitialspace,
                                     &strict))
        return NULL;

    _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
    if (module_state == NULL) {
        return NULL;
    }

    /* From here on `dialect`, when non-NULL, is an owned reference. */
    if (dialect != NULL) {
        if (PyUnicode_Check(dialect)) {
            dialect = get_dialect_from_registry(dialect, module_state);
            if (dialect == NULL)
                return NULL;
        }
        else
            Py_INCREF(dialect);
        /* Can we reuse this instance? */
        if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
            delimiter == NULL &&
            doublequote == NULL &&
            escapechar == NULL &&
            lineterminator == NULL &&
            quotechar == NULL &&
            quoting == NULL &&
            skipinitialspace == NULL &&
            strict == NULL)
            return dialect;
    }

    self = (DialectObj *)type->tp_alloc(type, 0);
    if (self == NULL) {
        Py_CLEAR(dialect);
        return NULL;
    }
    self->lineterminator = NULL;

    /* The parsed arguments are borrowed; take our own references so that
     * the cleanup code can release every option the same way, whether it
     * came from the call or from an attribute lookup below. */
    Py_XINCREF(delimiter);
    Py_XINCREF(doublequote);
    Py_XINCREF(escapechar);
    Py_XINCREF(lineterminator);
    Py_XINCREF(quotechar);
    Py_XINCREF(quoting);
    Py_XINCREF(skipinitialspace);
    Py_XINCREF(strict);
    if (dialect != NULL) {
        /* Fill in option `v` from attribute `n` of `dialect` unless it was
         * passed explicitly.  A failed lookup is not an error: the
         * exception is cleared and `v` stays NULL. */
#define DIALECT_GETATTR(v, n)                            \
        do {                                             \
            if (v == NULL) {                             \
                v = PyObject_GetAttrString(dialect, n);  \
                if (v == NULL)                           \
                    PyErr_Clear();                       \
            }                                            \
        } while (0)
        DIALECT_GETATTR(delimiter, "delimiter");
        DIALECT_GETATTR(doublequote, "doublequote");
        DIALECT_GETATTR(escapechar, "escapechar");
        DIALECT_GETATTR(lineterminator, "lineterminator");
        DIALECT_GETATTR(quotechar, "quotechar");
        DIALECT_GETATTR(quoting, "quoting");
        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
        DIALECT_GETATTR(strict, "strict");
    }

    /* check types and convert to C values */
    /* Each setter stores `dflt` when `src` is NULL; a non-zero result
     * means an exception is set. */
#define DIASET(meth, name, target, src, dflt) \
    if (meth(name, target, src, dflt)) \
        goto err
    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
    DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
    DIASET(_set_bool, "strict", &self->strict, strict, false);

    /* validate options */
    if (dialect_check_quoting(self->quoting))
        goto err;
    if (self->delimiter == NOT_SET) {
        PyErr_SetString(PyExc_TypeError,
                        "\"delimiter\" must be a 1-character string");
        goto err;
    }
    /* quotechar=None with no quoting given switches quoting off */
    if (quotechar == Py_None && quoting == NULL)
        self->quoting = QUOTE_NONE;
    if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
        PyErr_SetString(PyExc_TypeError,
                        "quotechar must be set if quoting enabled");
        goto err;
    }
    if (self->lineterminator == NULL) {
        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
        goto err;
    }

    /* Success: the extra reference taken here survives Py_CLEAR(self)
     * below.  On the error paths `ret` stays NULL and Py_CLEAR(self)
     * releases the only reference to the new object. */
    ret = (PyObject *)self;
    Py_INCREF(self);
err:
    Py_CLEAR(self);
    Py_CLEAR(dialect);
    Py_CLEAR(delimiter);
    Py_CLEAR(doublequote);
    Py_CLEAR(escapechar);
    Py_CLEAR(lineterminator);
    Py_CLEAR(quotechar);
    Py_CLEAR(quoting);
    Py_CLEAR(skipinitialspace);
    Py_CLEAR(strict);
    return ret;
}
532 
533 /* Since dialect is now a heap type, it inherits pickling method for
534  * protocol 0 and 1 from object, therefore it needs to be overridden */
535 
536 PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
537 
538 static PyObject *
Dialect_reduce(PyObject * self,PyObject * args)539 Dialect_reduce(PyObject *self, PyObject *args) {
540     PyErr_Format(PyExc_TypeError,
541         "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
542     return NULL;
543 }
544 
/* Method table of Dialect: both pickling hooks map to Dialect_reduce(). */
static struct PyMethodDef dialect_methods[] = {
    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
    {NULL, NULL}
};
550 
/* Docstring installed as the Dialect type's Py_tp_doc slot. */
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");
555 
/* tp_clear of Dialect: release the only object reference an instance owns.
 * Always returns 0. */
static int
Dialect_clear(DialectObj *self)
{
    Py_CLEAR(self->lineterminator);
    return 0;
}
562 
/* tp_traverse of Dialect: report the line terminator and the instance's
 * type to the garbage collector.  `visit` and `arg` are consumed by the
 * Py_VISIT() macro. */
static int
Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->lineterminator);
    /* the instance holds a reference to its type (see Dialect_dealloc) */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
570 
/* Slot table used by Dialect_Type_spec to build the Dialect heap type. */
static PyType_Slot Dialect_Type_slots[] = {
    {Py_tp_doc, (char*)Dialect_Type_doc},
    {Py_tp_members, Dialect_memberlist},
    {Py_tp_getset, Dialect_getsetlist},
    {Py_tp_new, dialect_new},
    {Py_tp_methods, dialect_methods},
    {Py_tp_dealloc, Dialect_dealloc},
    {Py_tp_clear, Dialect_clear},
    {Py_tp_traverse, Dialect_traverse},
    {0, NULL}
};
582 
/* Specification of the _csv.Dialect type: subclassable (BASETYPE),
 * GC-enabled and flagged as an immutable type. */
PyType_Spec Dialect_Type_spec = {
    .name = "_csv.Dialect",
    .basicsize = sizeof(DialectObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Dialect_Type_slots,
};
590 
591 
592 /*
593  * Return an instance of the dialect type, given a Python instance or kwarg
594  * description of the dialect
595  */
596 static PyObject *
_call_dialect(_csvstate * module_state,PyObject * dialect_inst,PyObject * kwargs)597 _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
598 {
599     PyObject *type = (PyObject *)module_state->dialect_type;
600     if (dialect_inst) {
601         return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
602     }
603     else {
604         return PyObject_VectorcallDict(type, NULL, 0, kwargs);
605     }
606 }
607 
608 /*
609  * READER
610  */
611 static int
parse_save_field(ReaderObj * self)612 parse_save_field(ReaderObj *self)
613 {
614     PyObject *field;
615 
616     field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
617                                       (void *) self->field, self->field_len);
618     if (field == NULL)
619         return -1;
620     self->field_len = 0;
621     if (self->numeric_field) {
622         PyObject *tmp;
623 
624         self->numeric_field = 0;
625         tmp = PyNumber_Float(field);
626         Py_DECREF(field);
627         if (tmp == NULL)
628             return -1;
629         field = tmp;
630     }
631     if (PyList_Append(self->fields, field) < 0) {
632         Py_DECREF(field);
633         return -1;
634     }
635     Py_DECREF(field);
636     return 0;
637 }
638 
639 static int
parse_grow_buff(ReaderObj * self)640 parse_grow_buff(ReaderObj *self)
641 {
642     assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
643 
644     Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
645     Py_UCS4 *field_new = self->field;
646     PyMem_Resize(field_new, Py_UCS4, field_size_new);
647     if (field_new == NULL) {
648         PyErr_NoMemory();
649         return 0;
650     }
651     self->field = field_new;
652     self->field_size = field_size_new;
653     return 1;
654 }
655 
656 static int
parse_add_char(ReaderObj * self,_csvstate * module_state,Py_UCS4 c)657 parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
658 {
659     if (self->field_len >= module_state->field_limit) {
660         PyErr_Format(module_state->error_obj,
661                      "field larger than field limit (%ld)",
662                      module_state->field_limit);
663         return -1;
664     }
665     if (self->field_len == self->field_size && !parse_grow_buff(self))
666         return -1;
667     self->field[self->field_len++] = c;
668     return 0;
669 }
670 
/* Advance the reader's state machine by one character.
 *
 * `c` is either a character of the current input line or the EOL sentinel
 * that Reader_iternext() feeds after the last character of each line.
 * Characters are accumulated with parse_add_char(); completed fields are
 * stored with parse_save_field().  A record is complete once the state is
 * back at START_RECORD after an EOL.
 *
 * Returns 0 on success, -1 with an exception set. */
static int
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == EOL)
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore spaces at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* An escaped line break is stored in the field; the next state
         * then lets the following EOL pass without ending the record. */
        if (c == '\n' || c=='\r') {
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escape character at the very end of a line: store a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* EOL leaves the state unchanged, so the field continues on the
         * next input line; anything else is handled as IN_FIELD. */
        if (c == EOL)
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        /* EOL is ignored here: the state is kept, so the quoted field
         * continues with the next input line. */
        if (c == EOL)
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* escape character at the very end of a line: store a newline */
        if (c == EOL)
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == EOL) {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the character and carry on as an
             * unquoted field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line break only further line-break characters or the
         * end of the line are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == EOL)
            self->state = START_RECORD;
        else {
            PyErr_Format(module_state->error_obj,
                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
852 
853 static int
parse_reset(ReaderObj * self)854 parse_reset(ReaderObj *self)
855 {
856     Py_XSETREF(self->fields, PyList_New(0));
857     if (self->fields == NULL)
858         return -1;
859     self->field_len = 0;
860     self->state = START_RECORD;
861     self->numeric_field = 0;
862     return 0;
863 }
864 
865 static PyObject *
Reader_iternext(ReaderObj * self)866 Reader_iternext(ReaderObj *self)
867 {
868     PyObject *fields = NULL;
869     Py_UCS4 c;
870     Py_ssize_t pos, linelen;
871     unsigned int kind;
872     const void *data;
873     PyObject *lineobj;
874 
875     _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
876                                                    "Reader.__next__");
877     if (module_state == NULL) {
878         return NULL;
879     }
880 
881     if (parse_reset(self) < 0)
882         return NULL;
883     do {
884         lineobj = PyIter_Next(self->input_iter);
885         if (lineobj == NULL) {
886             /* End of input OR exception */
887             if (!PyErr_Occurred() && (self->field_len != 0 ||
888                                       self->state == IN_QUOTED_FIELD)) {
889                 if (self->dialect->strict)
890                     PyErr_SetString(module_state->error_obj,
891                                     "unexpected end of data");
892                 else if (parse_save_field(self) >= 0)
893                     break;
894             }
895             return NULL;
896         }
897         if (!PyUnicode_Check(lineobj)) {
898             PyErr_Format(module_state->error_obj,
899                          "iterator should return strings, "
900                          "not %.200s "
901                          "(the file should be opened in text mode)",
902                          Py_TYPE(lineobj)->tp_name
903                 );
904             Py_DECREF(lineobj);
905             return NULL;
906         }
907         if (PyUnicode_READY(lineobj) == -1) {
908             Py_DECREF(lineobj);
909             return NULL;
910         }
911         ++self->line_num;
912         kind = PyUnicode_KIND(lineobj);
913         data = PyUnicode_DATA(lineobj);
914         pos = 0;
915         linelen = PyUnicode_GET_LENGTH(lineobj);
916         while (linelen--) {
917             c = PyUnicode_READ(kind, data, pos);
918             if (parse_process_char(self, module_state, c) < 0) {
919                 Py_DECREF(lineobj);
920                 goto err;
921             }
922             pos++;
923         }
924         Py_DECREF(lineobj);
925         if (parse_process_char(self, module_state, EOL) < 0)
926             goto err;
927     } while (self->state != START_RECORD);
928 
929     fields = self->fields;
930     self->fields = NULL;
931 err:
932     return fields;
933 }
934 
935 static void
Reader_dealloc(ReaderObj * self)936 Reader_dealloc(ReaderObj *self)
937 {
938     PyTypeObject *tp = Py_TYPE(self);
939     PyObject_GC_UnTrack(self);
940     tp->tp_clear((PyObject *)self);
941     if (self->field != NULL) {
942         PyMem_Free(self->field);
943         self->field = NULL;
944     }
945     PyObject_GC_Del(self);
946     Py_DECREF(tp);
947 }
948 
/* tp_traverse for reader objects: reports every Python object the reader
 * references to the cyclic garbage collector.  The instance's type is
 * visited too (reader types are created from a spec in csv_exec(), so
 * instances hold a reference to their type).
 * Returns 0, or the first non-zero value produced by `visit`.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
958 
/* tp_clear for reader objects: drops the references to the dialect, the
 * input iterator and the pending field list, leaving each member NULL.
 * The raw parse buffer (self->field) is not touched here; Reader_dealloc()
 * frees it.  Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
967 
/* Docstring of the reader type; installed through the Py_tp_doc slot. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
974 
/* Method table of the reader type.  It contains only the sentinel entry:
 * readers are driven through the iterator slots, not through methods. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member `x` inside ReaderObj, for the member table. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes exposed on reader instances. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
985 
986 
/* Slot table of the reader type: docstring, GC support (traverse/clear),
 * iterator protocol (the reader is its own iterator), attribute tables and
 * the deallocator.  Terminated by the {0, NULL} sentinel. */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};
998 
/* Type specification handed to PyType_FromModuleAndSpec() in csv_exec().
 * The type is GC-aware and immutable, and it cannot be instantiated by
 * calling it: reader objects are allocated by csv_reader(). */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Reader_Type_slots
};
1006 
1007 
1008 static PyObject *
csv_reader(PyObject * module,PyObject * args,PyObject * keyword_args)1009 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010 {
1011     PyObject * iterator, * dialect = NULL;
1012     _csvstate *module_state = get_csv_state(module);
1013     ReaderObj * self = PyObject_GC_New(
1014         ReaderObj,
1015         module_state->reader_type);
1016 
1017     if (!self)
1018         return NULL;
1019 
1020     self->dialect = NULL;
1021     self->fields = NULL;
1022     self->input_iter = NULL;
1023     self->field = NULL;
1024     self->field_size = 0;
1025     self->line_num = 0;
1026 
1027     if (parse_reset(self) < 0) {
1028         Py_DECREF(self);
1029         return NULL;
1030     }
1031 
1032     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033         Py_DECREF(self);
1034         return NULL;
1035     }
1036     self->input_iter = PyObject_GetIter(iterator);
1037     if (self->input_iter == NULL) {
1038         Py_DECREF(self);
1039         return NULL;
1040     }
1041     self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042                                                 keyword_args);
1043     if (self->dialect == NULL) {
1044         Py_DECREF(self);
1045         return NULL;
1046     }
1047 
1048     PyObject_GC_Track(self);
1049     return (PyObject *)self;
1050 }
1051 
1052 /*
1053  * WRITER
1054  */
1055 /* ---------------------------------------------------------------- */
1056 static void
join_reset(WriterObj * self)1057 join_reset(WriterObj *self)
1058 {
1059     self->rec_len = 0;
1060     self->num_fields = 0;
1061 }
1062 
/* Granularity, in Py_UCS4 elements, used by join_check_rec_size() when it
 * grows the writer's record buffer. */
#define MEM_INCR 32768
1064 
1065 /* Calculate new record length or append field to record.  Return new
1066  * record length.
1067  */
1068 static Py_ssize_t
join_append_data(WriterObj * self,unsigned int field_kind,const void * field_data,Py_ssize_t field_len,int * quoted,int copy_phase)1069 join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
1070                  Py_ssize_t field_len, int *quoted,
1071                  int copy_phase)
1072 {
1073     DialectObj *dialect = self->dialect;
1074     int i;
1075     Py_ssize_t rec_len;
1076 
1077 #define INCLEN \
1078     do {\
1079         if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1080             goto overflow; \
1081         } \
1082         rec_len++; \
1083     } while(0)
1084 
1085 #define ADDCH(c)                                \
1086     do {\
1087         if (copy_phase) \
1088             self->rec[rec_len] = c;\
1089         INCLEN;\
1090     } while(0)
1091 
1092     rec_len = self->rec_len;
1093 
1094     /* If this is not the first field we need a field separator */
1095     if (self->num_fields > 0)
1096         ADDCH(dialect->delimiter);
1097 
1098     /* Handle preceding quote */
1099     if (copy_phase && *quoted)
1100         ADDCH(dialect->quotechar);
1101 
1102     /* Copy/count field data */
1103     /* If field is null just pass over */
1104     for (i = 0; field_data && (i < field_len); i++) {
1105         Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1106         int want_escape = 0;
1107 
1108         if (c == dialect->delimiter ||
1109             c == dialect->escapechar ||
1110             c == dialect->quotechar  ||
1111             PyUnicode_FindChar(
1112                 dialect->lineterminator, c, 0,
1113                 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1114             if (dialect->quoting == QUOTE_NONE)
1115                 want_escape = 1;
1116             else {
1117                 if (c == dialect->quotechar) {
1118                     if (dialect->doublequote)
1119                         ADDCH(dialect->quotechar);
1120                     else
1121                         want_escape = 1;
1122                 }
1123                 else if (c == dialect->escapechar) {
1124                     want_escape = 1;
1125                 }
1126                 if (!want_escape)
1127                     *quoted = 1;
1128             }
1129             if (want_escape) {
1130                 if (dialect->escapechar == NOT_SET) {
1131                     PyErr_Format(self->error_obj,
1132                                  "need to escape, but no escapechar set");
1133                     return -1;
1134                 }
1135                 ADDCH(dialect->escapechar);
1136             }
1137         }
1138         /* Copy field character into record buffer.
1139          */
1140         ADDCH(c);
1141     }
1142 
1143     if (*quoted) {
1144         if (copy_phase)
1145             ADDCH(dialect->quotechar);
1146         else {
1147             INCLEN; /* starting quote */
1148             INCLEN; /* ending quote */
1149         }
1150     }
1151     return rec_len;
1152 
1153   overflow:
1154     PyErr_NoMemory();
1155     return -1;
1156 #undef ADDCH
1157 #undef INCLEN
1158 }
1159 
1160 static int
join_check_rec_size(WriterObj * self,Py_ssize_t rec_len)1161 join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1162 {
1163     assert(rec_len >= 0);
1164 
1165     if (rec_len > self->rec_size) {
1166         size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167         Py_UCS4 *rec_new = self->rec;
1168         PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169         if (rec_new == NULL) {
1170             PyErr_NoMemory();
1171             return 0;
1172         }
1173         self->rec = rec_new;
1174         self->rec_size = (Py_ssize_t)rec_size_new;
1175     }
1176     return 1;
1177 }
1178 
1179 static int
join_append(WriterObj * self,PyObject * field,int quoted)1180 join_append(WriterObj *self, PyObject *field, int quoted)
1181 {
1182     unsigned int field_kind = -1;
1183     const void *field_data = NULL;
1184     Py_ssize_t field_len = 0;
1185     Py_ssize_t rec_len;
1186 
1187     if (field != NULL) {
1188         if (PyUnicode_READY(field) == -1)
1189             return 0;
1190         field_kind = PyUnicode_KIND(field);
1191         field_data = PyUnicode_DATA(field);
1192         field_len = PyUnicode_GET_LENGTH(field);
1193     }
1194     rec_len = join_append_data(self, field_kind, field_data, field_len,
1195                                &quoted, 0);
1196     if (rec_len < 0)
1197         return 0;
1198 
1199     /* grow record buffer if necessary */
1200     if (!join_check_rec_size(self, rec_len))
1201         return 0;
1202 
1203     self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1204                                      &quoted, 1);
1205     self->num_fields++;
1206 
1207     return 1;
1208 }
1209 
1210 static int
join_append_lineterminator(WriterObj * self)1211 join_append_lineterminator(WriterObj *self)
1212 {
1213     Py_ssize_t terminator_len, i;
1214     unsigned int term_kind;
1215     const void *term_data;
1216 
1217     terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1218     if (terminator_len == -1)
1219         return 0;
1220 
1221     /* grow record buffer if necessary */
1222     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223         return 0;
1224 
1225     term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226     term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227     for (i = 0; i < terminator_len; i++)
1228         self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1229     self->rec_len += terminator_len;
1230 
1231     return 1;
1232 }
1233 
1234 PyDoc_STRVAR(csv_writerow_doc,
1235 "writerow(iterable)\n"
1236 "\n"
1237 "Construct and write a CSV record from an iterable of fields.  Non-string\n"
1238 "elements will be converted to string.");
1239 
1240 static PyObject *
csv_writerow(WriterObj * self,PyObject * seq)1241 csv_writerow(WriterObj *self, PyObject *seq)
1242 {
1243     DialectObj *dialect = self->dialect;
1244     PyObject *iter, *field, *line, *result;
1245 
1246     iter = PyObject_GetIter(seq);
1247     if (iter == NULL) {
1248         if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1249             PyErr_Format(self->error_obj,
1250                          "iterable expected, not %.200s",
1251                          Py_TYPE(seq)->tp_name);
1252         }
1253         return NULL;
1254     }
1255 
1256     /* Join all fields in internal buffer.
1257      */
1258     join_reset(self);
1259     while ((field = PyIter_Next(iter))) {
1260         int append_ok;
1261         int quoted;
1262 
1263         switch (dialect->quoting) {
1264         case QUOTE_NONNUMERIC:
1265             quoted = !PyNumber_Check(field);
1266             break;
1267         case QUOTE_ALL:
1268             quoted = 1;
1269             break;
1270         default:
1271             quoted = 0;
1272             break;
1273         }
1274 
1275         if (PyUnicode_Check(field)) {
1276             append_ok = join_append(self, field, quoted);
1277             Py_DECREF(field);
1278         }
1279         else if (field == Py_None) {
1280             append_ok = join_append(self, NULL, quoted);
1281             Py_DECREF(field);
1282         }
1283         else {
1284             PyObject *str;
1285 
1286             str = PyObject_Str(field);
1287             Py_DECREF(field);
1288             if (str == NULL) {
1289                 Py_DECREF(iter);
1290                 return NULL;
1291             }
1292             append_ok = join_append(self, str, quoted);
1293             Py_DECREF(str);
1294         }
1295         if (!append_ok) {
1296             Py_DECREF(iter);
1297             return NULL;
1298         }
1299     }
1300     Py_DECREF(iter);
1301     if (PyErr_Occurred())
1302         return NULL;
1303 
1304     if (self->num_fields > 0 && self->rec_len == 0) {
1305         if (dialect->quoting == QUOTE_NONE) {
1306             PyErr_Format(self->error_obj,
1307                 "single empty field record must be quoted");
1308             return NULL;
1309         }
1310         self->num_fields--;
1311         if (!join_append(self, NULL, 1))
1312             return NULL;
1313     }
1314 
1315     /* Add line terminator.
1316      */
1317     if (!join_append_lineterminator(self)) {
1318         return NULL;
1319     }
1320 
1321     line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322                                      (void *) self->rec, self->rec_len);
1323     if (line == NULL) {
1324         return NULL;
1325     }
1326     result = PyObject_CallOneArg(self->write, line);
1327     Py_DECREF(line);
1328     return result;
1329 }
1330 
1331 PyDoc_STRVAR(csv_writerows_doc,
1332 "writerows(iterable of iterables)\n"
1333 "\n"
1334 "Construct and write a series of iterables to a csv file.  Non-string\n"
1335 "elements will be converted to string.");
1336 
1337 static PyObject *
csv_writerows(WriterObj * self,PyObject * seqseq)1338 csv_writerows(WriterObj *self, PyObject *seqseq)
1339 {
1340     PyObject *row_iter, *row_obj, *result;
1341 
1342     row_iter = PyObject_GetIter(seqseq);
1343     if (row_iter == NULL) {
1344         return NULL;
1345     }
1346     while ((row_obj = PyIter_Next(row_iter))) {
1347         result = csv_writerow(self, row_obj);
1348         Py_DECREF(row_obj);
1349         if (!result) {
1350             Py_DECREF(row_iter);
1351             return NULL;
1352         }
1353         else
1354              Py_DECREF(result);
1355     }
1356     Py_DECREF(row_iter);
1357     if (PyErr_Occurred())
1358         return NULL;
1359     Py_RETURN_NONE;
1360 }
1361 
/* Methods of the writer type.  Both take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1367 
/* Byte offset of member `x` inside WriterObj, for the member table. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Read-only attributes exposed on writer instances. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
1374 
/* tp_traverse for writer objects: reports the dialect, the write callable,
 * the cached exception class and the instance's type to the cyclic
 * garbage collector.
 * Returns 0, or the first non-zero value produced by `visit`.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1384 
/* tp_clear for writer objects: drops the references to the dialect, the
 * write callable and the exception class, leaving each member NULL.
 * The record buffer (self->rec) is released by Writer_dealloc(), not here.
 * Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1393 
1394 static void
Writer_dealloc(WriterObj * self)1395 Writer_dealloc(WriterObj *self)
1396 {
1397     PyTypeObject *tp = Py_TYPE(self);
1398     PyObject_GC_UnTrack(self);
1399     tp->tp_clear((PyObject *)self);
1400     if (self->rec != NULL) {
1401         PyMem_Free(self->rec);
1402     }
1403     PyObject_GC_Del(self);
1404     Py_DECREF(tp);
1405 }
1406 
/* Docstring of the writer type; installed through the Py_tp_doc slot. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1413 
/* Slot table of the writer type: docstring, GC support (traverse/clear),
 * deallocator and the method/member tables.  Terminated by the {0, NULL}
 * sentinel. */
static PyType_Slot Writer_Type_slots[] = {
    {Py_tp_doc, (char*)Writer_Type_doc},
    {Py_tp_traverse, Writer_traverse},
    {Py_tp_clear, Writer_clear},
    {Py_tp_dealloc, Writer_dealloc},
    {Py_tp_methods, Writer_methods},
    {Py_tp_members, Writer_memberlist},
    {0, NULL}
};
1423 
/* Type specification handed to PyType_FromModuleAndSpec() in csv_exec().
 * The type is GC-aware and immutable, and it cannot be instantiated by
 * calling it: writer objects are allocated by csv_writer(). */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
    .slots = Writer_Type_slots,
};
1431 
1432 
1433 static PyObject *
csv_writer(PyObject * module,PyObject * args,PyObject * keyword_args)1434 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1435 {
1436     PyObject * output_file, * dialect = NULL;
1437     _csvstate *module_state = get_csv_state(module);
1438     WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
1439 
1440     if (!self)
1441         return NULL;
1442 
1443     self->dialect = NULL;
1444     self->write = NULL;
1445 
1446     self->rec = NULL;
1447     self->rec_size = 0;
1448     self->rec_len = 0;
1449     self->num_fields = 0;
1450 
1451     self->error_obj = Py_NewRef(module_state->error_obj);
1452 
1453     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1454         Py_DECREF(self);
1455         return NULL;
1456     }
1457     if (_PyObject_LookupAttr(output_file,
1458                              module_state->str_write,
1459                              &self->write) < 0) {
1460         Py_DECREF(self);
1461         return NULL;
1462     }
1463     if (self->write == NULL || !PyCallable_Check(self->write)) {
1464         PyErr_SetString(PyExc_TypeError,
1465                         "argument 1 must have a \"write\" method");
1466         Py_DECREF(self);
1467         return NULL;
1468     }
1469     self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1470                                                 keyword_args);
1471     if (self->dialect == NULL) {
1472         Py_DECREF(self);
1473         return NULL;
1474     }
1475     PyObject_GC_Track(self);
1476     return (PyObject *)self;
1477 }
1478 
1479 /*
1480  * DIALECT REGISTRY
1481  */
1482 
1483 /*[clinic input]
1484 _csv.list_dialects
1485 
1486 Return a list of all known dialect names.
1487 
1488     names = csv.list_dialects()
1489 [clinic start generated code]*/
1490 
1491 static PyObject *
_csv_list_dialects_impl(PyObject * module)1492 _csv_list_dialects_impl(PyObject *module)
1493 /*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
1494 {
1495     return PyDict_Keys(get_csv_state(module)->dialects);
1496 }
1497 
1498 static PyObject *
csv_register_dialect(PyObject * module,PyObject * args,PyObject * kwargs)1499 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1500 {
1501     PyObject *name_obj, *dialect_obj = NULL;
1502     _csvstate *module_state = get_csv_state(module);
1503     PyObject *dialect;
1504 
1505     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1506         return NULL;
1507     if (!PyUnicode_Check(name_obj)) {
1508         PyErr_SetString(PyExc_TypeError,
1509                         "dialect name must be a string");
1510         return NULL;
1511     }
1512     if (PyUnicode_READY(name_obj) == -1)
1513         return NULL;
1514     dialect = _call_dialect(module_state, dialect_obj, kwargs);
1515     if (dialect == NULL)
1516         return NULL;
1517     if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
1518         Py_DECREF(dialect);
1519         return NULL;
1520     }
1521     Py_DECREF(dialect);
1522     Py_RETURN_NONE;
1523 }
1524 
1525 
1526 /*[clinic input]
1527 _csv.unregister_dialect
1528 
1529     name: object
1530 
1531 Delete the name/dialect mapping associated with a string name.
1532 
1533     csv.unregister_dialect(name)
1534 [clinic start generated code]*/
1535 
1536 static PyObject *
_csv_unregister_dialect_impl(PyObject * module,PyObject * name)1537 _csv_unregister_dialect_impl(PyObject *module, PyObject *name)
1538 /*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
1539 {
1540     _csvstate *module_state = get_csv_state(module);
1541     if (PyDict_DelItem(module_state->dialects, name) < 0) {
1542         if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1543             PyErr_Format(module_state->error_obj, "unknown dialect");
1544         }
1545         return NULL;
1546     }
1547     Py_RETURN_NONE;
1548 }
1549 
1550 /*[clinic input]
1551 _csv.get_dialect
1552 
1553     name: object
1554 
1555 Return the dialect instance associated with name.
1556 
1557     dialect = csv.get_dialect(name)
1558 [clinic start generated code]*/
1559 
1560 static PyObject *
_csv_get_dialect_impl(PyObject * module,PyObject * name)1561 _csv_get_dialect_impl(PyObject *module, PyObject *name)
1562 /*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
1563 {
1564     return get_dialect_from_registry(name, get_csv_state(module));
1565 }
1566 
1567 /*[clinic input]
1568 _csv.field_size_limit
1569 
1570     new_limit: object = NULL
1571 
1572 Sets an upper limit on parsed fields.
1573 
1574     csv.field_size_limit([limit])
1575 
1576 Returns old limit. If limit is not given, no new limit is set and
1577 the old limit is returned
1578 [clinic start generated code]*/
1579 
1580 static PyObject *
_csv_field_size_limit_impl(PyObject * module,PyObject * new_limit)1581 _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
1582 /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
1583 {
1584     _csvstate *module_state = get_csv_state(module);
1585     long old_limit = module_state->field_limit;
1586     if (new_limit != NULL) {
1587         if (!PyLong_CheckExact(new_limit)) {
1588             PyErr_Format(PyExc_TypeError,
1589                          "limit must be an integer");
1590             return NULL;
1591         }
1592         module_state->field_limit = PyLong_AsLong(new_limit);
1593         if (module_state->field_limit == -1 && PyErr_Occurred()) {
1594             module_state->field_limit = old_limit;
1595             return NULL;
1596         }
1597     }
1598     return PyLong_FromLong(old_limit);
1599 }
1600 
/* The exception type defines no slots of its own; the table holds only the
 * sentinel. */
static PyType_Slot error_slots[] = {
    {0, NULL},
};

/* Specification of the module's exception class.  csv_exec() creates the
 * type from this spec with PyExc_Exception as its base. */
PyType_Spec error_spec = {
    .name = "_csv.Error",
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    .slots = error_slots,
};
1610 
1611 /*
1612  * MODULE
1613  */
1614 
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305.  Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail.  The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings.  When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
"    class excel:\n"
"        delimiter = ','\n"
"        quotechar = '\"'\n"
"        escapechar = None\n"
"        doublequote = True\n"
"        skipinitialspace = False\n"
"        lineterminator = '\\r\\n'\n"
"        quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
"    * quotechar - specifies a one-character string to use as the\n"
"        quoting character.  It defaults to '\"'.\n"
"    * delimiter - specifies a one-character string to use as the\n"
"        field separator.  It defaults to ','.\n"
"    * skipinitialspace - specifies how to interpret spaces which\n"
"        immediately follow a delimiter.  It defaults to False, which\n"
"        means that spaces immediately following a delimiter is part\n"
"        of the following field.\n"
"    * lineterminator -  specifies the character sequence which should\n"
"        terminate rows.\n"
"    * quoting - controls when quotes should be generated by the writer.\n"
"        It can take on any of the following module constants:\n"
"\n"
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
"            field contains either the quotechar or the delimiter\n"
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
"            fields which do not parse as integers or floating point\n"
"            numbers.\n"
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
"    * escapechar - specifies a one-character string used to escape\n"
"        the delimiter when quoting is set to QUOTE_NONE.\n"
"    * doublequote - controls the handling of quotes inside fields.  When\n"
"        True, two consecutive quotes are interpreted as one during read,\n"
"        and when writing, each quote character embedded in the data is\n"
"        written as two quotes\n");
1672 
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
"    csv_reader = reader(iterable [, dialect='excel']\n"
"                        [optional keyword args])\n"
"    for row in csv_reader:\n"
"        process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list.  The\n"
"optional \"dialect\" parameter is discussed below.  The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator.  Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
1687 
/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    for row in sequence:\n"
"        csv_writer.writerow(row)\n"
"\n"
"    [or]\n"
"\n"
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
"                            [optional keyword args])\n"
"    csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1701 
1702 PyDoc_STRVAR(csv_register_dialect_doc,
1703 "Create a mapping from a string name to a dialect class.\n"
1704 "    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1705 
/* Module-level function table.  reader(), writer() and register_dialect()
 * are hand-written functions taking positional and keyword arguments; the
 * remaining entries are *_METHODDEF macros (presumably provided by the
 * included clinic/_csv.c.h header). */
static struct PyMethodDef csv_methods[] = {
    { "reader", _PyCFunction_CAST(csv_reader),
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", _PyCFunction_CAST(csv_writer),
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    _CSV_LIST_DIALECTS_METHODDEF
    _CSV_UNREGISTER_DIALECT_METHODDEF
    _CSV_GET_DIALECT_METHODDEF
    _CSV_FIELD_SIZE_LIMIT_METHODDEF
    { NULL, NULL }
};
1719 
1720 static int
csv_exec(PyObject * module)1721 csv_exec(PyObject *module) {
1722     const StyleDesc *style;
1723     PyObject *temp;
1724     _csvstate *module_state = get_csv_state(module);
1725 
1726     temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1727     module_state->dialect_type = (PyTypeObject *)temp;
1728     if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1729         return -1;
1730     }
1731 
1732     temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1733     module_state->reader_type = (PyTypeObject *)temp;
1734     if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1735         return -1;
1736     }
1737 
1738     temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1739     module_state->writer_type = (PyTypeObject *)temp;
1740     if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1741         return -1;
1742     }
1743 
1744     /* Add version to the module. */
1745     if (PyModule_AddStringConstant(module, "__version__",
1746                                    MODULE_VERSION) == -1) {
1747         return -1;
1748     }
1749 
1750     /* Set the field limit */
1751     module_state->field_limit = 128 * 1024;
1752 
1753     /* Add _dialects dictionary */
1754     module_state->dialects = PyDict_New();
1755     if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1756         return -1;
1757     }
1758 
1759     /* Add quote styles into dictionary */
1760     for (style = quote_styles; style->name; style++) {
1761         if (PyModule_AddIntConstant(module, style->name,
1762                                     style->style) == -1)
1763             return -1;
1764     }
1765 
1766     /* Add the CSV exception object to the module. */
1767     PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1768     if (bases == NULL) {
1769         return -1;
1770     }
1771     module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1772                                                        bases);
1773     Py_DECREF(bases);
1774     if (module_state->error_obj == NULL) {
1775         return -1;
1776     }
1777     if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1778         return -1;
1779     }
1780 
1781     module_state->str_write = PyUnicode_InternFromString("write");
1782     if (module_state->str_write == NULL) {
1783         return -1;
1784     }
1785     return 0;
1786 }
1787 
/* Module slots: csv_exec() runs when the module is executed. */
static PyModuleDef_Slot csv_slots[] = {
    {Py_mod_exec, csv_exec},
    {0, NULL}
};

/* Module definition.  Fields are given positionally, in PyModuleDef order. */
static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",                 /* m_name */
    csv_module_doc,         /* m_doc */
    sizeof(_csvstate),      /* m_size: per-module state */
    csv_methods,            /* m_methods */
    csv_slots,              /* m_slots */
    _csv_traverse,          /* m_traverse */
    _csv_clear,             /* m_clear */
    _csv_free               /* m_free */
};
1804 
/* Import entry point for the _csv extension module.  Hands the module
 * definition to PyModuleDef_Init(); the module contents are filled in
 * later by the Py_mod_exec slot (csv_exec). */
PyMODINIT_FUNC
PyInit__csv(void)
{
    return PyModuleDef_Init(&_csvmodule);
}
1810